**Created by Sanskar Hasija**

**🤖LightAutoML Classification - Titanic**

**15 NOVEMBER 2021**


# <center> 🤖LIGHTAUTOML CLASSIFICATION - TITANIC</center>
## <center>If you find this notebook useful, please support it with an upvote 👍</center>

# Installing LightAutoML

In [None]:
from IPython.display import clear_output

# Install LightAutoML via pip, upgrading it if already present (-U).
!pip install -U lightautoml
# Clear the cell output so the pip log does not clutter the notebook.
clear_output()

# Imports

In [None]:
import numpy as np
import pandas as pd
from lightautoml.automl.presets.tabular_presets import TabularUtilizedAutoML
from lightautoml.tasks import Task
from sklearn.metrics import f1_score

# Data Loading and Preprocessing

In [None]:
# Read the training set, the test set and the sample submission file.
train = pd.read_csv("../input/titanic/train.csv")
test = pd.read_csv('../input/titanic/test.csv')
subs = pd.read_csv('../input/titanic/gender_submission.csv')

# Columns excluded from modelling; the same list is removed from both frames.
drop_elements = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'SibSp', 'Parch']
train = train.drop(columns=drop_elements)
test = test.drop(columns=drop_elements)

def checkNull_fillData(df):
    """Fill missing values in every column of ``df`` in place.

    Numeric columns (any integer or float dtype, not only ``int64`` and
    ``float64``) are filled with the column mean. All other columns are
    filled with their most frequent value. A non-numeric column with no
    non-null values has no mode and is left untouched instead of raising.

    Args:
        df: ``pandas.DataFrame`` to modify in place.

    Returns:
        None. ``df`` itself is mutated.
    """
    for col in df.columns:
        column = df[col]
        if not column.isnull().any():
            continue

        is_numeric = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        if is_numeric:
            # An integer column can only contain missing values when it uses a
            # nullable dtype, which cannot store a fractional mean; widen it.
            if pd.api.types.is_integer_dtype(column):
                column = column.astype("float64")
            fill_value = column.mean()
        else:
            modes = column.mode()
            if modes.empty:
                # Nothing observed in this column, so there is no value to
                # impute with.
                continue
            fill_value = modes.iloc[0]

        df[col] = column.fillna(fill_value)
                
# Impute missing values; each frame is filled from its own column statistics.
checkNull_fillData(train)
checkNull_fillData(test)

# Split the training columns into string-valued ones (to be one-hot encoded)
# and everything else. The column dtype is inspected instead of a single
# sample cell, and DataFrame.items() is used because iteritems() was removed
# in pandas 2.0.
str_list = []
num_list = []
for colname, colvalue in train.items():
    if pd.api.types.is_object_dtype(colvalue) or pd.api.types.is_string_dtype(colvalue):
        str_list.append(colname)
    else:
        num_list.append(colname)

train = pd.get_dummies(train, columns=str_list)
test = pd.get_dummies(test, columns=str_list)

# The two frames are encoded independently, so a category present in only one
# of them would yield different columns. Give the test frame exactly the
# training feature columns (target excluded), zero-filling any that are absent.
feature_columns = [name for name in train.columns if name != 'Survived']
test = test.reindex(columns=feature_columns, fill_value=0)

# AutoML

In [None]:
# Run configuration for the AutoML experiment.
N_THREADS = 4      # worker count
N_FOLDS = 5        # not referenced by the cells visible in this notebook
RANDOM_STATE = 12  # seed
TEST_SIZE = 0.2    # not referenced by the cells visible in this notebook
TIMEOUT = 30 * 60  # 30 minutes, expressed in seconds

def f1_metric(y_true, y_pred, **kwargs):
    """Return the F1 score of scores thresholded at 0.5.

    Args:
        y_true: Ground-truth labels, passed to ``f1_score`` unchanged.
        y_pred: Array of predicted scores; values strictly greater than 0.5
            become class 1, everything else class 0.
        **kwargs: Extra keyword arguments forwarded to ``f1_score``.

    Returns:
        Whatever ``sklearn.metrics.f1_score`` returns for the hard labels.
    """
    hard_labels = (y_pred > 0.5).astype(int)
    return f1_score(y_true, hard_labels, **kwargs)

# Column roles handed to the AutoML reader: 'Survived' is the target column.
roles = {'target': 'Survived'}

# Binary classification task scored with the thresholded F1 metric.
task = Task('binary', metric=f1_metric)

In [None]:
# Configure the AutoML preset: time budget, CPU limit and seed come from the
# run constants; the algorithm list is restricted to a linear model plus
# default and tuned LightGBM.
automl = TabularUtilizedAutoML(
    task=task,
    timeout=TIMEOUT,
    cpu_limit=N_THREADS,
    random_state=RANDOM_STATE,
    general_params={'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
    reader_params={'n_jobs': N_THREADS},
)

# Fit on the training frame and keep the returned predictions.
# NOTE(review): presumably these are out-of-fold predictions; confirm in the
# LightAutoML docs.
history = automl.fit_predict(train, roles=roles, verbose=1)

# Submission

In [None]:
# Score the test frame with the fitted model.
test_pred = automl.predict(test)

# Threshold the first prediction column at 0.5 to obtain 0/1 labels.
survival_score = test_pred.data[:, 0]
subs['Survived'] = (survival_score > 0.5).astype(int)

# Write the submission file without the index column, then preview it.
subs.to_csv('lightautoml.csv', index=False)
subs.head()