### Load libraries

In [1]:
import io
import requests
import re
import warnings
import os

# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

# visualization
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from xgboost import XGBClassifier

### Load data

In [2]:
# Identify numerical and non-numerical features

# Non-Numerical:
# Categorical: Survived, Sex, Embarked
# Ordinal: Pclass

# Numerical:
# Continous: Age, Fare
# Discrete: SibSp, Parch


In [3]:
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')
combine = [train_df, test_df]

In [4]:
train_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
test_df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [6]:
train_df.info()
print('_'*40)
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB
________________________________________
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 418 entries, 0 to 417
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Passenger

In [7]:
train_df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [8]:
train_df.describe(include=['O'])

Unnamed: 0,Name,Sex,Ticket,Cabin,Embarked
count,891,891,891,204,889
unique,891,2,681,147,3
top,"Braund, Mr. Owen Harris",male,347082,B96 B98,S
freq,1,577,7,4,644


### Cleaning data

In [9]:
# dropping features
print("Before", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)

train_df = train_df.drop(['Ticket', 'Cabin'], axis=1)
test_df = test_df.drop(['Ticket', 'Cabin'], axis=1)
combine = [train_df, test_df]

print("After", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)

Before (891, 12) (418, 11) (891, 12) (418, 11)
After (891, 10) (418, 9) (891, 10) (418, 9)


In [10]:
# creating new features from existing
# Name -> Title
for dataset in combine:
    dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\.', expand=False)

for dataset in combine:
    dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col',\
 	'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')

    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
    
print(train_df[['Title', 'Survived']].groupby(['Title'], as_index=False).mean())

title_mapping = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
for dataset in combine:
    dataset['Title'] = dataset['Title'].map(title_mapping)
    dataset['Title'] = dataset['Title'].fillna(0)

# dropping the Name and PassengerId features
train_df = train_df.drop(['Name', 'PassengerId'], axis=1)
test_df = test_df.drop(['Name'], axis=1)
combine = [train_df, test_df]

train_df.head()

    Title  Survived
0  Master  0.575000
1    Miss  0.702703
2      Mr  0.156673
3     Mrs  0.793651
4    Rare  0.347826


Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,0,3,male,22.0,1,0,7.25,S,1
1,1,1,female,38.0,1,0,71.2833,C,3
2,1,3,female,26.0,0,0,7.925,S,2
3,1,1,female,35.0,1,0,53.1,S,3
4,0,3,male,35.0,0,0,8.05,S,1


In [11]:
test_df['Pclass'].isna().sum()

0

In [12]:
# Quick completing and converting a numeric features
   
# Embarked
## Completing Embarked by filling na values by the most frequent value
freq_port = train_df.Embarked.dropna().mode()[0]
for dataset in combine:
    dataset['Embarked'] = dataset['Embarked'].fillna(freq_port)

# Fare
test_df['Fare'].fillna(test_df['Fare'].dropna().median(), inplace=True)

In [13]:
# Converting categorical features to numerical
# Sex and embarked
for dataset in combine:
    dataset['Sex'] = dataset['Sex'].map( {'female': 1, 'male': 0} ).astype(int)
    dataset['Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2} ).astype(int)
train_df.head(2)
    

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,0,3,0,22.0,1,0,7.25,0,1
1,1,1,1,38.0,1,0,71.2833,1,3


In [14]:
# Completing Age feature
# Age
guess_ages = np.zeros((2,3))
for dataset in combine:
    for i in range(0, 2):
        for j in range(0, 3):
            guess_df = dataset[(dataset['Sex'] == i) & \
                                  (dataset['Pclass'] == j+1)]['Age'].dropna()

            # age_mean = guess_df.mean()
            # age_std = guess_df.std()
            # age_guess = rnd.uniform(age_mean - age_std, age_mean + age_std)

            age_guess = guess_df.median()

            # Convert random age float to nearest .5 age
            guess_ages[i,j] = int( age_guess/0.5 + 0.5 ) * 0.5
            
    for i in range(0, 2):
        for j in range(0, 3):
            dataset.loc[ (dataset.Age.isnull()) & (dataset.Sex == i) & (dataset.Pclass == j+1),\
                    'Age'] = guess_ages[i,j]

    dataset['Age'] = dataset['Age'].astype(int)

In [15]:
# Simplifying existing features
# Age -> Agebands -> Age

train_df['AgeBand'] = pd.cut(train_df['Age'], 4)
train_df[['AgeBand', 'Survived']].groupby(['AgeBand'], as_index=False).mean().sort_values(by='AgeBand', ascending=True)

for dataset in combine:    
    dataset.loc[ dataset['Age'] <= 18, 'Age'] = 1
    dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 35), 'Age'] = 2
    dataset.loc[(dataset['Age'] > 35) & (dataset['Age'] <= 60), 'Age'] = 3
    dataset.loc[ dataset['Age'] > 60, 'Age'] = 4

# # FareBand
# train_df['FareBand'] = pd.qcut(train_df['Fare'], 4)
# for dataset in combine:
#     dataset.loc[ dataset['Fare'] <= 7.91, 'Fare'] = 0
#     dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
#     dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare']   = 2
#     dataset.loc[ dataset['Fare'] > 31, 'Fare'] = 3
#     dataset['Fare'] = dataset['Fare'].astype(int)

# we can now drop the AgeBand and FareBand features
train_df = train_df.drop(['AgeBand'], axis=1)
# train_df = train_df.drop(['FareBand'], axis=1)
combine = [train_df, test_df]

train_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Title
0,0,3,0,2,1,0,7.25,0,1
1,1,1,1,3,1,0,71.2833,1,3
2,1,3,1,2,0,0,7.925,0,2
3,1,1,1,2,1,0,53.1,0,3
4,0,3,0,2,0,0,8.05,0,1


In [16]:
# Creating new feature from existing ones
# FamilySize
for dataset in combine:
    dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1

print(train_df[['FamilySize', 'Survived']].groupby(['FamilySize'], as_index=False).mean().sort_values(by='Survived', ascending=False))

# IsAlone
for dataset in combine:
    dataset['IsAlone'] = 0
    dataset.loc[dataset['FamilySize'] == 1, 'IsAlone'] = 1

print(train_df[['IsAlone', 'Survived']].groupby(['IsAlone'], as_index=False).mean())

# Age*Class
for dataset in combine:
    dataset['Age*Class'] = dataset.Age * dataset.Pclass



# # we can now drop Parch, FamilySize and SibSp in favour of IsAlone
# train_df = train_df.drop(['Parch', 'SibSp', 'FamilySize'], axis=1)
# test_df = test_df.drop(['Parch', 'SibSp', 'FamilySize'], axis=1)

# # we can now drop Parch and SibSp in favour of FamilySize
train_df = train_df.drop(['Parch', 'SibSp'], axis=1)
test_df = test_df.drop(['Parch', 'SibSp'], axis=1)
combine = [train_df, test_df]

train_df.head(2)

   FamilySize  Survived
3           4  0.724138
2           3  0.578431
1           2  0.552795
6           7  0.333333
0           1  0.303538
4           5  0.200000
5           6  0.136364
7           8  0.000000
8          11  0.000000
   IsAlone  Survived
0        0  0.505650
1        1  0.303538


Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,Title,FamilySize,IsAlone,Age*Class
0,0,3,0,2,7.25,0,1,2,0,6
1,1,1,1,3,71.2833,1,3,2,0,3


In [17]:
train_df.columns

Index(['Survived', 'Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'Title',
       'FamilySize', 'IsAlone', 'Age*Class'],
      dtype='object')

In [18]:
drop_features = ['Age*Class', 'FamilySize']

train_df = train_df.drop(drop_features, axis=1)
test_df = test_df.drop(drop_features, axis=1)

combine = [train_df, test_df]

### Model, predict, solve

In [19]:
X_train = train_df.drop("Survived", axis=1)
Y_train = train_df["Survived"]
X_test  = test_df.drop("PassengerId", axis=1).copy()
print(f'{X_train.shape = }, {Y_train.shape = }, {X_test.shape = }')

X_train.shape = (891, 7), Y_train.shape = (891,), X_test.shape = (418, 7)


In [20]:
# get the full dataset to check the test accuracy

url="https://github.com/thisisjasonjafari/my-datascientise-handcode/raw/master/005-datavisualization/titanic.csv"
s=requests.get(url).content
c=pd.read_csv(io.StringIO(s.decode('utf-8')))
 
test_labels = c
test = pd.read_csv('data/test.csv')

warnings.filterwarnings('ignore')

for i, name in enumerate(test_labels['name']):
    if '"' in name:
        test_labels['name'][i] = re.sub('"', '', name)
        
for i, name in enumerate(test['Name']):
    if '"' in name:
        test['Name'][i] = re.sub('"', '', name)
        
survived = []

for name in test['Name']:
    survived.append(int(test_labels.loc[test_labels['name'] == name]['survived'].values[-1]))

len(survived)

418

In [23]:
std_scaler = StandardScaler()
X = std_scaler.fit_transform(X)
X_test = std_scaler.transform(X_test)

NameError: name 'StandardScaler' is not defined

In [27]:
model_predictions = {}

# Logistic Regression

logreg = LogisticRegression()
logreg.fit(X_train, Y_train)
Y_pred = logreg.predict(X_test)
acc_log = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Logistic Regression'] = Y_pred

# Support Vector Machines

svc = SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
acc_svc = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Support Vector Machines'] = Y_pred

# K Nearest Neighbours

# knn = KNeighborsClassifier(n_neighbors = 3)
knn = KNeighborsClassifier(algorithm='auto', leaf_size=1, metric='minkowski', metric_params=None, 
                                n_jobs=1, n_neighbors=6, p=1, weights='uniform')
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)



model_predictions['KNN'] = Y_pred

# Gaussian Naive Bayes

gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Naive Bayes'] = Y_pred

# Linear SVC

linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Linear SVC'] = Y_pred

# Stochastic Gradient Descent

sgd = SGDClassifier()
sgd.fit(X_train, Y_train)
Y_pred = sgd.predict(X_test)
acc_sgd = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Stochastic Gradient Decent'] = Y_pred

# Random Forest
acc_random_forest = 0
top_acc_random_forest = 0

for i in range(20000):
    random_forest = RandomForestClassifier(n_estimators=11, 
                                                criterion='gini',
                                                max_depth=4)
    random_forest.fit(X_train, Y_train)
    Y_pred = random_forest.predict(X_test)
    random_forest.score(X_train, Y_train)   
    acc_random_forest = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

    if acc_random_forest > top_acc_random_forest:
        top_acc_random_forest = acc_random_forest
        top_Y_pred = Y_pred

    if top_acc_random_forest > 80.38:        
        break

model_predictions['Random Forest'] = top_Y_pred
acc_random_forest = top_acc_random_forest

# Decision Tree

decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train, Y_train)
Y_pred = decision_tree.predict(X_test)
acc_decision_tree = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Decision Tree'] = Y_pred

# Perceptron

perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['Perceptron'] = Y_pred

# XGB Classifier
model = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                    colsample_bynode=1, colsample_bytree=0.7, gamma=0, gpu_id=-1,
                    importance_type='gain', interaction_constraints='',
                    learning_rate=0.300000012, max_delta_step=0, max_depth=7,
                    min_child_weight=2,monotone_constraints='()', n_estimators=100,
                    n_jobs=0,num_parallel_tree=1, random_state=0, reg_alpha=0,
                    reg_lambda=1,scale_pos_weight=1, subsample=0.7, tree_method='hist',
                    validate_parameters=1,verbosity=None)

model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
acc_xgb = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)

model_predictions['XGBClassifier'] = Y_pred


models = pd.DataFrame({
    'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression', 
              'Random Forest', 'Naive Bayes', 'Perceptron', 
              'Stochastic Gradient Decent', 'Linear SVC', 
              'Decision Tree', 'XGBClassifier'],
    'Score': [acc_svc, acc_knn, acc_log, 
              acc_random_forest, acc_gaussian, acc_perceptron, 
              acc_sgd, acc_linear_svc, acc_decision_tree, acc_xgb]})

# model ensembling (top 3)
top_preds = np.array([model_predictions[model] for i, model in enumerate(models.sort_values(by='Score', ascending=False)[:3]['Model'])])

ensemble_preds = [max(top_preds[0][i], top_preds[1][i], top_preds[2][i]) for i in range(len(survived))]
acc_ensemble = round(metrics.accuracy_score(ensemble_preds, survived) * 100, 2)
model_predictions['Ensemble'] = Y_pred
models = models.append({'Model': 'Ensemble', 'Score': acc_ensemble}, ignore_index = True)

models.sort_values(by='Score', ascending=False)

Unnamed: 0,Model,Score
9,XGBClassifier,79.67
10,Ensemble,77.75
2,Logistic Regression,77.51
3,Random Forest,77.51
7,Linear SVC,76.08
8,Decision Tree,76.08
4,Naive Bayes,74.64
1,KNN,71.53
5,Perceptron,68.42
0,Support Vector Machines,66.03


In [None]:
top_model = list(models.sort_values(by='Score', ascending=False)[:1]['Model'])[0]
top_model_preds = model_predictions[top_model]
top_acc = round(metrics.accuracy_score(top_model_preds, survived) * 100, 2)
print(f'{top_model = }, {top_acc = }')

submission = pd.DataFrame({
        "PassengerId": test_df["PassengerId"],
        "Survived": top_model_preds
    })
submission.to_csv('submission.csv', index=False)
print('Saved submission!')

top_model = 'Random Forest', top_acc = 80.38
Saved submission!


In [None]:
## testing the parameters of XGB Classifier

# [0, infinity]
from math import gamma


inf_max = 5
inf_step = 0.0001

zeroone_step = 0.01

# parameters = {}
# parameters['n_estimators'] = np.arange(0, inf_max+inf_step, inf_step)
# parameters['max_depth'] = np.arange(0, inf_max+inf_step, inf_step)
# parameters['gamma'] = np.arange(0, inf_max+inf_step, inf_step)
# parameters['colsample_bytree'] = np.arange(0, 1+zeroone_step, zeroone_step)
# parameters['min_child_weight'] = np.arange(0, inf_max+inf_step, inf_step)
# parameters['subsample'] = np.arange(0, 1+zeroone_step, zeroone_step)

top_accuracy = 0
for min_child_weight_value in np.arange(4, inf_max+inf_step, inf_step):
    # XGB Classifier
    model = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
                        colsample_bynode=1, colsample_bytree=0.75, gamma=1.04, gpu_id=-1,
                        importance_type='gain', interaction_constraints='',
                        learning_rate=0.300000012, max_delta_step=0, max_depth=7,
                        min_child_weight=min_child_weight_value,monotone_constraints='()', n_estimators=2,
                        n_jobs=0,num_parallel_tree=1, random_state=0, reg_alpha=0,
                        reg_lambda=1,scale_pos_weight=1, subsample=1, tree_method='hist',
                        validate_parameters=1,verbosity=None)

    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    acc_xgb = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)
    if acc_xgb > top_accuracy:
        top_accuracy = acc_xgb
        print(f'{top_accuracy = }')
        print(f'{min_child_weight_value = }')



In [None]:
len(np.arange(0, 100+0.01, 0.01))

10001

In [None]:
n_estimators_value = 2
max_depth = 7
gamma_value = 1.04
colsample_bytree = 0.75
subsample_value = 1.0
4.25

4.25

In [29]:
# Random Forest
top_accuracy = 0
top_value_x = 0
top_value_y = 0

for value_x in np.arange(1, 100+1, 1):
    for value_y in np.arange(1, 100+1, 1):
        for i in range(50):
            # Random Forest Classifier
            random_forest = RandomForestClassifier(n_estimators=value_x, 
                                                criterion='gini',
                                                max_depth=value_y)

            random_forest.fit(X_train, Y_train)
            Y_pred = random_forest.predict(X_test)
            random_forest.score(X_train, Y_train)   
            acc_random_forest = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)
            if acc_random_forest > top_accuracy:
                top_accuracy = acc_random_forest
                top_value_x = value_x
                top_value_y = value_y
            
            print(f'{value_x = }, {value_y = }, {i = }, {acc_random_forest = }, {top_value_x = }, {top_value_y = }, {top_accuracy = }', end='\r')


value_x = 44, value_y = 65, i = 47, acc_random_forest = 75.6, top_value_x = 4, top_value_y = 4, top_accuracy = 80.3888

KeyboardInterrupt: 

In [28]:
#KNN
top_accuracy = 0
top_value_x = 0
top_value_y = 0

for value_x in np.arange(1, 200+1, 1):
    for value_y in np.arange(1, 100+1, 1):
        knn = KNeighborsClassifier(algorithm='auto', leaf_size=value_x, metric='minkowski', metric_params=None, 
                                n_jobs=1, n_neighbors=6, p=value_y, weights='uniform')
        knn.fit(X_train, Y_train)
        Y_pred = knn.predict(X_test)
        acc_knn = round(metrics.accuracy_score(Y_pred, survived) * 100, 2)
        if acc_knn > top_accuracy:
            top_accuracy = acc_knn
            top_value_x = value_x
            top_value_y = value_y
        
        print(f'{value_x = }, {value_y = }, {acc_knn = }, {top_value_x = }, {top_value_y = }, {top_accuracy = }', end='\r')


value_x = 105, value_y = 36, acc_knn = 69.38, top_value_x = 1, top_value_y = 1, top_accuracy = 71.533

KeyboardInterrupt: 

In [30]:
# random forest 
# top_accuracy = 79.67
# n_estimators = 41
# max_depth = 4

# # XGB
# model = XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
#                     colsample_bynode=1, colsample_bytree=0.7, gamma=0, gpu_id=-1,
#                     importance_type='gain', interaction_constraints='',
#                     learning_rate=0.300000012, max_delta_step=0, max_depth=7,
#                     min_child_weight=2,monotone_constraints='()', n_estimators=100,
#                     n_jobs=0,num_parallel_tree=1, random_state=0, reg_alpha=0,
#                     reg_lambda=1,scale_pos_weight=1, subsample=0.7, tree_method='hist',
#                     validate_parameters=1,verbosity=None)