In [2]:
#importing libraries 
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, make_scorer
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import numpy as np
import pickle



# 0. Label encoded original data

### to check feature importance

In [3]:
#reading original label encoded data
df = pd.read_csv("Data/transformed/train_org_lb.csv")
df.head()



Unnamed: 0,raisedhands,VisITedResources,AnnouncementsView,Discussion,gender,NationalITy,PlaceofBirth,StageID,GradeID,SectionID,Topic,Semester,Relation,ParentAnsweringSurvey,ParentschoolSatisfaction,StudentAbsenceDays,Class
0,50,88,30,80,1,4,4,1,4,0,8,0,0,1,1,1,H
1,32,82,59,63,1,3,3,2,0,1,0,1,1,1,0,0,M
2,50,62,13,33,1,3,3,2,0,1,4,1,0,0,0,0,L
3,60,80,50,40,0,4,11,0,8,0,9,0,1,0,0,1,M
4,70,92,50,7,0,4,4,2,0,1,7,0,1,1,1,1,H


In [4]:
#checking number of rows and columns
df.shape

(408, 17)

In [5]:
#convert feature's string values into numbers to apply models
label_to_key = {}
key_to_label = {}
for i,label in enumerate(sorted(df['Class'].unique())):
    label_to_key[label] = i
    key_to_label[i]= label

In [6]:
df.Class = df.Class.apply(lambda x :label_to_key[x]) # converting Class feature values from str to number

In [7]:
#checking the conversion
label_to_key

{'H': 0, 'L': 1, 'M': 2}

In [8]:
# extracting input features(X) and output (y)
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [9]:
# to save trained model in pickle file

def save_pkl_model(path,model):
    pickle.dump(model, open(path,'wb'))

In [10]:
# to load trained model 
def load_pkl_model(path):
    return pickle.load(open(path, 'rb'))
    

**Random forest**

to check importance of features 

In [11]:
classifier = RandomForestClassifier()

In [12]:
f1 = make_scorer(f1_score, average='micro')

In [13]:
# setting parameters for grid search 


# n_estimatorsint, default=100
###The number of trees in the forest.

#criterion{“gini”, “entropy”}, default=”gini”
###The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. Note: this parameter is tree-specific.

#bootstrapbool, default=True
###Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree.

#https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html 
grid_param = {
    'n_estimators': [int(x) for x in range(200,2000,200)],
    'criterion': ['gini', 'entropy'],
    'bootstrap': [True, False]
}

In [14]:
#  grid search cross fold validation

gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [15]:
# applying random forest with different combinations of hyperparameters 
gd_sr.fit(X, y)

GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400,
                                          1600, 1800]},
             scoring=make_scorer(f1_score, average=micro))

In [16]:
# getting model with best hyperparameters 
best_random = gd_sr.best_estimator_


In [17]:
#pritning best score, estimator and feature importance,  
print (gd_sr.best_score_,best_random,best_random.feature_importances_)

0.8186088527551941 RandomForestClassifier(criterion='entropy', n_estimators=1000) [0.14102608 0.18770605 0.12323161 0.0863615  0.02310939 0.03406358
 0.03131846 0.01404104 0.03284837 0.0167915  0.04704994 0.01124462
 0.03197633 0.03793062 0.02426588 0.15703504]


In [18]:
#saving the model 
save_pkl_model("models/rf.pkl",best_random)

In [19]:
#loading the model
best_random = load_pkl_model("models/rf.pkl")


In [20]:
# combining feature importance with column names in descending order to see 

d = {'Stats':X.columns,'FI':best_random.feature_importances_}
df1 = pd.DataFrame(d)

print(df1.sort_values(by=['FI'], ascending=False))



                       Stats        FI
1           VisITedResources  0.187706
15        StudentAbsenceDays  0.157035
0                raisedhands  0.141026
2          AnnouncementsView  0.123232
3                 Discussion  0.086362
10                     Topic  0.047050
13     ParentAnsweringSurvey  0.037931
5                NationalITy  0.034064
8                    GradeID  0.032848
12                  Relation  0.031976
6               PlaceofBirth  0.031318
14  ParentschoolSatisfaction  0.024266
4                     gender  0.023109
9                  SectionID  0.016792
7                    StageID  0.014041
11                  Semester  0.011245


In [21]:
# storing column names whose feature importance is less than 0.02 to drop 
drop_col = df1.Stats[df1.FI < 0.02]

In [22]:
# function to drop columns by matching names of columns
def func_dropCol(df,drop_col):
    for i in drop_col:
        df = df[df.columns.drop(list(df.filter(regex=i)))]
    return df




# 2. Naive Bays
**using one hot encoded ,bxtransformed data** 
**and important features from above model** 

**Assumptions:**

The biggest and only assumption is the assumption of conditional independence.

**Pros:**

1. Gives high performance when the conditional independence assumption is satisfied.
2. Easy to implement because only probabilities need to be calculated.
3. Works well with high-dimensional data, such as text.
4. Fast for real-time predictions.

**Cons:**

1. If conditional independence does not hold, then is performs poorly.
2. Has the problem of Numerical Stability or Numerical Underflow because of the multiplication of several small digits.
 

In [23]:
# loading classifier 
classifier = GaussianNB()

In [24]:
# reading one hot encoded box transformed data 
df = pd.read_csv("Data/transformed/train_bx_ohe.csv")

In [25]:
df.head()

Unnamed: 0,raisedhands,VisITedResources,AnnouncementsView,Discussion,gender_M,NationalITy_Iran,NationalITy_Iraq,NationalITy_Jordan,NationalITy_KW,NationalITy_Lybia,...,Topic_Math,Topic_Quran,Topic_Science,Topic_Spanish,Semester_S,Relation_Mum,ParentAnsweringSurvey_Yes,ParentschoolSatisfaction_Good,StudentAbsenceDays_Under-7,Class
0,0.283065,0.915333,-0.046205,1.202617,1.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,H
1,-0.248,0.787987,0.814707,0.738216,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,M
2,0.283065,0.330558,-0.819927,-0.262502,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,L
3,0.53294,0.744635,0.58067,0.000635,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,M
4,0.760956,0.998122,0.58067,-1.642231,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,H


In [26]:
# checking number of rows(r) and columns(c)  (r,c)
df.shape

(408, 61)

In [27]:
# dropping columns with less importance
df = func_dropCol(df,drop_col)
df.head()

Unnamed: 0,raisedhands,VisITedResources,AnnouncementsView,Discussion,gender_M,NationalITy_Iran,NationalITy_Iraq,NationalITy_Jordan,NationalITy_KW,NationalITy_Lybia,...,Topic_IT,Topic_Math,Topic_Quran,Topic_Science,Topic_Spanish,Relation_Mum,ParentAnsweringSurvey_Yes,ParentschoolSatisfaction_Good,StudentAbsenceDays_Under-7,Class
0,0.283065,0.915333,-0.046205,1.202617,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,H
1,-0.248,0.787987,0.814707,0.738216,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,M
2,0.283065,0.330558,-0.819927,-0.262502,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,L
3,0.53294,0.744635,0.58067,0.000635,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,M
4,0.760956,0.998122,0.58067,-1.642231,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,H


In [28]:
# checking number of rows(r) and columns(c) in (r,c) format
df.shape

(408, 56)

In [29]:
# converting Class feature values from str to number
df.Class = df.Class.apply(lambda x :label_to_key[x]) 
df.head()

Unnamed: 0,raisedhands,VisITedResources,AnnouncementsView,Discussion,gender_M,NationalITy_Iran,NationalITy_Iraq,NationalITy_Jordan,NationalITy_KW,NationalITy_Lybia,...,Topic_IT,Topic_Math,Topic_Quran,Topic_Science,Topic_Spanish,Relation_Mum,ParentAnsweringSurvey_Yes,ParentschoolSatisfaction_Good,StudentAbsenceDays_Under-7,Class
0,0.283065,0.915333,-0.046205,1.202617,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0
1,-0.248,0.787987,0.814707,0.738216,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,2
2,0.283065,0.330558,-0.819927,-0.262502,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.53294,0.744635,0.58067,0.000635,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,2
4,0.760956,0.998122,0.58067,-1.642231,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0


In [30]:
# extracting input and target features
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [31]:
f1 = make_scorer(f1_score, average='micro')

In [32]:
# setting range of hyperparameters for grid search

#var_smoothingfloat, default=1e-9
##Portion of the largest variance of all features that is added to variances for calculation stability.
grid_param = {
    'var_smoothing': np.logspace(0,-9, num=10)}

In [33]:
# grid search cross validation
gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [34]:
# applying random forest with different combinations of hyperparameters 
gd_sr.fit(X, y)

GridSearchCV(cv=5, estimator=GaussianNB(), n_jobs=-1,
             param_grid={'var_smoothing': array([1.e+00, 1.e-01, 1.e-02, 1.e-03, 1.e-04, 1.e-05, 1.e-06, 1.e-07,
       1.e-08, 1.e-09])},
             scoring=make_scorer(f1_score, average=micro))

In [35]:
print (gd_sr.best_score_, gd_sr.best_params_)

0.6789220114423367 {'var_smoothing': 0.1}


In [36]:
save_pkl_model("models/naive_bayes.pkl",best_random)

# 3. algorithms with Normalized and one hot encoded data

In [37]:
df = pd.read_csv("Data/transformed/train_mms_ohe.csv")
print("shape " , df.shape)
df = func_dropCol(df,drop_col)

print("shape after removing columns ", df.shape)
df.Class = df.Class.apply(lambda x :label_to_key[x]) # mapping 
df.head()
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

shape  (408, 61)
shape after removing columns  (408, 56)


## 3.1 Decision Trees 
**Assumptions of algorithm** :

1. Initially, whole training data is considered as root.
2. Records are distributed recursively on the basis of the attribute value.

**Pros** :

1. Compared to other algorithms, data preparation requires less time.
2. Doesn’t require data to be normalized.
3. Missing values, to an extent, don’t affect its performance much.
4. Is very intuitive as can be explained as if-else conditions.

**Cons**:

1. Needs a lot of time to train the model.
2. A small change in data can cause a considerably large change in the Decision Tree structure.
3. Comparatively expensive to train.
4. Not good for regression tasks.

https://www.kdnuggets.com/2021/02/machine-learning-assumptions.html 
 

In [38]:
classifier = DecisionTreeClassifier()

In [39]:
# ??
f1 = make_scorer(f1_score, average='micro')

In [40]:
params = {
    'max_depth': [2, 3, 5, 10, 20],
    'min_samples_leaf': [5, 10, 20, 50, 100],
    'criterion': ["gini", "entropy"]
}

In [41]:
# grid search cross validation
gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=params,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [42]:
# fitting model with different combinations of hyperparameters 
gd_sr.fit(X, y)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [2, 3, 5, 10, 20],
                         'min_samples_leaf': [5, 10, 20, 50, 100]},
             scoring=make_scorer(f1_score, average=micro))

In [43]:
best_random = gd_sr.best_estimator_

In [44]:
save_pkl_model("models/decision_tree.pkl",best_random)

In [45]:
print (gd_sr.best_score_, gd_sr.best_params_)

0.7476663655525445 {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 5}


# 3.2 Random forest

In [46]:
classifier = RandomForestClassifier()

In [47]:
f1 = make_scorer(f1_score, average='micro')

In [48]:
# n_estimatorsint, default=100
###The number of trees in the forest.

#criterion{“gini”, “entropy”}, default=”gini”
###The function to measure the quality of a split. Supported criteria are “gini” for the Gini impurity and “entropy” for the information gain. Note: this parameter is tree-specific.

#bootstrapbool, default=True
###Whether bootstrap samples are used when building trees. If False, the whole dataset is used to build each tree.

#https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html 
grid_param = {
    'n_estimators': [int(x) for x in range(200,2000,200)],
    'criterion': ['gini', 'entropy'],
    'bootstrap': [True, False]
}

In [49]:


gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [50]:
# fitting model with different combinations of hyperparameters 
gd_sr.fit(X, y)

GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400,
                                          1600, 1800]},
             scoring=make_scorer(f1_score, average=micro))

In [51]:
print (gd_sr.best_score_, gd_sr.best_params_,gd_sr.best_estimator_.feature_importances_)

0.8234869015356822 {'bootstrap': False, 'criterion': 'entropy', 'n_estimators': 1600} [1.38814581e-01 1.62327643e-01 1.19231846e-01 8.94267276e-02
 2.65062464e-02 1.65214327e-03 4.76781657e-03 1.27808833e-02
 1.24019184e-02 1.39803966e-03 1.15099585e-03 5.66400406e-03
 4.69628115e-03 1.83480127e-03 2.62303909e-03 7.40261164e-04
 2.21874359e-03 1.24814397e-04 1.61590258e-03 4.60765195e-03
 1.24034781e-02 1.30227664e-02 1.23037441e-03 1.10377801e-03
 2.98157146e-03 4.45442087e-03 1.32362625e-03 1.63571643e-03
 2.13636569e-03 2.90317302e-03 1.01104673e-04 1.02807971e-02
 5.96631302e-05 6.55931264e-03 1.11030524e-02 1.18431257e-02
 8.67728115e-04 1.23461161e-03 2.68345839e-03 1.69186308e-03
 5.02067177e-03 7.34546212e-03 7.47541098e-03 8.43233278e-03
 7.10595737e-03 5.83591711e-03 1.06603019e-02 6.00998079e-03
 3.83395615e-03 7.76446720e-03 4.42674257e-03 3.62736879e-02
 4.10103433e-02 2.93981538e-02 1.35202287e-01]


In [52]:
save_pkl_model("models/random_forest.pkl",gd_sr.best_estimator_)

In [53]:
d = {'Stats':X.columns,'FI':gd_sr.best_estimator_.feature_importances_}
df1 = pd.DataFrame(d)


In [54]:
df1

Unnamed: 0,Stats,FI
0,raisedhands,0.138815
1,VisITedResources,0.162328
2,AnnouncementsView,0.119232
3,Discussion,0.089427
4,gender_M,0.026506
5,NationalITy_Iran,0.001652
6,NationalITy_Iraq,0.004768
7,NationalITy_Jordan,0.012781
8,NationalITy_KW,0.012402
9,NationalITy_Lybia,0.001398


# 3.3 Logistic regression 

In [55]:
classifier = LogisticRegression()

In [56]:
f1 = make_scorer(f1_score, average='micro')

In [57]:
grid_param =  {'penalty' : ['l1', 'l2', 'elasticnet', 'none'],
    'C' : np.logspace(-4, 4, 3),
    'solver' : ['lbfgs','newton-cg','liblinear','sag','saga'],
    'max_iter' : [100, 1000]
    }

In [58]:
# grid search cross validation
gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [59]:
# fitting model with different combinations of hyperparameters 
gd_sr.fit(X, y)

270 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ProBook\AppData\Roaming\Python\Python38\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ProBook\AppData\Roaming\Python\Python38\site-packages\sklearn\linear_model\_logistic.py", line 1461, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\ProBook\AppData\Roaming\Python\Python38\site-packages\sklearn\linear_model\_logistic.py", line 447, in _check_solver
    raise ValueError(
ValueError: Solver lbfgs supports only 'l2' or '

GridSearchCV(cv=5, estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': array([1.e-04, 1.e+00, 1.e+04]),
                         'max_iter': [100, 1000],
                         'penalty': ['l1', 'l2', 'elasticnet', 'none'],
                         'solver': ['lbfgs', 'newton-cg', 'liblinear', 'sag',
                                    'saga']},
             scoring=make_scorer(f1_score, average=micro))

In [60]:
print (gd_sr.best_score_, gd_sr.best_params_,gd_sr.best_estimator_)

0.7622704004817826 {'C': 1.0, 'max_iter': 100, 'penalty': 'l2', 'solver': 'lbfgs'} LogisticRegression()


In [61]:
save_pkl_model("models/logistic_regression.pkl",gd_sr.best_estimator_)

# 3.4 SVM
**Assumptions:**

It assumes data is independent and identically distributed.

**Pros:**

1. Works really well on high dimensional data.
2. Memory efficient.
3. Effective in cases where the number of dimensions is greater than the number of samples.

**Cons:**

1. Not suitable for large datasets.
2. Doesn’t work well when the dataset has noise, i.e., the target classes are overlapping.
3. Slow to train.
4. No probabilistic explanation for classification.

In [62]:
classifier = SVC()

In [63]:
f1 = make_scorer(f1_score, average='micro')

In [64]:
grid_param = { 'C':[0.1,1,100,1000],
'kernel':['rbf','poly','sigmoid','linear'],
'degree':[1,5,6]
}

In [65]:
# grid search cross validation
gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [66]:
# fitting model with different combinations of hyperparameters 
gd_sr.fit(X, y)

GridSearchCV(cv=5, estimator=SVC(), n_jobs=-1,
             param_grid={'C': [0.1, 1, 100, 1000], 'degree': [1, 5, 6],
                         'kernel': ['rbf', 'poly', 'sigmoid', 'linear']},
             scoring=make_scorer(f1_score, average=micro))

In [67]:
print (gd_sr.best_score_, gd_sr.best_params_,gd_sr.best_estimator_)

0.7917193616380608 {'C': 1, 'degree': 1, 'kernel': 'rbf'} SVC(C=1, degree=1)


In [68]:
save_pkl_model("models/svm.pkl",gd_sr.best_estimator_)

# 3.5 XGBOOST

**Assumptions:**

It may have an assumption that encoded integer value for each variable has ordinal relation.

**Pros:**

1. Can work in parallell.
2. Can handle missing values.
3. No need for scaling or normalizing data.
4. Fast to interpret.
5. Great execution speed.

**Cons:**

1. Can easily overfit if parameters are not tuned properly.
2. Hard to tune.

In [69]:
from xgboost import XGBClassifier

In [70]:
classifier = XGBClassifier()

In [71]:
f1 = make_scorer(f1_score, average='micro')

In [72]:
# grid search cross validation
grid_param  = {
    'n_estimators': [100],
    'criterion': ['gini', 'entropy'],
    'min_samples_split': [1,2],
    'min_samples_leaf': [1,2],
    'max_leaf_nodes': [4,50,None]
}

In [73]:
gd_sr = GridSearchCV(estimator=classifier,
                     param_grid=grid_param,
                     scoring=f1,
                     cv=5,
                     n_jobs=-1)

In [74]:
# fitting model with different combinations of hyperparameters 
gd_sr.fit(X, y)

Parameters: { "criterion", "max_leaf_nodes", "min_samples_leaf", "min_samples_split" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster=None,
                                     callbacks=None, colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None,
                                     early_stopping_rounds=None,
                                     enable_categorical=False, eval_metric=None,
                                     gamma=None, gpu_id=None, grow_policy=None,
                                     importance_type=None,
                                     interaction_constraints=None,
                                     learning_rate=None, max_bin=None,
                                     max_ca...
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, predictor=None,
                        

In [75]:
print (gd_sr.best_score_, gd_sr.best_params_,gd_sr.best_estimator_)

0.7769948810599218 {'criterion': 'gini', 'max_leaf_nodes': 4, 'min_samples_leaf': 1, 'min_samples_split': 1, 'n_estimators': 100} XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              criterion='gini', early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_bin=256, max_cat_to_onehot=4, max_delta_step=0, max_depth=6,
              max_leaf_nodes=4, max_leaves=0, min_child_weight=1,
              min_samples_leaf=1, min_samples_split=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=0,
              num_parallel_tree=1, ...)


In [76]:
save_pkl_model("models/xgboost.pkl",gd_sr.best_estimator_)

# 3.6 Neural network 