# Personalized emotion forecasting methods using NLP and ML
## Model training

In [None]:
!pip install optuna

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import json

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import KFold

from sklearn.pipeline import Pipeline

from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from sklearn.neighbors import LocalOutlierFactor

from sklearn.feature_selection import SelectKBest, chi2, f_classif 
from sklearn.feature_selection import mutual_info_classif

from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neighbors import KNeighborsClassifier
from lightgbm import LGBMClassifier

from sklearn.decomposition import PCA

import optuna

In [None]:
SEED = 0

## Data load

In [None]:
# Feature table: the first CSV column is used as the row index.
X = pd.read_csv('daily_summary_&_emotions.csv', index_col=0)
# Target labels; assumed to be aligned row-for-row with X — TODO confirm.
y = np.load('y_sad.npy')

In [None]:
print(X.shape)
print(y.shape)

In [None]:
X_0 = X.fillna(0)

In [None]:
# Local Outlier Factor with default settings, run on the zero-filled features.
# fit_predict labels each row: -1 marks an outlier, 1 an inlier.
out_detector = LocalOutlierFactor()
outliers = out_detector.fit_predict(X_0)

In [None]:
# Row positions (0-based) of the samples flagged as outliers.
outliers_idx = np.flatnonzero(outliers == -1)

In [None]:
# Keep only the inlier rows. A boolean mask is used instead of `drop` because
# `drop` matches index *labels*, whereas the detector reports row *positions*
# and X's index is taken from the first CSV column.
X_clean = X.loc[outliers != -1]

In [None]:
# Keep the labels of the inlier rows. Plain mask indexing is used because
# np.delete's handling of boolean arrays differs between NumPy versions.
y_clean = y[outliers != -1]

In [None]:
print(X_clean.shape)
print(y_clean.shape)

In [None]:
# Shuffle the cleaned data and hold out 20% of it as the final test split.
X_train, X_test, y_train, y_test = train_test_split(
    X_clean,
    y_clean,
    test_size=0.2,
    random_state=SEED,
    shuffle=True,
)

In [None]:
# One imputer per strategy. Each is fitted on the training split only and then
# reused, already fitted, to transform the held-out split.
s_imp_mean = SimpleImputer(strategy='mean')
s_imp_median = SimpleImputer(strategy='median')
s_imp_mode = SimpleImputer(strategy='most_frequent')
it_imp = IterativeImputer(random_state=SEED)

imputers_by_name = dict(mean=s_imp_mean, median=s_imp_median,
                        most_frequent=s_imp_mode, iterative=it_imp)

# Training features keyed by imputation strategy ('0' = fill missing with zero).
X_train_inputs = {'0': X_train.fillna(0)}
for imp_name, imputer in imputers_by_name.items():
    X_train_inputs[imp_name] = imputer.fit_transform(X_train)

# Test features, transformed with the imputers fitted above.
X_test_inputs = {'0': X_test.fillna(0)}
for imp_name, imputer in imputers_by_name.items():
    X_test_inputs[imp_name] = imputer.transform(X_test)

# Univariate score functions selectable by name for SelectKBest.
method_selection = dict(f_classif=f_classif,
                        mutual_info_classif=mutual_info_classif)

In [None]:
# Standardise the zero-filled training features, then fit a full PCA on them.
zero_filled_train = X_train_inputs['0']
std = StandardScaler().fit(zero_filled_train)
pca = PCA()
X_train_pca = pca.fit_transform(std.transform(zero_filled_train))

In [None]:
# Number of leading principal components needed to explain more than 90% of
# the variance. argmax returns the 0-based index of the first component at
# which the cumulative ratio exceeds 0.90, so +1 turns that index into a count;
# without it the slice `[:, :var_90_expl]` drops that component (and is empty
# when the first component alone already exceeds 90%).
var_90_expl = int(np.argmax(pca.explained_variance_ratio_.cumsum() > 0.90)) + 1

In [None]:
# Project the test split with the scaler and PCA fitted on the training split.
X_test_pca = pca.transform(std.transform(X_test_inputs['0']))

In [None]:
# Register the truncated PCA projection as one more selectable input variant,
# keeping only the first `var_90_expl` components.
X_train_inputs['pca'] = X_train_pca[:, :var_90_expl]
X_test_inputs['pca'] = X_test_pca[:, :var_90_expl]

In [None]:
# Shared 5-fold splitter. The shuffle is seeded so that every evaluation using
# it sees the same folds and scores are comparable between trials and runs.
cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

## Random Forest Classifier

In [None]:
def rfc_objective(trial):
    """Optuna objective for the random forest: mean 5-fold F1 on the training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation') and the forest
        hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs``, ``y_train`` and ``SEED``.
    No univariate feature selection (SelectKBest) is applied for this model.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs[method]

    # Parameter name spelled 'bootstrap' (was 'boostrap') so the saved JSON
    # uses the same key as the estimator argument and as xrt_objective.
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])
    max_depth = trial.suggest_int('max_depth', 1, 20)
    n_estimators = trial.suggest_int('n_estimators', 1, 300)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 50)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])

    # Seed both the folds and the forest: otherwise two trials with identical
    # hyperparameters can score differently and the search optimises noise.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    clf = RandomForestClassifier(max_depth=max_depth, bootstrap=bootstrap,
                                 n_estimators=n_estimators, criterion=criterion,
                                 min_samples_split=min_samples_split,
                                 random_state=SEED)

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 50
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=SEED))
study.optimize(rfc_objective, n_trials=budget)

Save the results of the best hyperparameter combination

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
rfc_best = study.best_params
print(rfc_best)
with open('rfc_best_params.json', 'w') as fp:
    fp.write(json.dumps(rfc_best, indent=4))

In [None]:
# Reload the persisted random-forest hyperparameters.
with open('rfc_best_params.json', 'r') as fp:
    rfc_params = json.load(fp)
# The string literal below is a disabled pipeline variant (SelectKBest + forest);
# it is not executed. It references the keys 'selection' and 'k', which
# rfc_objective does not produce.
'''print(rfc_params)
rfc = Pipeline([
                ('select', SelectKBest(
                    method_selection[rfc_params['selection']], 
                    rfc_params['k'])),
                ('clf', RandomForestClassifier(max_depth=rfc_params['max_depth'],
                                               min_samples_split=rfc_params['min_samples_split'],
                                               n_estimators=rfc_params['n_estimators']))
                ])
rfc.fit(X_train_inputs[rfc_params['imputation']], y_train)'''

In [None]:
rfc_params

In [None]:
# Final random forest built from the tuned hyperparameters. The tuned bootstrap
# flag is applied as well; the misspelled key 'boostrap' is still accepted so
# parameter files written by earlier runs keep working (True is the
# scikit-learn default when neither key is present).
rfc = RandomForestClassifier(n_estimators=rfc_params['n_estimators'],
                             criterion=rfc_params['criterion'],
                             max_depth=rfc_params['max_depth'],
                             min_samples_split=rfc_params['min_samples_split'],
                             bootstrap=rfc_params.get(
                                 'bootstrap', rfc_params.get('boostrap', True)),
                             random_state=SEED)

In [None]:
# Train on the same input variant that the evaluation cell predicts on;
# fitting on '0' while predicting on another variant mismatches the features
# (and fails outright for 'pca', which has fewer columns).
rfc.fit(X_train_inputs[rfc_params['imputation']], y_train)

In [None]:
# Per-class precision/recall/F1 on the held-out split, as a nested dict.
rfc_test_pred = rfc.predict(X_test_inputs[rfc_params['imputation']])
report = classification_report(y_test, rfc_test_pred,
                               output_dict=True,
                               target_names=['happy', 'sad'])

In [None]:
df = pd.DataFrame(report).transpose()

In [None]:
df.to_csv('report_classification.csv')

## Decision Tree

In [None]:
def dtc_objective(trial):
    """Optuna objective for the decision tree: mean 5-fold F1 on the training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation') and the tree
        hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs``, ``y_train`` and ``SEED``.
    No univariate feature selection (SelectKBest) is applied for this model.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs[method]

    max_depth = trial.suggest_int('max_depth', 1, 20)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 50)
    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    splitter = trial.suggest_categorical('splitter', ['best', 'random'])

    # Seed the folds and the tree so that a trial's score depends only on its
    # hyperparameters, not on the random split or the random splitter.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    clf = DecisionTreeClassifier(criterion=criterion,
                                 splitter=splitter,
                                 max_depth=max_depth,
                                 min_samples_split=min_samples_split,
                                 random_state=SEED)

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 50
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_dtc = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
study_dtc.optimize(dtc_objective, n_trials=budget)

In [None]:
# Show and persist the best decision-tree hyperparameters. The study object
# created by the search cell is named `study_dtc`; `study_dtr` does not exist.
print(study_dtc.best_params)
with open('dtc_best_params.json', 'w') as fp:
    json.dump(study_dtc.best_params, fp, indent=4)

In [None]:
# Reload the persisted decision-tree hyperparameters and display them.
with open('dtc_best_params.json', 'r') as fp:
    dtr_params = json.loads(fp.read())
dtr_params

In [None]:
dtc = DecisionTreeClassifier(criterion=dtr_params['criterion'],
                             min_samples_split=dtr_params['min_samples_split'],
                             max_depth=dtr_params['max_depth'],
                             splitter=dtr_params['splitter'])
dtc.fit(X_train_inputs['0'], y_train)

In [None]:
print(classification_report(y_test, dtc.predict(X_test_inputs['0'])))

## Extremely Randomized Trees

In [None]:
def xrt_objective(trial):
    """Optuna objective for extremely randomized trees: mean 5-fold F1 on the training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation') and the
        ensemble hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs``, ``y_train`` and ``SEED``.
    No univariate feature selection (SelectKBest) is applied for this model.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs[method]

    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    bootstrap = trial.suggest_categorical('bootstrap', [True, False])
    max_depth = trial.suggest_int('max_depth', 1, 20)
    n_estimators = trial.suggest_int('n_estimators', 1, 300)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 50)

    # Seed the folds and the ensemble so that a trial's score depends only on
    # its hyperparameters.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    clf = ExtraTreesClassifier(max_depth=max_depth,
                               n_estimators=n_estimators,
                               criterion=criterion,
                               bootstrap=bootstrap,
                               min_samples_split=min_samples_split,
                               random_state=SEED)

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 50
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_xrt = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
study_xrt.optimize(xrt_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
xrt_best = study_xrt.best_params
print(xrt_best)
with open('xrt_best_params.json', 'w') as fp:
    fp.write(json.dumps(xrt_best, indent=4))


In [None]:
# Reload the persisted extra-trees hyperparameters and print them.
with open('xrt_best_params.json', 'r') as fp:
    xrt_params = json.loads(fp.read())
print(xrt_params)

In [None]:
# Final extra-trees model: tuned hyperparameters, trained on the tuned input
# variant. Seeded so that refitting reproduces the same model.
print(xrt_params)
xrt = ExtraTreesClassifier(max_depth=xrt_params['max_depth'],
                           n_estimators=xrt_params['n_estimators'],
                           criterion=xrt_params['criterion'],
                           bootstrap=xrt_params['bootstrap'],
                           min_samples_split=xrt_params['min_samples_split'],
                           random_state=SEED)
xrt.fit(X_train_inputs[xrt_params['imputation']], y_train)

In [None]:
print(classification_report(y_test, xrt.predict(X_test_inputs[xrt_params['imputation']])))

## SVC

In [None]:
# Standardised input variants for the scale-sensitive models below.
s_imp_mean = SimpleImputer(strategy='mean')
s_imp_median = SimpleImputer(strategy='median')
s_imp_mode = SimpleImputer(strategy='most_frequent')
it_imp = IterativeImputer(random_state=SEED)

# Impute each split exactly once per strategy. Each imputer is fitted on the
# training split only (previously it was refitted for every dictionary below).
_train_imputed = {
    '0': X_train.fillna(0),
    'mean': s_imp_mean.fit_transform(X_train),
    'median': s_imp_median.fit_transform(X_train),
    'most_frequent': s_imp_mode.fit_transform(X_train),
    'iterative': it_imp.fit_transform(X_train),
}
_test_imputed = {
    '0': X_test.fillna(0),
    'mean': s_imp_mean.transform(X_test),
    'median': s_imp_median.transform(X_test),
    'most_frequent': s_imp_mode.transform(X_test),
    'iterative': it_imp.transform(X_test),
}

# One scaler per strategy, fitted on the imputed training data.
sc_inputs = {name: StandardScaler().fit(data)
             for name, data in _train_imputed.items()}

X_train_inputs_sc = {name: sc_inputs[name].transform(data)
                     for name, data in _train_imputed.items()}
# Same number of PCA components as X_train_inputs['pca'] instead of a
# hard-coded 18, so both PCA variants always stay in step.
X_train_inputs_sc['pca'] = X_train_pca[:, :var_90_expl]

X_test_inputs_sc = {name: sc_inputs[name].transform(data)
                    for name, data in _test_imputed.items()}
X_test_inputs_sc['pca'] = X_test_pca[:, :var_90_expl]

# Univariate score functions selectable by name for SelectKBest.
method_selection_sc = {
    'f_classif': f_classif,
    'mutual_info_classif': mutual_info_classif,
}

In [None]:
def svc_objective(trial):
    """Optuna objective for the SVC pipeline: mean 2-fold F1 on the scaled training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation'), the feature
        selector ('selection', 'k') and the SVC hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs_sc``, ``method_selection``
    and ``y_train``. The kernel is fixed to 'rbf'.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs_sc[method]

    sel = trial.suggest_categorical('selection', ['f_classif',
                                                  'mutual_info_classif'])
    k = trial.suggest_int('k', 1, 27)
    C = trial.suggest_categorical('C', [1e-3, 1e-2, 1e-1, 1e1, 1e2, 1e3])
    kernel = 'rbf'

    # Collect only the arguments that apply to the chosen kernel. The previous
    # test `kernel == ('poly' or 'rbf' or 'sigmoid')` compared against 'poly'
    # alone, so gamma was never tuned for the rbf kernel.
    svc_kwargs = {'C': C, 'kernel': kernel}
    if kernel == 'poly':
        svc_kwargs['degree'] = trial.suggest_int('degree', 1, 10)
    if kernel in ('poly', 'rbf', 'sigmoid'):
        svc_kwargs['gamma'] = trial.suggest_categorical('gamma',
                                                        ['scale', 'auto'])

    # Some variants (e.g. 'pca') have fewer columns than the upper bound of k.
    n_selected = min(k, X_train_aux.shape[1])

    clf = Pipeline([('select', SelectKBest(method_selection[sel], k=n_selected)),
                    ('clf', SVC(**svc_kwargs))])

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=2)

    return scores.mean()

In [None]:
budget = 30
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_svc = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
study_svc.optimize(svc_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
svc_best = study_svc.best_params
print(svc_best)
with open('svc_best_params.json', 'w') as fp:
    fp.write(json.dumps(svc_best, indent=4))

In [None]:
# Reload the persisted SVC hyperparameters and display them.
with open('svc_best_params.json', 'r') as fp:
    svm_params = json.loads(fp.read())
svm_params

In [None]:
svm = Pipeline([('select', SelectKBest(method_selection[svm_params['selection']], 
                                       k=svm_params['k'])), 
                ('clf', SVC(C=svm_params['C'], kernel='rbf'))])\
                .fit(X_train_inputs['median'], y_train)

In [None]:
print(classification_report(y_test, svm.predict(X_test_inputs['median'])))

## KNN

In [None]:
def knn_objective(trial):
    """Optuna objective for the k-NN pipeline: mean 2-fold F1 on the scaled training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation'), the feature
        selector ('selection', 'k') and the k-NN hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs_sc``, ``method_selection``
    and ``y_train``.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs_sc[method]

    sel = trial.suggest_categorical('selection', ['f_classif',
                                                  'mutual_info_classif'])
    k = trial.suggest_int('k', 1, 27)
    n_neighbors = trial.suggest_int('n_neighbors', 1, 50)
    weights = trial.suggest_categorical('weights', ['uniform', 'distance'])
    p = trial.suggest_int('p', 1, 2)

    # Some variants (e.g. 'pca') have fewer columns than the upper bound of k;
    # asking SelectKBest for more features than exist wastes the trial.
    n_selected = min(k, X_train_aux.shape[1])

    clf = Pipeline([('select', SelectKBest(method_selection[sel], k=n_selected)),
                    ('clf', KNeighborsClassifier(n_neighbors=n_neighbors,
                                                 weights=weights,
                                                 p=p))])

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score), n_jobs=-1, cv=2)

    return scores.mean()

In [None]:
budget = 50
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_knn = optuna.create_study(direction='maximize',
                                sampler=optuna.samplers.TPESampler(seed=SEED))
study_knn.optimize(knn_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
knn_best = study_knn.best_params
print(knn_best)
with open('knn_best_params.json', 'w') as fp:
    fp.write(json.dumps(knn_best, indent=4))

In [None]:
# Reload the persisted k-NN hyperparameters and display them.
with open('knn_best_params.json', 'r') as fp:
    knn_params = json.loads(fp.read())
knn_params

In [None]:
knn = Pipeline([('select', SelectKBest(method_selection[knn_params['selection']], k=knn_params['k'])), 
                    ('clf', KNeighborsClassifier(n_neighbors=knn_params['n_neighbors'], 
                                                 weights=knn_params['weights'],
                                                 p=knn_params['p']))])
knn.fit(X_train_inputs['mean'], y_train)

In [None]:
print(classification_report(y_test, knn.predict(X_test_inputs['mean'])))

## Logistic Regression

In [None]:
def logis_objective(trial):
    """Optuna objective for logistic regression: mean 5-fold F1 on the scaled training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation'), the feature
        selector ('selection', 'k') and the regression hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs_sc``, ``method_selection``,
    ``y_train`` and ``SEED``. The 'pca' variant is not offered for this model.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative'])
    X_train_aux = X_train_inputs_sc[method]

    sel = trial.suggest_categorical('selection', ['f_classif',
                                                  'mutual_info_classif'])
    k = trial.suggest_int('k', 1, 27)
    # C spans six orders of magnitude, so sample it on a log scale; a uniform
    # draw would almost never propose values below 1.
    C = trial.suggest_float('C', 1e-3, 1000, log=True)
    solver = trial.suggest_categorical('solver', ['newton-cg', 'lbfgs',
                                                  'sag', 'saga'])
    max_iter = trial.suggest_int('max_iter', 100, 10000)

    # Seed the folds and the stochastic solvers ('sag', 'saga') so that a
    # trial's score depends only on its hyperparameters.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    clf = Pipeline([('select', SelectKBest(method_selection[sel], k=k)),
                    ('clf', LogisticRegression(C=C, solver=solver,
                                               max_iter=max_iter,
                                               random_state=SEED))])

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 20
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_logis = optuna.create_study(direction='maximize',
                                  sampler=optuna.samplers.TPESampler(seed=SEED))
study_logis.optimize(logis_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
logis_best = study_logis.best_params
print(logis_best)
with open('logis_best_params.json', 'w') as fp:
    fp.write(json.dumps(logis_best, indent=4))

In [None]:
# Reload the tuned logistic-regression parameters and fit the final pipeline
# on the scaled variant the search selected.
with open('logis_best_params.json', 'r') as fp:
    logis_params = json.load(fp)
print(logis_params)
logic = Pipeline([
                # `k` must be passed by keyword: SelectKBest accepts only the
                # score function positionally.
                ('select', SelectKBest(method_selection[logis_params['selection']],
                                       k=logis_params['k'])),
                ('clf', LogisticRegression(C=logis_params['C'],
                                           solver=logis_params['solver'],
                                           #class_weight='balanced',
                                           max_iter=logis_params['max_iter'],
                                           # Seeded for the stochastic solvers.
                                           random_state=SEED))])
logic.fit(X_train_inputs_sc[logis_params['imputation']], y_train)

In [None]:
logic.score(X_test_inputs_sc[logis_params['imputation']], y_test)

In [None]:
print(classification_report(y_test, logic.predict(X_test_inputs_sc[logis_params['imputation']])))

## Neural Network

In [None]:
X_test.shape

In [None]:
def nn_objective(trial):
    """Optuna objective for the MLP pipeline: mean 5-fold F1 on the scaled training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation'), the feature
        selector ('selection', 'k'), the number and width of hidden layers
        ('n_layers', 'layer1'..'layer3') and the training hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs_sc``, ``method_selection``,
    ``y_train`` and ``SEED``.
    """
    method = trial.suggest_categorical('imputation',
                                       ['0', 'mean', 'median',
                                        'most_frequent', 'iterative', 'pca'])
    X_train_aux = X_train_inputs_sc[method]

    sel = trial.suggest_categorical('selection', ['f_classif',
                                                  'mutual_info_classif'])
    k = trial.suggest_int('k', 1, 27)

    # One width per hidden layer, recorded as 'layer1', 'layer2', 'layer3'.
    # The former extra 'layers' suggestion was never used by the model and only
    # added a meaningless dimension to the search space.
    n_layers = trial.suggest_int('n_layers', 1, 3)
    layers = tuple(trial.suggest_int(f'layer{i}', 10, 300)
                   for i in range(1, n_layers + 1))

    activation = trial.suggest_categorical('activation', ['logistic', 'tanh', 'relu'])
    # Fixed at 100 iterations; still registered so it is saved with the params.
    max_iter = trial.suggest_int('max_iter', 100, 100)
    learning_rate_init = trial.suggest_categorical('learning_rate_init', [1e-5, 1e-4, 1e-3,
                                                                          1e-2, 1e-1])

    # Seed the folds and the network initialisation so that a trial's score
    # depends only on its hyperparameters.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    # Some variants (e.g. 'pca') have fewer columns than the upper bound of k.
    n_selected = min(k, X_train_aux.shape[1])

    clf = Pipeline([('select', SelectKBest(method_selection[sel], k=n_selected)),
                    ('clf', MLPClassifier(hidden_layer_sizes=layers, solver='adam',
                                          max_iter=max_iter, activation=activation,
                                          learning_rate_init=learning_rate_init,
                                          random_state=SEED))])

    scores = cross_val_score(clf, X_train_aux, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 25
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_nn = optuna.create_study(direction='maximize',
                               sampler=optuna.samplers.TPESampler(seed=SEED))
study_nn.optimize(nn_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
nn_best = study_nn.best_params
print(nn_best)
with open('nn_best_params.json', 'w') as fp:
    fp.write(json.dumps(nn_best, indent=4))

In [None]:
# NOTE(review): bare dict literal, not assigned to anything; it looks like a
# pasted record of an earlier best-params result — confirm before relying on it.
# Its keys ('layers', no 'activation') differ from what nn_objective records.
{'imputation': '0', 'selection': 'f_classif', 'k': 18, 'n_layers': 3, 'layers': 76, 'max_iter': 3345, 'learning_rate_init': 0.04433049412223602}

In [None]:
# Reload the tuned MLP parameters and fit the final pipeline on the scaled
# variant the search selected.
with open('nn_best_params.json', 'r') as fp:
    nn_params = json.load(fp)
print(nn_params)
# Rebuild the architecture from however many layers were tuned; hard-coding
# ('layer1', 'layer2') fails for a one-layer result and drops the third layer.
nn_layers = tuple(nn_params[f'layer{i}']
                  for i in range(1, nn_params['n_layers'] + 1))
nn_X_train = X_train_inputs_sc[nn_params['imputation']]
nn = Pipeline([
                # `k` must be passed by keyword: SelectKBest accepts only the
                # score function positionally.
                ('select', SelectKBest(method_selection[nn_params['selection']],
                                       k=min(nn_params['k'], nn_X_train.shape[1]))),
                ('clf', MLPClassifier(hidden_layer_sizes=nn_layers, solver='adam',
                                      max_iter=nn_params['max_iter'],
                                      # Falls back to the scikit-learn default
                                      # if the file has no tuned activation.
                                      activation=nn_params.get('activation', 'relu'),
                                      learning_rate_init=nn_params['learning_rate_init'],
                                      random_state=SEED))])
nn.fit(nn_X_train, y_train)

In [None]:
print(classification_report(y_test, nn.predict(X_test_inputs_sc[nn_params['imputation']])))

## LGBM Classifier 

In [None]:
def lgbm_objective(trial):
    """Optuna objective for LightGBM: mean 5-fold F1 on the training split.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input variant ('imputation') and the
        booster hyperparameters.

    Returns
    -------
    float
        Mean F1 score over the cross-validation folds (to be maximised).

    Notes
    -----
    Reads the notebook globals ``X_train_inputs``, ``y_train`` and ``SEED``.
    """
    imp_method = trial.suggest_categorical('imputation',
                                           ['0', 'mean', 'median',
                                            'most_frequent', 'iterative', 'pca'])
    X_train_imp = X_train_inputs[imp_method]

    boosting_type = trial.suggest_categorical('boosting_type', ['gbdt', 'dart',
                                                                'goss'])

    num_leaves = trial.suggest_categorical('num_leaves', list(range(10, 51, 5)))
    learning_rate = trial.suggest_categorical('learning_rate', [0.1, 0.01,
                                                                0.001, 0.0001])
    n_estimators = trial.suggest_categorical('n_estimators',
                                             list(range(50, 801, 25)))
    # Parameter name spelled 'class_weight' (was 'class_weigth') so the saved
    # JSON key matches the estimator argument.
    class_weight = trial.suggest_categorical('class_weight', [None, 'balanced'])

    # Local, seeded splitter like the other objectives, so the score does not
    # depend on the state of a shared global splitter.
    cv_grid = KFold(n_splits=5, shuffle=True, random_state=SEED)

    clf = LGBMClassifier(boosting_type=boosting_type,
                         num_leaves=num_leaves, learning_rate=learning_rate,
                         n_estimators=n_estimators, class_weight=class_weight,
                         random_state=SEED)

    scores = cross_val_score(clf, X_train_imp, y_train,
                             scoring=make_scorer(f1_score),
                             n_jobs=-1, cv=cv_grid)

    return scores.mean()

In [None]:
budget = 30
np.random.seed(SEED)
# Seed the sampler explicitly: Optuna samplers own their random state, so
# np.random.seed alone does not make the sequence of trials repeatable.
study_lgbm = optuna.create_study(direction='maximize',
                                 sampler=optuna.samplers.TPESampler(seed=SEED))
study_lgbm.optimize(lgbm_objective, n_trials=budget)

In [None]:
# Show the best trial's hyperparameters and persist them as JSON.
lgbm_best = study_lgbm.best_params
print(lgbm_best)
with open('lgbm_best_params.json', 'w') as fp:
    fp.write(json.dumps(lgbm_best, indent=4))

In [None]:
# Reload the persisted LightGBM hyperparameters and display them.
with open('lgbm_best_params.json', 'r') as fp:
    lgbm_params = json.loads(fp.read())
lgbm_params

In [None]:
lgbm = LGBMClassifier(boosting_type=lgbm_params['boosting_type'],
                      num_leaves=lgbm_params['num_leaves'], 
                      learning_rate=lgbm_params['learning_rate'],
                      n_estimators=lgbm_params['n_estimators'], 
                      class_weight=None,
                      random_state=SEED)
lgbm.fit(X_train_inputs['iterative'], y_train)

In [None]:
print(classification_report(y_test, lgbm.predict(X_test_inputs['iterative'])))