In [1]:
import numpy as np
import pandas as pd
import csv
import string
import emoji
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from afinn import Afinn
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
#imports above not used?
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
import joblib

#import data
df_updated = pd.read_csv("processed_data.csv")
print(df_updated.shape[1])

#-----------MODELS AND 10-FOLD CROSS VALIDATION---------------
#-----------HYPERPARAMETER TUNING VIA GRID SEARCH ------------
#-----------SAVING OF EACH RESULT IN JOBLIB FILE--------------

#splitting of data
X = df_updated
scaler = MinMaxScaler(feature_range=(0, 1))
X['sentiment'] = scaler.fit_transform(X[['sentiment_score']])
X_scaled = scaler.fit_transform(X.iloc[:, 21:])
X.iloc[:, 21:] = X_scaled

X = X.drop(['is_fake','TOTAL_TEXT','text_cleaned','sentiment_score','VERIFIED_PURCHASE'], axis=1)
y = df_updated['is_fake']
print(X.dtypes)
X_train, X_valid, y_train, y_valid = train_test_split(X,y,train_size=0.8, test_size=0.2, random_state=None)
kf = KFold(n_splits=10, shuffle=True, random_state=42)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\longb\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\longb\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\longb\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


70
RATING           int64
word_count       int64
caps_count       int64
punct_count      int64
emoji_count      int64
                ...   
46             float64
47             float64
48             float64
49             float64
sentiment      float64
Length: 66, dtype: object


In [3]:
#Naive Bayes <----FIRST--->
param_grid = {'alpha': [0.1, 1.0, 10.0]}
nb_clf = MultinomialNB()
grid_search = GridSearchCV(nb_clf, param_grid=param_grid, cv=kf)
grid_search.fit(X_train, y_train)

best_nb = grid_search.best_estimator_
y_pred = best_nb.predict(X_valid)


# nb_clf.fit(X_train, y_train)
# y_pred = nb_clf.predict(X_valid)

print("Naive Bayes")
print(classification_report(y_valid, y_pred))
print(accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_nb, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

joblib.dump(best_nb, 'best_params_rating/nb_best_params.pkl')

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'NB',
        'Parameters': str(best_nb),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

Naive Bayes
              precision    recall  f1-score   support

           0       0.66      0.32      0.43      2165
           1       0.53      0.83      0.65      2035

    accuracy                           0.56      4200
   macro avg       0.60      0.57      0.54      4200
weighted avg       0.60      0.56      0.53      4200

0.5645238095238095
Mean accuracy score: 0.57
Standard deviation: 0.01
************************


In [4]:
param_grid = {'n_neighbors': [3, 5, 7, 9],
              'metric': ['euclidean', 'manhattan', 'minkowski']}

knn = KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=kf, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_valid)

# knn.fit(X_train, y_train)
# y_pred = knn.predict(X_valid)
print("K Nearest Neighbour")
print(classification_report(y_valid, y_pred))
print("Accuracy:", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_knn, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

joblib.dump(best_knn, 'best_params_rating/knn_best_params.pkl')
print(best_knn)

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'K Nearest Neighbour',
        'Parameters': str(best_knn),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

K Nearest Neighbour
              precision    recall  f1-score   support

           0       0.58      0.61      0.60      2165
           1       0.56      0.53      0.55      2035

    accuracy                           0.57      4200
   macro avg       0.57      0.57      0.57      4200
weighted avg       0.57      0.57      0.57      4200

Accuracy: 0.5723809523809524
Mean accuracy score: 0.57
Standard deviation: 0.01
************************
KNeighborsClassifier(metric='manhattan', n_neighbors=9)


In [3]:
param_grid = {'max_depth': [2, 4, 6, 8],
              'min_samples_split': [2, 4, 6, 8]}
dectreeclf = DecisionTreeClassifier()
grid_search = GridSearchCV(dectreeclf, param_grid=param_grid, cv=kf)
grid_search.fit(X_train, y_train)

best_clf = grid_search.best_estimator_
y_pred = best_clf.predict(X_valid)

# dectreeclf.fit(X_train, y_train)
# y_pred = dectreeclf.predict(X_valid)
print("Decision Tree")
print(classification_report(y_valid, y_pred))
print("Accuracy",accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_clf, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Decision Tree',
        'Parameters': str(best_clf),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_clf, 'dectree_best_params.pkl')

Decision Tree
              precision    recall  f1-score   support

           0       0.60      0.59      0.60      2123
           1       0.59      0.60      0.60      2077

    accuracy                           0.60      4200
   macro avg       0.60      0.60      0.60      4200
weighted avg       0.60      0.60      0.60      4200

Accuracy 0.5985714285714285
Mean accuracy score: 0.59
Standard deviation: 0.01
************************


['dectree_best_params.pkl']

In [4]:
param_grid = {'n_estimators': [50, 100, 200],
              'learning_rate': [0.01, 0.1, 0.5],
              'max_depth': [1, 2, 3, 4]}

grid_search = GridSearchCV(estimator=GradientBoostingClassifier(random_state=42),
                           param_grid=param_grid,
                           cv=kf,
                           n_jobs=-1)

grid_search.fit(X_train, y_train)
best_gb = grid_search.best_estimator_
y_pred = best_gb.predict(X_valid)

# gradientbooster = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42)
# gradientbooster.fit(X_train, y_train)
# y_pred = gradientbooster.predict(X_valid)

print("Gradient Boosting")
print(classification_report(y_valid, y_pred))
print("Accuracy", accuracy_score(y_valid, y_pred))


scores = cross_val_score(best_gb, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Gradient Booster',
        'Parameters': str(best_gb),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_gb, 'best_params_rating/gb_best_params.pkl')

Gradient Boosting
              precision    recall  f1-score   support

           0       0.65      0.65      0.65      2123
           1       0.64      0.65      0.64      2077

    accuracy                           0.65      4200
   macro avg       0.65      0.65      0.65      4200
weighted avg       0.65      0.65      0.65      4200

Accuracy 0.6461904761904762
Mean accuracy score: 0.64
Standard deviation: 0.02
************************


['best_params_rating/gb_best_params.pkl']

In [5]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

rfc = RandomForestClassifier(random_state = 40)
grid_search = GridSearchCV(rfc, param_grid=param_grid, cv=kf, n_jobs=-1)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_


rfc = RandomForestClassifier(**best_params, random_state=42)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_valid)
print("Random Forest")
print(classification_report(y_valid, y_pred))
print("Accuracy", accuracy_score(y_valid, y_pred))

scores = cross_val_score(rfc, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)
print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Random Forest',
        'Parameters': str(best_params),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_params, 'best_params_rating/rf_best_params.pkl')

Random Forest
              precision    recall  f1-score   support

           0       0.67      0.62      0.64      2123
           1       0.64      0.68      0.66      2077

    accuracy                           0.65      4200
   macro avg       0.65      0.65      0.65      4200
weighted avg       0.65      0.65      0.65      4200

Accuracy 0.6511904761904762
Mean accuracy score: 0.64
Standard deviation: 0.01
************************


['best_params_rating/rf_best_params.pkl']

In [6]:
param_grid = {
    'penalty': ['l1', 'l2'],
    'C': [0.1, 1, 10],
}

regressor = LogisticRegression(max_iter=10000, solver='saga')

grid_search = GridSearchCV(regressor, param_grid, cv=kf)
grid_search.fit(X_train, y_train)

best_logreg = grid_search.best_estimator_
# regressor.fit(X_train, y_train)
# y_pred = regressor.predict(X_valid)
y_pred = best_logreg.predict(X_valid)

print("Logistic Regression")
print(classification_report(y_valid, y_pred))
print(accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_logreg, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Logistic Regression',
        'Parameters': str(best_logreg),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_logreg, 'best_params_rating/lr_best_params.pkl')

Logistic Regression
              precision    recall  f1-score   support

           0       0.61      0.50      0.55      2123
           1       0.57      0.68      0.62      2077

    accuracy                           0.59      4200
   macro avg       0.59      0.59      0.58      4200
weighted avg       0.59      0.59      0.58      4200

0.5857142857142857
Mean accuracy score: 0.58
Standard deviation: 0.01
************************


['best_params_rating/lr_best_params.pkl']

In [2]:
svm = LinearSVC()


param_grid = {'penalty': ['l1','l2'],
              'loss': ['hinge', 'squared_hinge'],
              'dual':[False],
              'C': [0.1, 1, 10, 100],
              'max_iter':[10000]
              }

grid_search = GridSearchCV(svm, param_grid, cv=kf)
grid_search.fit(X_train, y_train)
best_svm = grid_search.best_estimator_
y_pred = best_svm.predict(X_valid)


print("SVM")
print(classification_report(y_valid, y_pred))
print("Accuracy:", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_svm, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Linear SVM',
        'Parameters': str(best_svm),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_svm, 'best_params_rating/linearsvm_best_params.pkl')

80 fits failed out of a total of 160.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
40 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\longb\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\longb\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\svm\_classes.py", line 274, in fit
    self.coef_, self.intercept_, n_iter_ = _fit_liblinear(
                                           ^^^^^^^^^^^^^^^
  File "c:\Users\longb\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\svm\_base.py", line 1223, in _fit_liblinear
    solver

SVM
              precision    recall  f1-score   support

           0       0.62      0.64      0.63      2076
           1       0.64      0.62      0.63      2124

    accuracy                           0.63      4200
   macro avg       0.63      0.63      0.63      4200
weighted avg       0.63      0.63      0.63      4200

Accuracy: 0.6290476190476191
Mean accuracy score: 0.62
Standard deviation: 0.01
************************


['best_params_rating/linearsvm_best_params.pkl']

In [3]:
svm = SVC()


param_grid = {'C': [0.1, 1, 10, 100],'gamma': [0.1, 1, 10, 100]}

grid_search = GridSearchCV(svm, param_grid, cv=kf)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_estimator_
y_pred = grid_search.best_estimator_.predict(X_valid)


print("SVM")
print(classification_report(y_valid, y_pred))
print("Accuracy:", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_params, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'Gaussian SVM',
        'Parameters': str(best_params),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_params, 'best_params_rating/gaussiansvm_best_params.pkl')

SVM
              precision    recall  f1-score   support

           0       0.60      0.56      0.58      2076
           1       0.60      0.63      0.61      2124

    accuracy                           0.60      4200
   macro avg       0.60      0.60      0.60      4200
weighted avg       0.60      0.60      0.60      4200

Accuracy: 0.5971428571428572
Mean accuracy score: 0.60
Standard deviation: 0.01
************************


['best_params_rating/gaussiansvm_best_params.pkl']

In [4]:
param_grid = {
    'hidden_layer_sizes': [(10,),(50,)],
    'activation': ['logistic','relu'],
    'alpha': [0.0001, 0.001, 0.01],
    'solver': ['adam','sgd','lbfgs'],
    'learning_rate': ['adaptive','constant'],
    'batch_size': [32, 64, 128],
    'max_iter': [500]
}

mlp = MLPClassifier()
grid_search = GridSearchCV(mlp, param_grid, cv=kf)
grid_search.fit(X_train, y_train)
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_valid)
print(grid_search.best_score_)

print("Multi Layer Perceptron")
print(classification_report(y_valid, y_pred))
print("Accuracy: ", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_mlp, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'MLP1',
        'Parameters': str(best_mlp),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_mlp, 'best_params_rating/mlp1_best_params.pkl')

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

0.6463095238095238
Multi Layer Perceptron
              precision    recall  f1-score   support

           0       0.61      0.74      0.67      2076
           1       0.68      0.54      0.60      2124

    accuracy                           0.64      4200
   macro avg       0.65      0.64      0.64      4200
weighted avg       0.65      0.64      0.64      4200

Accuracy:  0.6388095238095238
Mean accuracy score: 0.65
Standard deviation: 0.01
************************


['best_params_rating/mlp1_best_params.pkl']

In [5]:
param_grid = {
    'hidden_layer_sizes': [(10,),(50,)],
    'activation': ['logistic'],
    'alpha': [0.0001, 0.001, 0.01],
    'solver': ['adam'],
    'learning_rate': ['adaptive'],
    'batch_size': [32, 64, 128],
    'max_iter': [1000,2000]
}

mlp = MLPClassifier()
grid_search = GridSearchCV(mlp, param_grid, cv=kf)
grid_search.fit(X_train, y_train)
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_valid)
print(grid_search.best_score_)

print("Multi Layer Perceptron")
print(classification_report(y_valid, y_pred))
print("Accuracy: ", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_mlp, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'MLP2',
        'Parameters': str(best_mlp),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_mlp, 'best_params_rating/mlp1_best_params.pkl')

0.6470238095238094
Multi Layer Perceptron
              precision    recall  f1-score   support

           0       0.67      0.59      0.63      2076
           1       0.64      0.71      0.67      2124

    accuracy                           0.65      4200
   macro avg       0.65      0.65      0.65      4200
weighted avg       0.65      0.65      0.65      4200

Accuracy:  0.6519047619047619
Mean accuracy score: 0.65
Standard deviation: 0.01
************************


['best_params_rating/mlp1_best_params.pkl']

In [6]:
param_grid = {
    'hidden_layer_sizes': [(100,)],
    'activation': ['logistic'],
    'alpha': [0.0001],
    'solver': ['adam'],
    'learning_rate': ['adaptive'],
    'batch_size': [32, 64, 128],
    'max_iter': [500,1000,2000]
}

mlp = MLPClassifier()
grid_search = GridSearchCV(mlp, param_grid, cv=kf)
grid_search.fit(X_train, y_train)
best_mlp = grid_search.best_estimator_
y_pred = best_mlp.predict(X_valid)
print(grid_search.best_score_)

print("Multi Layer Perceptron")
print(classification_report(y_valid, y_pred))
print("Accuracy: ", accuracy_score(y_valid, y_pred))

scores = cross_val_score(best_mlp, X, y, cv=kf)
mean_score = np.mean(scores)
std_score = np.std(scores)

print(f"Mean accuracy score: {mean_score:.2f}")
print(f"Standard deviation: {std_score:.2f}")
print("************************")

with open('results_rating.csv', 'a', newline='') as csvfile:
    fieldnames = ['Model', 'Parameters', 'Accuracy', 'Mean Score', 'Std Score']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writerow({
        'Model': 'MLP3',
        'Parameters': str(best_mlp),
        'Accuracy': accuracy_score(y_valid, y_pred),
        'Mean Score': mean_score,
        'Std Score': std_score
    })

joblib.dump(best_mlp, 'best_params_rating/mlp3_best_params.pkl')

0.6492261904761905
Multi Layer Perceptron
              precision    recall  f1-score   support

           0       0.67      0.56      0.61      2076
           1       0.63      0.73      0.68      2124

    accuracy                           0.65      4200
   macro avg       0.65      0.65      0.64      4200
weighted avg       0.65      0.65      0.64      4200

Accuracy:  0.6471428571428571
Mean accuracy score: 0.64
Standard deviation: 0.01
************************


['best_params_rating/mlp3_best_params.pkl']