##### IMPORTS

In [33]:
from ptracking.topic.lda_tomoto import *
from ptracking.predict import Dataset
from ptracking.sentiment.corenlp import sentiment, ner
from ptracking.database.database import Fetcher
from ptracking.twitter_scraper.twitter_scraper import TwitterFetcher

import numpy as np

from imblearn.over_sampling import SMOTE

from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit
from sklearn.naive_bayes import ComplementNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, ConfusionMatrixDisplay

from xgboost import XGBClassifier

# Cross-validation splitters shared by every experiment cell; both are
# constructed with their scikit-learn default arguments.
skf, tss = StratifiedKFold(), TimeSeriesSplit()

Get features

In [None]:
# Load every feature table that the dataset-building cells below join together.
# NOTE(review): the meaning of the two positional 30s is defined inside
# tomoto_topics — confirm before changing them.
topics, _ = tomoto_topics(30,30, gvmt_period='second')  # second return value is discarded
sentiments = sentiment()
# Result of the imported ner() helper; `ner` itself remains the function.
named_ent = ner()
same_day = Fetcher().number_of_petitions_on_same_day()
twitter = TwitterFetcher().get_twitter_features()

#### Baseline

Create Baseline Dataset

In [None]:
# Join the topic features with the prepared petition data, order the rows by
# creation time and turn the index into a regular column.
dataset = (
  topics.join(Dataset().prepare(columns=["created_at"]))
  .sort_values("created_at")
  .reset_index()
)

# Features: every column except the first one and the last three.
# NOTE(review): the 1:-3 slice relies on the column order produced by the
# join above — confirm it still selects only feature columns.
X = np.array(dataset.iloc[:, 1:-3].values.tolist())
# Target: the 'class' column.
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# K-nearest neighbours: stratified k-fold cross-validation (skf) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: stratified k-fold cross-validation (skf) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): stratified k-fold cross-validation (skf) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: stratified k-fold cross-validation (skf) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: stratified k-fold cross-validation (skf) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: stratified k-fold cross-validation (skf) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: stratified k-fold cross-validation (skf) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

##### Classifiers trained with time-series split cross-validation

KNN

In [None]:
# K-nearest neighbours: time-series-split cross-validation (tss) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: time-series-split cross-validation (tss) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): time-series-split cross-validation (tss) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: time-series-split cross-validation (tss) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: time-series-split cross-validation (tss) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: time-series-split cross-validation (tss) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: time-series-split cross-validation (tss) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

#### Baseline + Sentiments

Create Baseline + Sentiments Dataset

In [None]:
# Topic features extended with the sentiment features.
data = topics.join(sentiments)

# Attach the prepared petition data, order the rows by creation time and
# turn the index into a regular column.
dataset = (
  data.join(Dataset().prepare(columns=["created_at"]))
  .sort_values("created_at")
  .reset_index()
)

# Features: every column except the first one and the last three.
# NOTE(review): the 1:-3 slice relies on the column order produced by the
# joins above — confirm it still selects only feature columns.
X = np.array(dataset.iloc[:, 1:-3].values.tolist())
# Target: the 'class' column.
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# K-nearest neighbours: stratified k-fold cross-validation (skf) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: stratified k-fold cross-validation (skf) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): stratified k-fold cross-validation (skf) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: stratified k-fold cross-validation (skf) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: stratified k-fold cross-validation (skf) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: stratified k-fold cross-validation (skf) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: stratified k-fold cross-validation (skf) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

##### Classifiers trained with time-series split cross-validation

KNN

In [None]:
# K-nearest neighbours: time-series-split cross-validation (tss) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: time-series-split cross-validation (tss) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): time-series-split cross-validation (tss) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: time-series-split cross-validation (tss) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: time-series-split cross-validation (tss) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: time-series-split cross-validation (tss) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: time-series-split cross-validation (tss) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

#### Baseline + Sentiments + NER

Create Baseline + Sentiments + NER Dataset

In [None]:
# Topic features extended with the sentiment and named-entity features.
# `named_ent` is the table returned by ner(); `ner` itself is the imported
# function and cannot be passed to DataFrame.join.
data = topics.join(sentiments).join(named_ent)

# Attach the prepared petition data, order the rows by creation time and
# turn the index into a regular column.
dataset = data.join(Dataset().prepare(columns=["created_at"]))
dataset.sort_values("created_at", inplace=True)
dataset = dataset.reset_index()

# Features: every column except the first one and the last three.
# NOTE(review): the 1:-3 slice relies on the column order produced by the
# joins above — confirm it still selects only feature columns.
X = np.array(dataset.iloc[:,1:-3].values.tolist())
# Target: the 'class' column.
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# K-nearest neighbours: stratified k-fold cross-validation (skf) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: stratified k-fold cross-validation (skf) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): stratified k-fold cross-validation (skf) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: stratified k-fold cross-validation (skf) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: stratified k-fold cross-validation (skf) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: stratified k-fold cross-validation (skf) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: stratified k-fold cross-validation (skf) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

##### Classifiers trained with time-series split cross-validation

KNN

In [None]:
# K-nearest neighbours: time-series-split cross-validation (tss) over the X / y currently in scope.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = knn.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes: time-series-split cross-validation (tss) over the X / y currently in scope.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = nb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights): time-series-split cross-validation (tss) over the X / y currently in scope.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = svc.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree: time-series-split cross-validation (tss) over the X / y currently in scope.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = dt.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest: time-series-split cross-validation (tss) over the X / y currently in scope.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = rf.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting: time-series-split cross-validation (tss) over the X / y currently in scope.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = gb.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron: time-series-split cross-validation (tss) over the X / y currently in scope.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  y_pred = mlp.fit(X_train, y_train).predict(X_test)
  # Each fold's metrics are rounded to two decimals before being stored.
  for bucket, value in (
    (mcc_scores, matthews_corrcoef(y_test, y_pred)),
    (acc_scores, accuracy_score(y_test, y_pred)),
    (f1_scores, f1_score(y_test, y_pred, average='weighted')),
  ):
    bucket.append(round(value, 2))
  # One confusion-matrix figure per fold.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Report the mean of the per-fold scores.
for label, fold_scores in (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores)):
  print(label, round(np.mean(fold_scores), 2))

#### Baseline + Sentiments + NER + Petitions on same day

Create Baseline + Sentiments + NER + Petitions on same day Dataset

In [None]:
# Assemble the feature table: topics joined with sentiment, NER and same-day
# petition features. The NER features live in `named_ent`; `ner` is the imported
# extractor function itself and cannot be passed to DataFrame.join.
data = topics.join(sentiments).join(named_ent).join(same_day)

# Attach the columns returned by Dataset().prepare and order rows by creation
# time, so that positional splits (TimeSeriesSplit) follow chronological order.
dataset = data.join(Dataset().prepare(columns=["created_at"]))
dataset.sort_values("created_at", inplace=True)
dataset = dataset.reset_index()

# Features: every column except the first and the last three.
# NOTE(review): presumably the first column is the former index and the last
# three come from Dataset().prepare (including 'class') - confirm.
X = np.array(dataset.iloc[:,1:-3].values.tolist())
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the stratified k-fold splitter `skf`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the stratified k-fold splitter `skf`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# stratified k-fold splitter `skf`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the stratified k-fold splitter `skf`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the stratified k-fold splitter `skf`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the stratified k-fold splitter `skf`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the stratified k-fold splitter `skf`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

##### Classifiers trained with timesplit cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the time-series splitter `tss`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the time-series splitter `tss`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# time-series splitter `tss`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the time-series splitter `tss`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the time-series splitter `tss`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the time-series splitter `tss`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the time-series splitter `tss`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

#### Baseline + Sentiments + NER + Petitions on same day + Debate count + Google trends

Create Baseline + Sentiments + NER + Petitions on same day + Debate count + Google trends Dataset

In [None]:
# Assemble the feature table: topics joined with sentiment, NER, same-day
# petition, debate-count and Google-trends features. The NER features live in
# `named_ent`; `ner` is the imported extractor function itself and cannot be
# passed to DataFrame.join.
# NOTE(review): `debate_count` and `trends` are not created in the "Get features"
# cell - confirm they are defined before this cell runs.
data = topics.join(sentiments).join(named_ent).join(same_day).join(debate_count).join(trends)

# Attach the columns returned by Dataset().prepare and order rows by creation
# time, so that positional splits (TimeSeriesSplit) follow chronological order.
dataset = data.join(Dataset().prepare(columns=["created_at"]))
dataset.sort_values("created_at", inplace=True)
dataset = dataset.reset_index()

# Features: every column except the first and the last three.
# NOTE(review): presumably the first column is the former index and the last
# three come from Dataset().prepare (including 'class') - confirm.
X = np.array(dataset.iloc[:,1:-3].values.tolist())
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the stratified k-fold splitter `skf`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the stratified k-fold splitter `skf`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# stratified k-fold splitter `skf`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the stratified k-fold splitter `skf`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the stratified k-fold splitter `skf`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the stratified k-fold splitter `skf`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the stratified k-fold splitter `skf`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

##### Classifiers trained with timesplit cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the time-series splitter `tss`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the time-series splitter `tss`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# time-series splitter `tss`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the time-series splitter `tss`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the time-series splitter `tss`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the time-series splitter `tss`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the time-series splitter `tss`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

#### Baseline + Sentiments + NER + Petitions on same day + Debate count + Google trends + Twitter features

Create Baseline + Sentiments + NER + Petitions on same day + Debate count + Google trends + Twitter features Dataset

In [None]:
# Assemble the feature table: topics joined with sentiment, NER, same-day
# petition, debate-count, Google-trends and Twitter features. The NER features
# live in `named_ent`; `ner` is the imported extractor function itself and cannot
# be passed to DataFrame.join.
# NOTE(review): `debate_count` and `trends` are not created in the "Get features"
# cell - confirm they are defined before this cell runs.
data = topics.join(sentiments).join(named_ent).join(same_day).join(debate_count).join(trends).join(twitter)

# Attach the columns returned by Dataset().prepare and order rows by creation
# time, so that positional splits (TimeSeriesSplit) follow chronological order.
dataset = data.join(Dataset().prepare(columns=["created_at"]))
dataset.sort_values("created_at", inplace=True)
dataset = dataset.reset_index()

# Features: every column except the first and the last three.
# NOTE(review): presumably the first column is the former index and the last
# three come from Dataset().prepare (including 'class') - confirm.
X = np.array(dataset.iloc[:,1:-3].values.tolist())
y = np.array(dataset['class'].values.tolist())

##### Classifiers trained with stratified cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the stratified k-fold splitter `skf`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the stratified k-fold splitter `skf`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# stratified k-fold splitter `skf`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the stratified k-fold splitter `skf`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the stratified k-fold splitter `skf`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the stratified k-fold splitter `skf`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the stratified k-fold splitter `skf`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in skf.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

##### Classifiers trained with timesplit cross validation

KNN

In [None]:
# k-nearest neighbours evaluated on X, y with the time-series splitter `tss`.
knn = KNeighborsClassifier(n_jobs=-1)
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  knn.fit(X_train, y_train)
  y_pred = knn.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(knn, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

NB

In [None]:
# Complement naive Bayes evaluated on X, y with the time-series splitter `tss`.
nb = ComplementNB()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  nb.fit(X_train, y_train)
  y_pred = nb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(nb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

SVC

In [None]:
# Support vector classifier (balanced class weights) evaluated on X, y with the
# time-series splitter `tss`.
svc = SVC(class_weight='balanced')
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  svc.fit(X_train, y_train)
  y_pred = svc.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(svc, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

DT

In [None]:
# Decision tree evaluated on X, y with the time-series splitter `tss`.
dt = DecisionTreeClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  dt.fit(X_train, y_train)
  y_pred = dt.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(dt, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

RF

In [None]:
# Random forest evaluated on X, y with the time-series splitter `tss`.
rf = RandomForestClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  rf.fit(X_train, y_train)
  y_pred = rf.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(rf, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

GB

In [None]:
# XGBoost gradient boosting evaluated on X, y with the time-series splitter `tss`.
gb = XGBClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  gb.fit(X_train, y_train)
  y_pred = gb.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(gb, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))

MLP

In [None]:
# Multi-layer perceptron evaluated on X, y with the time-series splitter `tss`.
mlp = MLPClassifier()
mcc_scores, acc_scores, f1_scores = [], [], []

for train_index, test_index in tss.split(X, y):
  X_train, y_train = X[train_index], y[train_index]
  X_test, y_test = X[test_index], y[test_index]
  mlp.fit(X_train, y_train)
  y_pred = mlp.predict(X_test)
  # Fold metrics are rounded to two decimals before being stored.
  fold_mcc = matthews_corrcoef(y_test, y_pred)
  fold_acc = accuracy_score(y_test, y_pred)
  fold_f1 = f1_score(y_test, y_pred, average='weighted')
  mcc_scores.append(round(fold_mcc, 2))
  acc_scores.append(round(fold_acc, 2))
  f1_scores.append(round(fold_f1, 2))
  # Plot this fold's confusion matrix.
  ConfusionMatrixDisplay.from_estimator(mlp, X_test, y_test)

# Average each metric over the folds and print it.
summary = (("MCC", mcc_scores), ("Accuracy", acc_scores), ("F1", f1_scores))
for metric_name, fold_scores in summary:
  print(metric_name, round(np.mean(fold_scores), 2))