In [2]:
from ptracking.topic.lda import *
import numpy as np
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.naive_bayes import ComplementNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import matthews_corrcoef, accuracy_score
from xgboost import XGBClassifier
from ptracking.topic.lda_tomoto import tomoto_load_model
# Shared cross-validation splitter used by every model cell below.
# The arguments spell out the library defaults (five folds, no shuffling), so
# the folds are contiguous slices of the data in their current row order.
skf = StratifiedKFold(n_splits=5, shuffle=False)

In [10]:
import os

# Location of the saved topic model file. The default is the original author's
# local checkout; set the PTRACKING_TOPIC_MODEL environment variable to run the
# notebook on another machine without editing this cell.
TOPIC_MODEL_PATH = os.environ.get(
    'PTRACKING_TOPIC_MODEL',
    'C:/Users/mihut/Desktop/petition-tracking/ptracking/topic/tomoto_topic.mdl',
)

# Only the first element returned by the loader is kept; it is joined onto the
# dataset in a later cell.
# NOTE(review): presumably this is the per-document topic feature table - confirm
# against tomoto_load_model.
topics = tomoto_load_model(TOPIC_MODEL_PATH)[0]

In [11]:
from ptracking.predict import Dataset

# Build the modelling table in a single chain:
#   1. prepare the base dataset, requesting the "created_at" column,
#   2. join the topic features loaded earlier,
#   3. order the rows by "created_at",
#   4. reset the index.
# reset_index() is called without drop=True, so the previous index is kept as a
# leading column; the positional column slice used to build X depends on that
# layout, so do not change it without updating the slice.
# NOTE(review): assumes .join() returns a pandas DataFrame - confirm.
dataset = (
    Dataset()
    .prepare(columns=["created_at"])
    .join(topics)
    .sort_values("created_at")
    .reset_index()
)

In [13]:
# Position of the first feature column in `dataset`; everything to its left is
# treated as metadata.
# NOTE(review): the offset is positional, so it silently shifts if the columns
# produced by the preparation cell change - verify against the dataset layout.
FEATURE_START = 4

# Feature matrix: all columns from FEATURE_START onward, converted through
# nested Python lists into a NumPy array.
feature_rows = dataset.iloc[:, FEATURE_START:].values.tolist()
X = np.array(feature_rows)

# Target vector: the 'class' column, converted the same way.
class_labels = dataset['class'].values.tolist()
y = np.array(class_labels)

In [14]:
# Stratified k-fold evaluation of a default XGBoost classifier, scored per fold
# with the Matthews correlation coefficient (MCC).
xgb = XGBClassifier()
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  xgb.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = xgb.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.05, 0.03, 0.0, 0.01, 0.03]
0.02


In [48]:
# Stratified k-fold evaluation of a k-nearest-neighbours classifier (default
# settings, n_jobs=-1), scored per fold with the Matthews correlation
# coefficient (MCC).
knn = KNeighborsClassifier(n_jobs=-1)
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  knn.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = knn.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.05, 0.02, 0.0, 0.0, 0.01]
0.02


In [42]:
# Stratified k-fold evaluation of a default support vector classifier, scored
# per fold with the Matthews correlation coefficient (MCC).
svc = SVC()
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  svc.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = svc.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.0, 0.0, 0.0, 0.0, 0.0]
0.0


In [43]:
# Stratified k-fold evaluation of a support vector classifier with
# class_weight='balanced', scored per fold with the Matthews correlation
# coefficient (MCC). This rebinds `svc`, replacing the unweighted model above.
svc = SVC(class_weight='balanced')
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  svc.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = svc.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.02, 0.04, 0.03, 0.0, 0.01]
0.02


In [49]:
# Stratified k-fold evaluation of a default Complement Naive Bayes classifier,
# scored per fold with the Matthews correlation coefficient (MCC).
nb = ComplementNB()
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  nb.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = nb.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.06, 0.07, 0.05, 0.03, 0.07]
0.06


In [50]:
# Stratified k-fold evaluation of a decision tree classifier, scored per fold
# with the Matthews correlation coefficient (MCC).
# random_state is fixed so that re-running the cell reproduces the same scores;
# without it the tree construction is randomised between runs.
dt = DecisionTreeClassifier(random_state=42)
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  dt.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = dt.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.01, 0.02, 0.01, 0.03, 0.03]
0.02


In [51]:
# Stratified k-fold evaluation of a random forest classifier, scored per fold
# with the Matthews correlation coefficient (MCC).
# random_state is fixed so that re-running the cell reproduces the same scores;
# without it the forest is built from different random draws on every run.
rf = RandomForestClassifier(random_state=42)
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  rf.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = rf.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))

[0.01, 0.0, 0.02, 0.03, -0.0]
0.01


In [52]:
# Stratified k-fold evaluation of a multi-layer perceptron classifier, scored
# per fold with the Matthews correlation coefficient (MCC).
# random_state is fixed so that re-running the cell reproduces the same scores;
# without it the network is initialised and trained with different random draws
# on every run.
mlp = MLPClassifier(random_state=42)
scores = list()  # raw (unrounded) MCC of each fold

for train_index, test_index in skf.split(X, y):
  X_train, X_test = X[train_index], X[test_index]
  y_train, y_test = y[train_index], y[test_index]
  mlp.fit(X_train, y_train)  # the same estimator object is refit on every fold
  y_pred = mlp.predict(X_test)
  scores.append(matthews_corrcoef(y_test, y_pred))

# Round for display only: averaging already-rounded fold scores can move the
# reported mean by one unit in the last printed digit.
print([round(score, 2) for score in scores])
print(round(np.mean(scores), 2))



[0.02, 0.03, 0.04, 0.04, 0.05]
0.04


