# Scikit-learn solution

In [2]:
import pandas as pd

# Load the preprocessed training split from data/processed/ in one step:
# X_train holds the contents of X_train.csv, y_train those of y_train.csv.
X_train, y_train = (
    pd.read_csv('data/processed/X_train.csv'),
    pd.read_csv('data/processed/y_train.csv'),
)

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score, KFold


# Single seed shared by the CV splitter and every stochastic estimator, so the
# ranking printed below is reproducible from run to run.
RANDOM_STATE = 42

# Candidate classifiers to compare. LogisticRegression (default solver), SVC
# and GaussianNB are left unseeded; the others draw random numbers during
# fitting and therefore get the shared seed.
# NOTE(review): KNeighborsClassifier is imported in this cell but is not
# evaluated here -- confirm whether it was left out on purpose.
algorithms = [
    LogisticRegression(max_iter=400),  # scikit-learn's default is 100
    SGDClassifier(random_state=RANDOM_STATE),
    Perceptron(random_state=RANDOM_STATE),
    SVC(),
    DecisionTreeClassifier(random_state=RANDOM_STATE),
    RandomForestClassifier(random_state=RANDOM_STATE),
    GaussianNB(),
]

# With shuffle=True and no random_state, every cross_val_score call would
# reshuffle and each model would be scored on different folds. A fixed seed
# makes the splitter hand the same five folds to every model.
kfold = KFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)

# y_train comes from read_csv as a DataFrame; flatten it to the 1-D target
# scikit-learn expects, once, instead of on every loop iteration.
# NOTE(review): presumably a single label column -- confirm.
y_train_1d = y_train.values.ravel()

# Mean cross-validated precision per model, keyed by estimator class name.
# NOTE(review): scoring='precision' is the binary scorer, so the labels are
# assumed to be binary -- confirm against the data.
model_scores = {}
for model in algorithms:
    scores = cross_val_score(
        model, X_train, y_train_1d, cv=kfold, scoring='precision'
    )
    model_scores[type(model).__name__] = scores.mean()

# Re-order best-first; dicts preserve insertion order, so this ranks the table.
model_scores = dict(
    sorted(model_scores.items(), key=lambda item: item[1], reverse=True)
)

print(f"{'Model':^25} | {'Mean CV Precision Score':^20}")
for name, mean_precision in model_scores.items():
    print(f"{name:<25} | {mean_precision:<20}")

          Model           | Mean CV Precision Score
SVC                       | 0.8577441077441078  
SGDClassifier             | 0.7796813984891063  
LogisticRegression        | 0.778693024377044   
RandomForestClassifier    | 0.7531565729047072  
DecisionTreeClassifier    | 0.746065547998459   
GaussianNB                | 0.7248897187992036  
Perceptron                | 0.63914842105901    
