## Vectorizers + Models
#### Currency exchange topic

### Load

Tokens

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
import pandas as pd
import numpy as np

In [6]:
tokens_izq = pd.read_pickle('tokens/topic_tokens_izq_new.pkl')
tokens_der = pd.read_pickle('tokens/topic_tokens_der_new.pkl')

In [7]:
tokens_izq_ex = tokens_izq[tokens_izq.topics == 'exchange'].drop('topics', axis=1)
tokens_izq_ex.insert(loc=0, column='publication', value='left')

In [8]:
tokens_der_ex = tokens_der[tokens_der.topics == 'exchange'].drop('topics', axis=1)
tokens_der_ex.insert(loc=0, column='publication', value='right')

In [9]:
tokens = tokens_izq_ex.append(tokens_der_ex)
tokens.reset_index(inplace=True)
tokens.drop('index', axis=1, inplace=True)
tokens.fillna(0, inplace=True)

In [10]:
tokens.sample(5)

Unnamed: 0,publication,abajo,abarca,abastecimiento,abc,abierta,abiertamente,abierto,abiertos,abre,...,vuelva,vuelvan,vulgarmente,woyecheszen,xx,yaguarete,york,yuanes,zamora,zonas
8,left,0,0.0,0,0.0,0,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,left,0,0.0,0,0.0,0,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25,left,0,0.0,0,0.0,0,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24,left,0,0.0,0,0.0,0,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
29,right,0,0.0,0,0.0,0,0.0,0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Train - Test Split

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
# data balance
tokens.publication.value_counts(normalize=True)

right    0.535714
left     0.464286
Name: publication, dtype: float64

In [13]:
X = tokens.drop('publication', axis=1)

In [14]:
y = tokens['publication']

In [15]:
train, test, y_train, y_test = train_test_split(X, y, train_size=0.75, stratify=tokens.publication)

Shapes and balance

In [16]:
# shape train
display(train.shape[0],
        train.shape[0] / X.shape[0])

42

0.75

In [17]:
# shape test
display(test.shape[0],
        test.shape[0] / X.shape[0])

14

0.25

In [18]:
# data balance - test
display(y_train.value_counts(normalize=True), 
        y_test.value_counts(normalize=True))

right    0.52381
left     0.47619
Name: publication, dtype: float64

right    0.571429
left     0.428571
Name: publication, dtype: float64

Vectorización con TF-IDF

In [19]:
from sklearn.feature_extraction.text import TfidfTransformer

In [20]:
tfidf_vector = TfidfTransformer().fit(train)
train_tfidf = tfidf_vector.transform(train)

In [21]:
test_tfidf = tfidf_vector.transform(test)

#### Models calling

In [22]:
from sklearn.naive_bayes import MultinomialNB

In [23]:
from sklearn.linear_model import LogisticRegression, SGDClassifier

In [24]:
from sklearn.neighbors import KNeighborsClassifier

In [25]:
from sklearn.tree import DecisionTreeClassifier

In [26]:
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier

In [27]:
from xgboost import XGBClassifier

### GridSearchCV (Vectorization + Models)

In [28]:
from sklearn.metrics import accuracy_score

In [29]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

In [30]:
folds = StratifiedKFold(n_splits=5, random_state=19, shuffle=True)

### 1. Naive Bayes

CountVectorizer

In [31]:
parameters = {
        'alpha': (1e-2, 1e-3, 1e-1),
    }

In [32]:
grid_mnb = GridSearchCV(MultinomialNB(), parameters, cv=folds, scoring='accuracy')

In [33]:
grid_mnb.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=MultinomialNB(),
             param_grid={'alpha': (0.01, 0.001, 0.1)}, scoring='accuracy')

In [34]:
mnb_train_score_cv = grid_mnb.score(train, y_train)
mnb_test_score_cv = accuracy_score(grid_mnb.predict(test), y_test)

In [35]:
print(f'''Best parameters: {grid_mnb.best_params_}
Best cv score: {grid_mnb.best_score_}
Train score: {mnb_train_score_cv}
Test test score: {mnb_test_score_cv}''')

Best parameters: {'alpha': 0.001}
Best cv score: 1.0
Train score: 1.0
Test test score: 0.9285714285714286


Tfidfvectorizer

In [36]:
grid_mnb.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=MultinomialNB(),
             param_grid={'alpha': (0.01, 0.001, 0.1)}, scoring='accuracy')

In [37]:
mnb_train_score_td = grid_mnb.score(train_tfidf, y_train)
mnb_test_score_td = accuracy_score(grid_mnb.predict(test_tfidf), y_test)

In [38]:
print(f'''Best parameters: {grid_mnb.best_params_}
Best cv score: {grid_mnb.best_score_}
Train score: {mnb_train_score_td}
Test test score: {mnb_test_score_td}''')

Best parameters: {'alpha': 0.001}
Best cv score: 1.0
Train score: 1.0
Test test score: 0.9285714285714286


### 2. LogisticRegression

CountVectorizer

In [39]:
parameters = {
        'C': [1, 10, 100, 1000],
        'penalty': ['l1', 'l2',],
        'solver': ['saga']
    }

In [40]:
grid_log = GridSearchCV(LogisticRegression(), parameters, cv=folds, scoring='accuracy')

In [41]:
grid_log.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=LogisticRegression(),
             param_grid={'C': [1, 10, 100, 1000], 'penalty': ['l1', 'l2'],
                         'solver': ['saga']},
             scoring='accuracy')

In [42]:
log_cv_train_score = grid_log.score(train, y_train)
log_cv_test_score = accuracy_score(grid_log.predict(test), y_test)

In [43]:
print(f'''Best parameters: {grid_log.best_params_}
Best cv score: {grid_log.best_score_}
Train score: {log_cv_train_score}
Test test score: {log_cv_test_score}''')

Best parameters: {'C': 10, 'penalty': 'l1', 'solver': 'saga'}
Best cv score: 0.925
Train score: 1.0
Test test score: 1.0


Tfidfvectorizer

In [44]:
grid_log.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=LogisticRegression(),
             param_grid={'C': [1, 10, 100, 1000], 'penalty': ['l1', 'l2'],
                         'solver': ['saga']},
             scoring='accuracy')

In [45]:
log_td_train_score = grid_log.score(train_tfidf, y_train)
log_td_test_score = accuracy_score(grid_log.predict(test_tfidf), y_test)

In [46]:
print(f'''Best parameters: {grid_log.best_params_}
Best cv score: {grid_log.best_score_}
Train score: {log_td_train_score}
Test test score: {log_td_test_score}''')

Best parameters: {'C': 10, 'penalty': 'l2', 'solver': 'saga'}
Best cv score: 0.975
Train score: 1.0
Test test score: 0.9285714285714286


### 3. SGDClassifier

CountVectorizer

In [47]:
parameters ={
    'penalty': ('l2', 'elasticnet', 'l1'),
    'max_iter': [50, 80],
    'tol': [1e-4],
    'loss': ['hinge', 'log', 'modified_huber'],
}

In [48]:
grid_sgd = GridSearchCV(SGDClassifier(), parameters, cv=folds, scoring='accuracy')

In [49]:
grid_sgd.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=SGDClassifier(),
             param_grid={'loss': ['hinge', 'log', 'modified_huber'],
                         'max_iter': [50, 80],
                         'penalty': ('l2', 'elasticnet', 'l1'),
                         'tol': [0.0001]},
             scoring='accuracy')

In [50]:
sgd_cv_train_score = grid_sgd.score(train, y_train)
sgd_cv_test_score = accuracy_score(grid_sgd.predict(test), y_test)

In [51]:
print(f'''Best parameters: {grid_sgd.best_params_}
Best cv score: {grid_sgd.best_score_}
Train score: {sgd_cv_train_score}
Test test score: {sgd_cv_test_score}''')

Best parameters: {'loss': 'hinge', 'max_iter': 80, 'penalty': 'l1', 'tol': 0.0001}
Best cv score: 0.975
Train score: 1.0
Test test score: 0.9285714285714286


Tfidfvectorizer

In [52]:
grid_sgd.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=SGDClassifier(),
             param_grid={'loss': ['hinge', 'log', 'modified_huber'],
                         'max_iter': [50, 80],
                         'penalty': ('l2', 'elasticnet', 'l1'),
                         'tol': [0.0001]},
             scoring='accuracy')

In [53]:
sgd_td_train_score = grid_sgd.score(train_tfidf, y_train)
sgd_td_test_score = accuracy_score(grid_sgd.predict(test_tfidf), y_test)

In [54]:
print(f'''Best parameters: {grid_sgd.best_params_}
Best cv score: {grid_sgd.best_score_}
Train score: {log_td_train_score}
Test test score: {log_td_test_score}''')

Best parameters: {'loss': 'hinge', 'max_iter': 50, 'penalty': 'l2', 'tol': 0.0001}
Best cv score: 1.0
Train score: 1.0
Test test score: 0.9285714285714286


### 4. KNeighborsClassifier

CountVectorizer

In [55]:
parameters ={
        'n_neighbors': range(1,4),
        'weights' : ['uniform', 'distance'],
        'p' : [1, 2, 3]
}

In [56]:
grid_knn = GridSearchCV(KNeighborsClassifier(), parameters, cv=folds, scoring='accuracy')

In [57]:
grid_knn.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': range(1, 4), 'p': [1, 2, 3],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

In [58]:
knn_cv_train_score = grid_knn.score(train, y_train)
knn_cv_test_score = accuracy_score(grid_knn.predict(test), y_test)

In [59]:
print(f'''Best parameters: {grid_knn.best_params_}
Best cv score: {grid_knn.best_score_}
Train score: {knn_cv_train_score}
Test test score: {knn_cv_test_score}''')

Best parameters: {'n_neighbors': 1, 'p': 3, 'weights': 'uniform'}
Best cv score: 0.6222222222222222
Train score: 1.0
Test test score: 0.5


Tfidfvectorizer

In [60]:
grid_knn.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': range(1, 4), 'p': [1, 2, 3],
                         'weights': ['uniform', 'distance']},
             scoring='accuracy')

In [61]:
knn_td_train_score = grid_knn.score(train_tfidf, y_train)
knn_td_test_score = accuracy_score(grid_knn.predict(test_tfidf), y_test)

In [62]:
print(f'''Best parameters: {grid_knn.best_params_}
Best cv score: {grid_knn.best_score_}
Train score: {knn_td_train_score}
Test test score: {knn_td_test_score}''')

Best parameters: {'n_neighbors': 3, 'p': 2, 'weights': 'uniform'}
Best cv score: 0.9055555555555556
Train score: 1.0
Test test score: 0.9285714285714286


### 5. CART

CountVectorizer

In [63]:
parameters ={
        "criterion" : ["gini", "entropy"],
        "min_samples_leaf": [5,10,15,20,2], 
        "max_depth" : [1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17],
        "min_samples_split": [2, 3, 4,None]
}

In [64]:
grid_dtc = GridSearchCV(DecisionTreeClassifier(), parameters, cv=folds, scoring='accuracy')

In [65]:
grid_dtc.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13,
                                       14, 15, 16, 17],
                         'min_samples_leaf': [5, 10, 15, 20, 2],
                         'min_samples_split': [2, 3, 4, None]},
             scoring='accuracy')

In [66]:
dtc_cv_train_score = grid_dtc.score(train, y_train)
dtc_cv_test_score = accuracy_score(grid_dtc .predict(test), y_test)

In [67]:
print(f'''Best parameters: {grid_dtc.best_params_}
Best cv score: {grid_dtc.best_score_}
Train score: {dtc_cv_train_score}
Test test score: {dtc_cv_test_score}''')

Best parameters: {'criterion': 'gini', 'max_depth': 8, 'min_samples_leaf': 2, 'min_samples_split': 4}
Best cv score: 0.9777777777777779
Train score: 1.0
Test test score: 0.9285714285714286


Tfidfvectorizer

In [68]:
grid_dtc.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13,
                                       14, 15, 16, 17],
                         'min_samples_leaf': [5, 10, 15, 20, 2],
                         'min_samples_split': [2, 3, 4, None]},
             scoring='accuracy')

In [69]:
dtc_td_train_score = grid_dtc.score(train_tfidf, y_train)
dtc_td_test_score = accuracy_score(grid_dtc.predict(test_tfidf), y_test)

In [70]:
print(f'''Best parameters: {grid_dtc.best_params_}
Best cv score: {grid_dtc.best_score_}
Train score: {knn_td_train_score}
Test test score: {knn_td_test_score}''')

Best parameters: {'criterion': 'gini', 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 4}
Best cv score: 0.9555555555555555
Train score: 1.0
Test test score: 0.9285714285714286


### 6. AdaBoost

CountVectorizer

In [71]:
parameters = {
    'learning_rate': [.5]
}

In [72]:
grid_ada = GridSearchCV(AdaBoostClassifier(), parameters, cv=folds, scoring='accuracy')

In [73]:
grid_ada.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=AdaBoostClassifier(),
             param_grid={'learning_rate': [0.5]}, scoring='accuracy')

In [74]:
ada_cv_train_score = grid_ada.score(train, y_train)
ada_cv_test_score = accuracy_score(grid_ada .predict(test), y_test)

In [75]:
print(f'''Best parameters: {grid_ada.best_params_}
Best cv score: {grid_ada.best_score_}
Train score: {ada_cv_train_score}
Test test score: {ada_cv_test_score}''')

Best parameters: {'learning_rate': 0.5}
Best cv score: 0.9305555555555556
Train score: 1.0
Test test score: 0.9285714285714286


Tfidfvectorizer

In [76]:
grid_ada.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=AdaBoostClassifier(),
             param_grid={'learning_rate': [0.5]}, scoring='accuracy')

In [77]:
ada_td_train_score = grid_ada.score(train_tfidf, y_train)
ada_td_test_score = accuracy_score(grid_ada.predict(test_tfidf), y_test)

In [78]:
print(f'''Best parameters: {grid_ada.best_params_}
Best cv score: {grid_ada.best_score_}
Train score: {ada_td_train_score}
Test test score: {ada_td_test_score}''')

Best parameters: {'learning_rate': 0.5}
Best cv score: 0.9027777777777779
Train score: 1.0
Test test score: 0.8571428571428571


### 7. GradientBoosting

CountVectorizer

In [79]:
parameters = {
    'loss':['deviance', 'exponential']
}

In [80]:
grid_grb = GridSearchCV(GradientBoostingClassifier(), parameters, cv=folds, scoring='accuracy')

In [81]:
grid_grb.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=GradientBoostingClassifier(),
             param_grid={'loss': ['deviance', 'exponential']},
             scoring='accuracy')

In [82]:
grb_cv_train_score = grid_grb.score(train, y_train)
grb_cv_test_score = accuracy_score(grid_grb.predict(test), y_test)

In [83]:
print(f'''Best parameters: {grid_grb.best_params_}
Best cv score: {grid_grb.best_score_}
Train score: {grb_cv_train_score}
Test test score: {grb_cv_test_score}''')

Best parameters: {'loss': 'deviance'}
Best cv score: 0.9305555555555556
Train score: 1.0
Test test score: 0.9285714285714286


Tfidfvectorizer

In [84]:
grid_grb.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=GradientBoostingClassifier(),
             param_grid={'loss': ['deviance', 'exponential']},
             scoring='accuracy')

In [85]:
grb_td_train_score = grid_grb.score(train_tfidf, y_train)
grb_td_test_score = accuracy_score(grid_grb.predict(test_tfidf), y_test)

In [86]:
print(f'''Best parameters: {grid_grb.best_params_}
Best cv score: {grid_grb.best_score_}
Train score: {grb_td_train_score}
Test test score: {grb_td_test_score}''')

Best parameters: {'loss': 'deviance'}
Best cv score: 0.8805555555555555
Train score: 1.0
Test test score: 0.9285714285714286


### 8. XGBoost

CountVectorizer

In [87]:
grid_xgb = GridSearchCV(XGBClassifier(), param_grid={}, cv=folds, scoring='accuracy')

In [88]:
grid_xgb.fit(train, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=Non

In [89]:
xgb_cv_train_score = grid_xgb.score(train, y_train)
xgb_cv_test_score = accuracy_score(grid_xgb.predict(test), y_test)

In [90]:
print(f'''Best parameters: {grid_xgb.best_params_}
Best cv score: {grid_xgb.best_score_}
Train score: {xgb_cv_train_score}
Test test score: {xgb_cv_test_score}''')

Best parameters: {}
Best cv score: 0.9305555555555556
Train score: 1.0
Test test score: 0.8571428571428571


Tfidfvectorizer

In [91]:
grid_xgb.fit(train_tfidf, y_train)

GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=19, shuffle=True),
             estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=100, n_jobs=None,
                                     num_parallel_tree=None, random_state=None,
                                     reg_alpha=None, reg_lambda=None,
                                     scale_pos_weight=None, subsample=Non

In [92]:
xgb_td_train_score = grid_xgb.score(train_tfidf, y_train)
xgb_td_test_score = accuracy_score(grid_xgb.predict(test_tfidf), y_test)

In [93]:
print(f'''Best parameters: {grid_xgb.best_params_}
Best cv score: {grid_xgb.best_score_}
Train score: {xgb_td_train_score}
Test test score: {xgb_td_test_score}''')

Best parameters: {}
Best cv score: 0.8305555555555555
Train score: 1.0
Test test score: 1.0


## Results

In [94]:
import seaborn as sns
import matplotlib.pyplot as plt

In [104]:
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score

In [96]:
models = ['MultinomialNB_cv', 
          'MultinomialNB_td', 
          'LogisticRegression_cv', 
          'LogisticRegression_td',
          'SGD_cv',
          'SGD_td',
          'KNeighbors_cv', 
          'KNeighbors_td',
          'DecisionTree_cv',
          'DecisionTree_td',
          'AdaBoost_cv',
          'AdaBoost_td',
          'GradientBoosting_cv',
          'GradientBoosting_td',
          'XGB_cv',
          'XGB_td']

In [97]:
train_score_cv = [mnb_train_score_cv, log_cv_train_score, sgd_cv_train_score, knn_cv_train_score, dtc_cv_train_score, ada_cv_train_score, grb_cv_train_score, xgb_cv_train_score]
train_score_td = [mnb_train_score_td, log_td_train_score, sgd_td_train_score, knn_td_train_score, dtc_td_train_score, ada_td_train_score, grb_td_train_score, xgb_td_train_score]

In [98]:
test_score_cv = [mnb_test_score_cv, log_cv_test_score, sgd_cv_test_score, knn_cv_test_score, dtc_cv_test_score, ada_cv_test_score, grb_cv_test_score, xgb_cv_test_score]
test_score_td = [mnb_test_score_td, log_td_test_score, sgd_td_test_score, knn_td_test_score, dtc_td_test_score, ada_td_test_score, grb_td_test_score, xgb_td_test_score]

In [99]:
pd.DataFrame({'model':models, 'train score': train_score_cv + train_score_td, 'test score': test_score_cv + test_score_td})

Unnamed: 0,model,train score,test score
0,MultinomialNB_cv,1.0,0.928571
1,MultinomialNB_td,1.0,1.0
2,LogisticRegression_cv,1.0,0.928571
3,LogisticRegression_td,1.0,0.5
4,SGD_cv,1.0,0.928571
5,SGD_td,1.0,0.928571
6,KNeighbors_cv,1.0,0.928571
7,KNeighbors_td,1.0,0.857143
8,DecisionTree_cv,1.0,0.928571
9,DecisionTree_td,1.0,0.928571


In [122]:
estimators = [
    ('MNB', MultinomialNB()),
    ('LR', LogisticRegression()),
    ('SGD', SGDClassifier()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('ADA', AdaBoostClassifier()),
    ('GDB', GradientBoostingClassifier()),
    ('XGB', XGBClassifier())
]
results = []

In [123]:
results = []
scoring = 'accuracy'
print("model\tCV mean\t CV std")
for name, model in estimators:
 	cv_results = cross_val_score(model, X, y, cv=folds, scoring='accuracy')
 	results.append(cv_results)
 	msg = "%s:\t%f (%f)" % (name, cv_results.mean(), cv_results.std())
 	print(msg)

model	CV mean	 CV std
MNB:	0.963636 (0.072727)
LR:	0.928788 (0.035727)
SGD:	0.946970 (0.043387)
KNN:	0.463636 (0.018182)
CART:	0.946970 (0.043387)
ADA:	0.946970 (0.043387)
GDB:	0.928788 (0.035727)
XGB:	0.928788 (0.035727)
