# Voting classifier test

## Imports

In [2]:
import numpy as np
import pandas as pd
import re
from sklearn.model_selection import train_test_split
import matplotlib
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import random
from sklearn.metrics import r2_score, mean_squared_error, accuracy_score, log_loss
from sklearn import svm #support vector machines
from sklearn.feature_extraction.text import CountVectorizer

from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

from functions import *

  from ._conv import register_converters as _register_converters


## Read in data

In [3]:
# Build the training design matrix. `get_design_matrix` and `clean` are not
# defined in this notebook; presumably they come from the star import of
# `functions` -- verify there what `min_df` controls.
print("Importing design matrix ...")
X_train, y_train, features = get_design_matrix(cleaning_function=clean, min_df=3)
print("Done.")

# Report the number of rows of the design matrix and the number of feature names.
n_recipes = X_train.shape[0]
n_features = len(features)
print("There are %d recipies and %d feautres" % (n_recipes, n_features))

# Number of folds in the k-fold cross validation.
k = 3


Importing design matrix ...
Done.
There are 39774 recipies and 2176 feautres


## Setting up the voting classifier

First, we set up the base classifiers with their optimal parameters.

In [4]:
# Base estimators reused by every voting ensemble below.
# The random forest, the MLP and LinearSVC all draw random numbers while
# fitting; pinning `random_state` makes the cross-validation scores
# repeatable across kernel restarts.
SEED = 42

# Multinomial logistic regression fitted with L-BFGS.
logistic_clf = LogisticRegression(solver='lbfgs', multi_class='multinomial', C=1)

# Forest of 100 trees with no depth limit.
forrest_clf = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=SEED)

# One hidden layer of 1500 units. `(1500)` is just the integer 1500, so the
# size is written as a real one-element tuple to make the intent explicit.
# NOTE(review): max_iter=10 caps training at 10 iterations, so the optimiser
# may stop before converging -- presumably a deliberate speed trade-off.
mlp_clf = MLPClassifier(hidden_layer_sizes=(1500,), alpha=0.01, max_iter=10, random_state=SEED)

# Linear support vector classifier.
svm_clf = svm.LinearSVC(C=0.1, random_state=SEED)

Now we systematically go through combinations of these classifiers and both voting styles (soft and hard), scoring each ensemble with k-fold cross-validation.

In [8]:
# Soft-voting ensemble of logistic regression and the random forest.
voting_lf_soft = VotingClassifier(
    estimators=[
        ('logistic', logistic_clf),
        ('forrest', forrest_clf),
    ],
    voting='soft',
)
print("Cross validation for soft voting with logistic and forrest.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_lf_soft = np.mean(cross_val_score(voting_lf_soft, X_train, y_train, cv=k))
print(score_lf_soft)

Cross validation for soft voting with logistic and forrest.


  if diff:
  if diff:


0.7930061698387049


  if diff:


In [5]:
# Soft-voting ensemble of logistic regression and the MLP.
voting_lmlp_soft = VotingClassifier(
    estimators=[
        ('logistic', logistic_clf),
        ('mlp', mlp_clf),
    ],
    voting='soft',
)
print("Cross validation for soft voting with logistic and mlp.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_lmlp_soft = np.mean(cross_val_score(voting_lmlp_soft, X_train, y_train, cv=k))
print(score_lmlp_soft)

Cross validation for soft voting with logistic and mlp.


  if diff:
  if diff:


0.7886058188748845


  if diff:


In [6]:
# Soft-voting ensemble of the random forest and the MLP.
voting_fmlp_soft = VotingClassifier(
    estimators=[
        ('forrest', forrest_clf),
        ('mlp', mlp_clf),
    ],
    voting='soft',
)
print("Cross validation for soft voting with forrest and mlp.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_fmlp_soft = np.mean(cross_val_score(voting_fmlp_soft, X_train, y_train, cv=k))
print(score_fmlp_soft)

Cross validation for soft voting with forrest and mlp.


  if diff:
  if diff:


0.7982103573435676


  if diff:


In [7]:
# Soft-voting ensemble of all three estimators used in the soft-voting
# experiments: random forest, MLP and logistic regression.
voting_all_soft = VotingClassifier(
    estimators=[
        ('forrest', forrest_clf),
        ('mlp', mlp_clf),
        ('logistic', logistic_clf),
    ],
    voting='soft',
)
print("Cross validation for soft voting with forrest, mlp, and logistic.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_all_soft = np.mean(cross_val_score(voting_all_soft, X_train, y_train, cv=k))
print(score_all_soft)

Cross validation for soft voting with forrest, mlp, and logistic.


  if diff:
  if diff:
  if diff:


0.7969028509972337


In [8]:
# Hard (majority-vote) ensemble of the linear SVM, random forest and MLP.
# LinearSVC provides no predict_proba, which is presumably why the SVM only
# appears in the hard-voting experiments.
voting_hard = VotingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('forrest', forrest_clf),
        ('mlp', mlp_clf),
    ],
    voting='hard',
)
print("Cross validation for hard voting with svm and forrests and mlp.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_hard = np.mean(cross_val_score(voting_hard, X_train, y_train, cv=k))
print(score_hard)

Cross validation for hard voting with svm and forrests and mlp.


  if diff:
  if diff:


0.7981853137811373


  if diff:


In [10]:
# Hard (majority-vote) ensemble of all four base estimators.
# With an even number of voters, ties are possible; per the scikit-learn
# documentation, VotingClassifier breaks them by ascending class-label order.
voting_hard_4 = VotingClassifier(
    estimators=[
        ('svm', svm_clf),
        ('logistic', logistic_clf),
        ('forrest', forrest_clf),
        ('mlp', mlp_clf),
    ],
    voting='hard',
)
print("Cross validation for hard voting with svm, logistic, forrests, and mlp.")
# Use the fold count `k` set in the data-loading cell instead of a separate
# hard-coded 3, so every experiment shares one setting.
score_hard_4 = np.mean(cross_val_score(voting_hard_4, X_train, y_train, cv=k))
print(score_hard_4)

Cross validation for hard voting with svm, logistic, forrests, and mlp.


  if diff:
  if diff:


0.7927292335880916


  if diff:
