## Feature extraction

In [1]:
import pandas as pd
# Parse each CSV once and take the text column and the label column from the
# same frame (previously every file was read and parsed twice).
train_df = pd.read_csv("/code/data/train.csv")
X_train = train_df['headline']
y_train = train_df['is_sarcastic']

val_df = pd.read_csv("/code/data/val.csv")
X_val = val_df['headline']
y_val = val_df['is_sarcastic']

In [2]:
from sklearn.feature_extraction import text
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF vectorizer created with its default settings.
tfidf_vectorizer = TfidfVectorizer()

# The vectorizer is fitted on the training headlines only; the validation
# headlines are transformed with that already-fitted vectorizer.
tfidf_train = tfidf_vectorizer.fit_transform(X_train) 
tfidf_val = tfidf_vectorizer.transform(X_val)


## Classifiers 

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB

# Base estimators that are later combined in a soft-voting ensemble.
# Every estimator that accepts a seed gets the same one so that repeated runs
# of the notebook produce the same scores.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases --
# confirm against the installed version before upgrading.
clf1 = LogisticRegression(multi_class='multinomial', random_state=1, solver='newton-cg')
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
# probability=True is needed for soft voting (it enables predict_proba); the
# probability estimates involve random shuffling, so the SVC is seeded as well.
clf3 = svm.SVC(gamma='scale', probability=True, random_state=1)
clf4 = MultinomialNB()


## Generate combinations

In [4]:
import itertools
def perm(n, seq):
    """Return every length-``n`` tuple of items drawn from ``seq`` with repetition.

    This is the ``n``-fold Cartesian product of ``seq`` with itself, in the
    order produced by ``itertools.product`` (the last position varies fastest).

    Args:
        n: Length of each generated tuple.
        seq: Iterable of candidate values for every position.

    Returns:
        A list of tuples; it holds ``len(seq) ** n`` entries.
    """
    return list(itertools.product(seq, repeat=n))


## Grid Search

In [8]:
from sklearn.ensemble import VotingClassifier
from itertools import combinations_with_replacement
from sklearn import metrics
import numpy as np

# Grid search over soft-voting weights: every 4-tuple of integer weights in
# 0..4, one weight per base classifier (clf1..clf4), scored by accuracy on the
# validation set.
#
# `results` is kept index-aligned with `comb`: results[0][k] is the accuracy and
# results[1][k] the weight tuple for comb[k]. Entry 0 is a placeholder for the
# all-zero combination, which is skipped (comb[1:]) because all-zero weights
# cannot be normalised into a weighted average.
results = [[0], [(0, 0, 0, 0)]]
comb = perm(4, [0, 1, 2, 3, 4])
for i, combination in enumerate(comb[1:]):
    print(i, combination)

    # NOTE(review): the ensemble (and therefore all four base estimators) is
    # refitted for every weight combination. The weights look like they only
    # matter at prediction time, so fitting once may be enough -- confirm
    # against the VotingClassifier documentation before changing this.
    eclf = VotingClassifier(
        estimators=[('clf1', clf1), ('clf2', clf2), ('clf3', clf3), ('clf4', clf4)],
        n_jobs=1, voting='soft', weights=combination)

    eclf = eclf.fit(tfidf_train, y_train)
    pred3 = eclf.predict(tfidf_val)
    result = metrics.accuracy_score(y_val, pred3)
    print(result)
    results[0].append(result)
    results[1].append(combination)

# Write one row per weight combination. The frame is built from two named,
# equal-length columns; converting the nested list with np.asarray produced a
# ragged array, which newer NumPy versions reject with a ValueError.
pd.DataFrame({"accuracy": results[0], "weights": results[1]}).to_csv("results2.csv")

0 (0, 0, 0, 1)
0.8322851153039832
1 (0, 0, 0, 2)
0.8322851153039832
2 (0, 0, 0, 3)
0.8322851153039832
3 (0, 0, 0, 4)
0.8322851153039832
4 (0, 0, 1, 0)
0.8495224784532961
5 (0, 0, 1, 1)
0.8616352201257862
6 (0, 0, 1, 2)
0.8572094106685302
7 (0, 0, 1, 3)
0.8551129746098299
8 (0, 0, 1, 4)
0.852317726531563
9 (0, 0, 2, 0)
0.8492895411134405
10 (0, 0, 2, 1)
0.856743535988819
11 (0, 0, 2, 2)
0.8616352201257862
12 (0, 0, 2, 3)
0.8590729093873748
13 (0, 0, 2, 4)
0.8572094106685302
14 (0, 0, 3, 0)
0.8495224784532961
15 (0, 0, 3, 1)
0.8537153505706965
16 (0, 0, 3, 2)
0.8588399720475192
17 (0, 0, 3, 3)
0.8621010948054973
18 (0, 0, 3, 4)
0.859538784067086
19 (0, 0, 4, 0)
0.8499883531330072
20 (0, 0, 4, 1)
0.8527836012112742
21 (0, 0, 4, 2)
0.856743535988819
22 (0, 0, 4, 3)
0.8604705334265083
23 (0, 0, 4, 4)
0.8621010948054973
24 (0, 1, 0, 0)
0.8101560680177032
25 (0, 1, 0, 1)
0.8481248544141626
26 (0, 1, 0, 2)
0.8565105986489634
27 (0, 1, 0, 3)
0.8527836012112742
28 (0, 1, 0, 4)
0.8532494758909853

0.8457954810156068
232 (1, 4, 1, 3)
0.8504542278127184
233 (1, 4, 1, 4)
0.8537153505706965
234 (1, 4, 2, 0)
0.8432331702771955
235 (1, 4, 2, 1)
0.8467272303750292
236 (1, 4, 2, 2)
0.8513859771721407
237 (1, 4, 2, 3)
0.856743535988819
238 (1, 4, 2, 4)
0.8576752853482413
239 (1, 4, 3, 0)
0.8443978569764733
240 (1, 4, 3, 1)
0.8483577917540182
241 (1, 4, 3, 2)
0.8527836012112742
242 (1, 4, 3, 3)
0.8560447239692522
243 (1, 4, 3, 4)
0.8581411600279525
244 (1, 4, 4, 0)
0.8464942930351735
245 (1, 4, 4, 1)
0.8495224784532961
246 (1, 4, 4, 2)
0.8530165385511298
247 (1, 4, 4, 3)
0.8548800372699744
248 (1, 4, 4, 4)
0.8581411600279525
249 (2, 0, 0, 0)
0.8469601677148847
250 (2, 0, 0, 1)
0.8527836012112742
251 (2, 0, 0, 2)
0.8565105986489634
252 (2, 0, 0, 3)
0.8527836012112742
253 (2, 0, 0, 4)
0.852317726531563
254 (2, 0, 1, 0)
0.8469601677148847
255 (2, 0, 1, 1)
0.8525506638714185
256 (2, 0, 1, 2)
0.8581411600279525
257 (2, 0, 1, 3)
0.8600046587467971
258 (2, 0, 1, 4)
0.8569764733286745
259 (2, 0, 

0.8506871651525739
461 (3, 3, 2, 2)
0.8548800372699744
462 (3, 3, 2, 3)
0.8576752853482413
463 (3, 3, 2, 4)
0.8588399720475192
464 (3, 3, 3, 0)
0.8497554157931516
465 (3, 3, 3, 1)
0.8527836012112742
466 (3, 3, 3, 2)
0.8553459119496856
467 (3, 3, 3, 3)
0.8546470999301188
468 (3, 3, 3, 4)
0.856743535988819
469 (3, 3, 4, 0)
0.8499883531330072
470 (3, 3, 4, 1)
0.8544141625902633
471 (3, 3, 4, 2)
0.8555788492895411
472 (3, 3, 4, 3)
0.8548800372699744
473 (3, 3, 4, 4)
0.8558117866293967
474 (3, 4, 0, 0)
0.8385744234800838
475 (3, 4, 0, 1)
0.843466107617051
476 (3, 4, 0, 2)
0.8481248544141626
477 (3, 4, 0, 3)
0.8520847891917074
478 (3, 4, 0, 4)
0.8546470999301188
479 (3, 4, 1, 0)
0.8416026088982064
480 (3, 4, 1, 1)
0.8464942930351735
481 (3, 4, 1, 2)
0.8518518518518519
482 (3, 4, 1, 3)
0.8574423480083857
483 (3, 4, 1, 4)
0.8590729093873748
484 (3, 4, 2, 0)
0.8439319822967621
485 (3, 4, 2, 1)
0.8483577917540182
486 (3, 4, 2, 2)
0.8527836012112742
487 (3, 4, 2, 3)
0.8558117866293967
488 (3, 4, 