In [1]:
# #!/usr/bin/python3

%matplotlib inline
%config InlineBackend.figure_formats = ["retina"]
# %load_ext autotime

#The features are borrowed from Eike Dehling's kernel at  https://www.kaggle.com/eikedehling/feature-engineering

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import string
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC
import xgboost as xgb
from tqdm import tqdm
import gc
from sklearn.feature_extraction.text import TfidfVectorizer
import scipy
import multiprocessing

# Display progress logs on stdout
import logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s %(levelname)s %(message)s')

# from nltk.corpus import stopwords
# stop_words = stopwords.words('english')

In [6]:
dtypes = {
    'comment_text'   : np.unicode ,
    'toxic':         np.int16, 
    'severe_toxic': np.int16,
    'obscene': np.int16,
    'threat': np.int16,
    'insult': np.int16,
    'identity_hate': np.int16
}

train = pd.read_csv('data/train.csv', dtype=dtypes, encoding='utf-8')
test = pd.read_csv('data/test.csv', dtype=dtypes, encoding='utf-8')

train.comment_text.fillna("unknown", inplace=True)
test.comment_text.fillna("unknown",  inplace=True)

In [3]:
train_mes, valid_mes, train_l, valid_l = train_test_split(train['comment_text'],
                                                          train[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']],
                                                          test_size=0.2, random_state=42)

In [4]:
#Using the tokenize function from Jeremy's kernel
import re, string
re_tok = re.compile(u'([{string.punctuation}“”¨«»®´·º½¾¿¡§£₤‘’])')
def tokenize(s):
    return re_tok.sub(r' \1 ', s).split()

transform_com = TfidfVectorizer(ngram_range=(1,2), tokenizer=tokenize,
               min_df=3, max_df=0.9, strip_accents='unicode', use_idf=1,
               smooth_idf=1, sublinear_tf=1).fit(train['comment_text'])

comments_train = transform_com.transform(train_mes)
comments_valid = transform_com.transform(valid_mes)
comments_test = transform_com.transform(test['comment_text'])
gc.collect()

train_mes = pd.DataFrame(train_mes)
valid_mes = pd.DataFrame(valid_mes)
data = [train_mes, valid_mes, test]

In [9]:
# These features are borrowed from https://www.kaggle.com/eikedehling/feature-engineering
for element in data:
    element['total_length'] = element['comment_text'].apply(len)
    element['capitals'] = element['comment_text'].apply(lambda comment: sum(1 for c in comment if c.isupper()))
    element['caps_vs_length'] = element.apply(lambda row: float(row['capitals'])/float(row['total_length']), axis=1)
    element['num_exclamation_marks'] = element['comment_text'].apply(lambda comment: comment.count('!'))
    element['num_question_marks'] = element['comment_text'].apply(lambda comment: comment.count('?'))
    element['num_punctuation'] = element['comment_text'].apply(lambda comment: sum(comment.count(w) for w in '.,;:'))
    element['num_symbols'] = element['comment_text'].apply(lambda comment: sum(comment.count(w) for w in '*&$%'))
    element['num_words'] = element['comment_text'].apply(lambda comment: len(comment.split()))
    element['num_unique_words'] = element['comment_text'].apply(lambda comment: len(set(w for w in comment.split())))
    element['words_vs_unique'] = element['num_unique_words'] / element['num_words']
    element['num_smilies'] = element['comment_text'].apply(lambda comment: sum(comment.count(w) for w in (':-)', ':)', ';-)', ';)')))
    
col = ['total_length', 'capitals', 'caps_vs_length',
       'num_exclamation_marks', 'num_question_marks', 'num_punctuation',
       'num_symbols', 'num_words', 'num_unique_words', 'words_vs_unique',
       'num_smilies']
columns = ('toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate')

# Now, let's Combine both the Feature Vectors and the Engineered Features

In [14]:
train_mes = scipy.sparse.csr_matrix(train_mes[col].values)
valid_mes = scipy.sparse.csr_matrix(valid_mes[col].values)
test = scipy.sparse.csr_matrix(test[col].values)


comments_train = scipy.sparse.hstack([train_mes.tocsr(),comments_train.tocsr()])
comments_valid = scipy.sparse.hstack([valid_mes,comments_valid])
comments_test = scipy.sparse.hstack([test,comments_test])

In [15]:
def runXGB(train_X, train_y, test_X, test_y=None, feature_names=None, seed_val=2017, num_rounds=500):
    param = {}
    param['objective'] = 'binary:logistic'
    param['eta'] = 0.1
    param['max_depth'] = 6
    param['silent'] = 1
    param['eval_metric'] = 'auc'
    param['min_child_weight'] = 1
    param['subsample'] = 0.7
    param['colsample_bytree'] = 0.7
    param['seed'] = seed_val
    num_rounds = num_rounds

    plst = list(param.items())
    xgtrain = xgb.DMatrix(train_X, label=train_y)

    if test_y is not None:
        xgtest = xgb.DMatrix(test_X, label=test_y)
        watchlist = [ (xgtrain,'train'), (xgtest, 'valid') ]
        model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=20)
    else:
        xgtest = xgb.DMatrix(test_X)
        model = xgb.train(plst, xgtrain, num_rounds)

    return model 

In [16]:
col = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
preds = np.zeros((test.shape[0], len(col)))

for i, j in enumerate(col):
    print('fit '+j)
    model = runXGB(comments_train, train_l[j], comments_valid, valid_l[j])
    preds[:,i] = model.predict(xgb.DMatrix(comments_test), ntree_limit = model.best_ntree_limit)
    gc.collect()

fit toxic
[0]	train-auc:0.77242	test-auc:0.765702
Multiple eval metrics have been passed: 'test-auc' will be used for early stopping.

Will train until test-auc hasn't improved in 20 rounds.
[1]	train-auc:0.815782	test-auc:0.809515
[2]	train-auc:0.830233	test-auc:0.824112
[3]	train-auc:0.842537	test-auc:0.833436
[4]	train-auc:0.851532	test-auc:0.842671
[5]	train-auc:0.853684	test-auc:0.845302
[6]	train-auc:0.857449	test-auc:0.847592
[7]	train-auc:0.862579	test-auc:0.849572
[8]	train-auc:0.866918	test-auc:0.852981
[9]	train-auc:0.869632	test-auc:0.855417
[10]	train-auc:0.871686	test-auc:0.857329
[11]	train-auc:0.874539	test-auc:0.860705
[12]	train-auc:0.877057	test-auc:0.861877
[13]	train-auc:0.878491	test-auc:0.861911
[14]	train-auc:0.880472	test-auc:0.863216
[15]	train-auc:0.882349	test-auc:0.864087
[16]	train-auc:0.88567	test-auc:0.867465
[17]	train-auc:0.889556	test-auc:0.869415
[18]	train-auc:0.893246	test-auc:0.871862
[19]	train-auc:0.894864	test-auc:0.872676
[20]	train-auc:0.8969

[191]	train-auc:0.987448	test-auc:0.934408
[192]	train-auc:0.987509	test-auc:0.934325
[193]	train-auc:0.987708	test-auc:0.934493
[194]	train-auc:0.987807	test-auc:0.934405
[195]	train-auc:0.987865	test-auc:0.934369
[196]	train-auc:0.987988	test-auc:0.934483
[197]	train-auc:0.98814	test-auc:0.934603
[198]	train-auc:0.988197	test-auc:0.934683
[199]	train-auc:0.988302	test-auc:0.934815
[200]	train-auc:0.98837	test-auc:0.934845
[201]	train-auc:0.988402	test-auc:0.934776
[202]	train-auc:0.988572	test-auc:0.934836
[203]	train-auc:0.988609	test-auc:0.934972
[204]	train-auc:0.988696	test-auc:0.935083
[205]	train-auc:0.988818	test-auc:0.935163
[206]	train-auc:0.988875	test-auc:0.935183
[207]	train-auc:0.98895	test-auc:0.935126
[208]	train-auc:0.989019	test-auc:0.935157
[209]	train-auc:0.989059	test-auc:0.935247
[210]	train-auc:0.989135	test-auc:0.935222
[211]	train-auc:0.989273	test-auc:0.935234
[212]	train-auc:0.989439	test-auc:0.935295
[213]	train-auc:0.989553	test-auc:0.935275
[214]	train-au

[13]	train-auc:0.943353	test-auc:0.91348
[14]	train-auc:0.944051	test-auc:0.913089
[15]	train-auc:0.944211	test-auc:0.912897
[16]	train-auc:0.955572	test-auc:0.940885
[17]	train-auc:0.955691	test-auc:0.941323
[18]	train-auc:0.962142	test-auc:0.945871
[19]	train-auc:0.967778	test-auc:0.953219
[20]	train-auc:0.967777	test-auc:0.953625
[21]	train-auc:0.970079	test-auc:0.954729
[22]	train-auc:0.970785	test-auc:0.955409
[23]	train-auc:0.970955	test-auc:0.955318
[24]	train-auc:0.971681	test-auc:0.954873
[25]	train-auc:0.975182	test-auc:0.955163
[26]	train-auc:0.976377	test-auc:0.954916
[27]	train-auc:0.976363	test-auc:0.954942
[28]	train-auc:0.976381	test-auc:0.954955
[29]	train-auc:0.976696	test-auc:0.955593
[30]	train-auc:0.977971	test-auc:0.955898
[31]	train-auc:0.983087	test-auc:0.955338
[32]	train-auc:0.983905	test-auc:0.953854
[33]	train-auc:0.986247	test-auc:0.95315
[34]	train-auc:0.987517	test-auc:0.95668
[35]	train-auc:0.988089	test-auc:0.957027
[36]	train-auc:0.988446	test-auc:0.95

[99]	train-auc:0.993243	test-auc:0.95687
[100]	train-auc:0.993333	test-auc:0.956776
[101]	train-auc:0.993556	test-auc:0.957113
[102]	train-auc:0.993649	test-auc:0.957214
[103]	train-auc:0.993679	test-auc:0.957329
[104]	train-auc:0.993806	test-auc:0.957315
[105]	train-auc:0.993892	test-auc:0.95733
[106]	train-auc:0.993926	test-auc:0.957427
[107]	train-auc:0.994068	test-auc:0.957602
[108]	train-auc:0.994118	test-auc:0.957737
[109]	train-auc:0.994241	test-auc:0.957855
[110]	train-auc:0.994303	test-auc:0.958048
[111]	train-auc:0.994353	test-auc:0.957979
[112]	train-auc:0.994456	test-auc:0.957861
[113]	train-auc:0.994518	test-auc:0.957801
[114]	train-auc:0.994586	test-auc:0.957963
[115]	train-auc:0.99464	test-auc:0.958274
[116]	train-auc:0.99472	test-auc:0.958432
[117]	train-auc:0.994784	test-auc:0.958475
[118]	train-auc:0.994831	test-auc:0.958384
[119]	train-auc:0.994966	test-auc:0.958639
[120]	train-auc:0.995007	test-auc:0.958664
[121]	train-auc:0.995057	test-auc:0.958868
[122]	train-auc:

[39]	train-auc:0.980066	test-auc:0.913364
[40]	train-auc:0.980064	test-auc:0.910812
[41]	train-auc:0.980647	test-auc:0.919733
[42]	train-auc:0.980309	test-auc:0.917757
[43]	train-auc:0.981158	test-auc:0.922466
[44]	train-auc:0.983381	test-auc:0.921396
[45]	train-auc:0.984731	test-auc:0.919606
[46]	train-auc:0.984836	test-auc:0.918707
[47]	train-auc:0.985199	test-auc:0.916094
[48]	train-auc:0.988201	test-auc:0.919753
[49]	train-auc:0.991159	test-auc:0.920017
[50]	train-auc:0.991522	test-auc:0.918763
[51]	train-auc:0.993616	test-auc:0.917049
[52]	train-auc:0.993934	test-auc:0.919724
[53]	train-auc:0.995926	test-auc:0.928921
[54]	train-auc:0.996245	test-auc:0.932596
[55]	train-auc:0.996683	test-auc:0.937738
[56]	train-auc:0.996972	test-auc:0.940543
[57]	train-auc:0.997303	test-auc:0.941461
[58]	train-auc:0.997504	test-auc:0.943013
[59]	train-auc:0.997852	test-auc:0.941857
[60]	train-auc:0.998071	test-auc:0.941992
[61]	train-auc:0.998237	test-auc:0.944663
[62]	train-auc:0.998387	test-auc:0

[99]	train-auc:0.988678	test-auc:0.944324
[100]	train-auc:0.988958	test-auc:0.944406
[101]	train-auc:0.989146	test-auc:0.94471
[102]	train-auc:0.989297	test-auc:0.944776
[103]	train-auc:0.989369	test-auc:0.944956
[104]	train-auc:0.989606	test-auc:0.945048
[105]	train-auc:0.989788	test-auc:0.94487
[106]	train-auc:0.989993	test-auc:0.945303
[107]	train-auc:0.990123	test-auc:0.945383
[108]	train-auc:0.990203	test-auc:0.945477
[109]	train-auc:0.990418	test-auc:0.94583
[110]	train-auc:0.990563	test-auc:0.945996
[111]	train-auc:0.990749	test-auc:0.945914
[112]	train-auc:0.990812	test-auc:0.946006
[113]	train-auc:0.99101	test-auc:0.945958
[114]	train-auc:0.991213	test-auc:0.946349
[115]	train-auc:0.991347	test-auc:0.946729
[116]	train-auc:0.991434	test-auc:0.94683
[117]	train-auc:0.991535	test-auc:0.946927
[118]	train-auc:0.991697	test-auc:0.947019
[119]	train-auc:0.991822	test-auc:0.947268
[120]	train-auc:0.991895	test-auc:0.947253
[121]	train-auc:0.991957	test-auc:0.947365
[122]	train-auc:0

[82]	train-auc:0.997723	test-auc:0.933482
[83]	train-auc:0.997914	test-auc:0.934327
[84]	train-auc:0.998101	test-auc:0.933809
[85]	train-auc:0.998151	test-auc:0.93438
[86]	train-auc:0.998202	test-auc:0.934004
[87]	train-auc:0.998284	test-auc:0.933203
[88]	train-auc:0.998333	test-auc:0.933148
[89]	train-auc:0.998551	test-auc:0.934569
[90]	train-auc:0.998641	test-auc:0.933932
[91]	train-auc:0.998687	test-auc:0.934429
[92]	train-auc:0.99879	test-auc:0.934636
[93]	train-auc:0.998886	test-auc:0.93564
[94]	train-auc:0.998937	test-auc:0.935705
[95]	train-auc:0.998961	test-auc:0.936943
[96]	train-auc:0.998993	test-auc:0.936804
[97]	train-auc:0.999022	test-auc:0.937946
[98]	train-auc:0.999087	test-auc:0.937456
[99]	train-auc:0.999144	test-auc:0.937387
[100]	train-auc:0.999206	test-auc:0.937433
[101]	train-auc:0.999232	test-auc:0.937461
[102]	train-auc:0.999271	test-auc:0.937041
[103]	train-auc:0.999324	test-auc:0.935862
[104]	train-auc:0.999332	test-auc:0.935584
[105]	train-auc:0.999368	test-au

In [33]:
subm = pd.read_csv('data/sample_submission.csv')    
submid = pd.DataFrame({'id': subm["id"].values.astype(str)}, dtype=np.str)
submission = pd.concat([submid, pd.DataFrame(preds, columns = col)], axis=1)
submission.to_csv('submissions/sample_submission_xgb.csv', index=False)

     ### got AUC of train-auc:0.999022	valid-auc:0.937946


# Try GloVe Word Vectors

In [7]:
train_mes, valid_mes, train_l, valid_l = train_test_split(train['comment_text'],
                                                          train[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']],
                                                          test_size=0.2, random_state=42)
train_mes = pd.DataFrame(train_mes)
valid_mes = pd.DataFrame(valid_mes)

In [23]:
# load the GloVe vectors in a dictionary:

embeddings_index = {}
f = open('../Sentiment_Model_Template/glove.840B.300d.txt', encoding='utf-8')
for line in tqdm(f):
    values = line.split()
    word = values[0]
    try:
        coefs = np.asarray(values[1:], dtype='float32')
    except:
        continue
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

2196017it [04:09, 8808.55it/s] 

Found 2195884 word vectors.





In [9]:
# this function creates a normalized vector for the whole sentence
def sent2vec(s, embeddings_index):
    words = str(s).lower()
    words = tokenize(words)
#     words = [w for w in words if not w in stop_words]
    words = [w for w in words if w.isalpha()]
    M = []
    for w in words:
        try:
            M.append(embeddings_index[w])
        except:
            continue
    M = np.array(M)
    v = M.sum(axis=0)
    if type(v) != np.ndarray:
        return np.zeros(300)
    return v / np.sqrt((v ** 2).sum())

In [11]:
gc.collect() # To free up some Memory

0

In [52]:
# create sentence vectors using the above function for training and validation set
xtrain_glove = [sent2vec(x, embeddings_index) for x in tqdm(train_mes['comment_text'])]
xvalid_glove = [sent2vec(x, embeddings_index) for x in tqdm(valid_mes['comment_text'])]

xtrain_glove = np.array(xtrain_glove)
xvalid_glove = np.array(xvalid_glove)




  0%|          | 0/76680 [00:00<?, ?it/s][A[A[A


  0%|          | 136/76680 [00:00<00:58, 1310.42it/s][A[A[A


  1%|          | 392/76680 [00:00<00:39, 1921.80it/s][A[A[A


  1%|          | 684/76680 [00:00<00:33, 2247.58it/s][A[A[A


  1%|          | 953/76680 [00:00<00:32, 2349.12it/s][A[A[A


  2%|▏         | 1202/76680 [00:00<00:31, 2376.52it/s][A[A[A


  2%|▏         | 1451/76680 [00:00<00:31, 2393.55it/s][A[A[A


  2%|▏         | 1715/76680 [00:00<00:30, 2428.17it/s][A[A[A


  3%|▎         | 1967/76680 [00:00<00:30, 2435.60it/s][A[A[A


  3%|▎         | 2233/76680 [00:00<00:30, 2459.30it/s][A[A[A


  3%|▎         | 2520/76680 [00:01<00:29, 2498.95it/s][A[A[A


  4%|▎         | 2810/76680 [00:01<00:29, 2534.71it/s][A[A[A


  4%|▍         | 3078/76680 [00:01<00:29, 2531.88it/s][A[A[A


  4%|▍         | 3368/76680 [00:01<00:28, 2557.63it/s][A[A[A


  5%|▍         | 3638/76680 [00:01<00:28, 2556.87it/s][A[A[A


  5%|▌         | 3918/76

 92%|█████████▏| 70205/76680 [00:25<00:02, 2736.03it/s][A[A[A


 92%|█████████▏| 70492/76680 [00:25<00:02, 2736.47it/s][A[A[A


 92%|█████████▏| 70838/76680 [00:25<00:02, 2739.29it/s][A[A[A


 93%|█████████▎| 71145/76680 [00:25<00:02, 2740.45it/s][A[A[A


 93%|█████████▎| 71445/76680 [00:26<00:01, 2740.70it/s][A[A[A


 94%|█████████▎| 71740/76680 [00:26<00:01, 2740.15it/s][A[A[A


 94%|█████████▍| 72025/76680 [00:26<00:01, 2739.95it/s][A[A[A


 94%|█████████▍| 72309/76680 [00:26<00:01, 2740.29it/s][A[A[A


 95%|█████████▍| 72600/76680 [00:26<00:01, 2740.84it/s][A[A[A


 95%|█████████▌| 72883/76680 [00:26<00:01, 2740.51it/s][A[A[A


 95%|█████████▌| 73203/76680 [00:26<00:01, 2742.19it/s][A[A[A


 96%|█████████▌| 73494/76680 [00:26<00:01, 2742.10it/s][A[A[A


 96%|█████████▋| 73816/76680 [00:26<00:01, 2743.79it/s][A[A[A


 97%|█████████▋| 74112/76680 [00:27<00:00, 2744.30it/s][A[A[A


 97%|█████████▋| 74405/76680 [00:27<00:00, 2744.42it/s][A[A

In [53]:
xtrain_glove.shape

(76680, 300)

In [54]:
xvalid_glove.shape

(19171, 300)

In [55]:
scores = []
col = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
preds = np.zeros((valid_mes.shape[0], len(col)))

for i, class_name in enumerate(col):
    print('fit '+ class_name)
#     classifier = LogisticRegression(C=0.1, solver='sag')
    classifier = SGDClassifier(loss='log', max_iter=1000, epsilon=0.001, n_jobs=-1)
#     classifier = SVC(C=1.0, probability=True)

    cv_score = np.mean(cross_val_score(classifier, xtrain_glove, train_l[class_name], cv=5, scoring='roc_auc'))
    scores.append(cv_score)
    print('CV score for class {} is {}'.format(class_name, cv_score))

    classifier.fit(xtrain_glove, train_l[class_name])
    preds[:,i] = classifier.predict_proba(xvalid_glove) 
    
print('Total CV score is {}'.format(np.mean(scores)))


fit toxic
CV score for class toxic is 0.7815914773233411
fit severe_toxic
CV score for class severe_toxic is 0.9074135380000475
fit obscene
CV score for class obscene is 0.8259538580137147
fit threat
CV score for class threat is 0.8624474536468689
fit insult
CV score for class insult is 0.8230832481127066
fit identity_hate
CV score for class identity_hate is 0.8288065779076506
Total CV score is 0.8382160255007216


In [36]:
# Generate Word vectors of test data
xtest_glove = [sent2vec(x, embeddings_index) for x in tqdm(test['comment_text'])]
xtest_glove = np.array(xtest_glove)

In [28]:
testpreds_glove = np.zeros((test.shape[0], len(col)))
for i, class_name in enumerate(col):
    print('fit '+ class_name)
    testpreds_google[:,i] = classifier.predict_proba(xtest_glove)[:,1] 

fit toxic
fit severe_toxic
fit obscene
fit threat
fit insult
fit identity_hate


In [58]:
submission = pd.concat([submid, pd.DataFrame(testpreds_glove, columns = col)], axis=1)
submission.to_csv('submissions/sample_submission_glove_sgd.csv', index=False)

    ### got total AUC of 0.838 on validation set

# Try Google Word Vectors

In [3]:
gc.collect()

0

In [4]:
# load the GloVe vectors in a dictionary:
embeddings_index_google = {}
f = open('../Sentiment_Model_Template/GoogleNews-vectors-negative300.txt')
for line in tqdm(f):
    values = line.split()
    word = values[0]
    try:
        coefs = np.asarray(values[1:], dtype='float32')
    except:
        continue
    embeddings_index_google[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index_google))

3000001it [03:00, 16636.95it/s]

Found 3000000 word vectors.





In [19]:
gc.collect()

0

In [14]:
# create sentence vectors using the above function for training and validation set
xtrain_google = [sent2vec(x, embeddings_index_google) for x in tqdm(train_mes['comment_text'])]
xvalid_google = [sent2vec(x, embeddings_index_google) for x in tqdm(valid_mes['comment_text'])]

xtrain_google = np.array(xtrain_google)
xvalid_google = np.array(xvalid_google)



  0%|          | 0/76680 [00:00<?, ?it/s][A[A

  0%|          | 279/76680 [00:00<00:27, 2761.13it/s][A[A

  1%|          | 580/76680 [00:00<00:26, 2876.78it/s][A[A

  1%|          | 829/76680 [00:00<00:27, 2743.46it/s][A[A

  1%|▏         | 1080/76680 [00:00<00:28, 2684.11it/s][A[A

  2%|▏         | 1325/76680 [00:00<00:28, 2634.96it/s][A[A

  2%|▏         | 1574/76680 [00:00<00:29, 2580.84it/s][A[A

  2%|▏         | 1836/76680 [00:00<00:28, 2587.94it/s][A[A

  3%|▎         | 2068/76680 [00:00<00:29, 2537.41it/s][A[A

  3%|▎         | 2338/76680 [00:00<00:29, 2552.90it/s][A[A

  3%|▎         | 2604/76680 [00:01<00:28, 2563.17it/s][A[A

  4%|▎         | 2873/76680 [00:01<00:28, 2572.88it/s][A[A

  4%|▍         | 3174/76680 [00:01<00:28, 2607.04it/s][A[A

  5%|▍         | 3484/76680 [00:01<00:27, 2644.25it/s][A[A

  5%|▍         | 3764/76680 [00:01<00:27, 2640.50it/s][A[A

  5%|▌         | 4066/76680 [00:01<00:27, 2664.40it/s][A[A

  6%|▌         | 4360/

 49%|████▉     | 37940/76680 [00:13<00:13, 2798.10it/s][A[A

 50%|████▉     | 38243/76680 [00:13<00:13, 2795.70it/s][A[A

 50%|█████     | 38546/76680 [00:13<00:13, 2797.38it/s][A[A

 51%|█████     | 38914/76680 [00:13<00:13, 2803.74it/s][A[A

 51%|█████     | 39242/76680 [00:13<00:13, 2807.08it/s][A[A

 52%|█████▏    | 39561/76680 [00:14<00:13, 2806.78it/s][A[A

 52%|█████▏    | 39868/76680 [00:14<00:13, 2805.95it/s][A[A

 52%|█████▏    | 40187/76680 [00:14<00:12, 2808.63it/s][A[A

 53%|█████▎    | 40513/76680 [00:14<00:12, 2811.74it/s][A[A

 53%|█████▎    | 40823/76680 [00:14<00:12, 2811.86it/s][A[A

 54%|█████▎    | 41152/76680 [00:14<00:12, 2815.09it/s][A[A

 54%|█████▍    | 41462/76680 [00:14<00:12, 2816.08it/s][A[A

 54%|█████▍    | 41768/76680 [00:14<00:12, 2816.85it/s][A[A

 55%|█████▍    | 42070/76680 [00:14<00:12, 2817.11it/s][A[A

 55%|█████▌    | 42386/76680 [00:15<00:12, 2819.35it/s][A[A

 56%|█████▌    | 42710/76680 [00:15<00:12, 2821.90it/s]

  5%|▍         | 941/19171 [00:00<00:05, 3124.13it/s][A[A

  6%|▋         | 1228/19171 [00:00<00:05, 3048.71it/s][A[A

  8%|▊         | 1533/19171 [00:00<00:05, 3047.62it/s][A[A

 10%|▉         | 1877/19171 [00:00<00:05, 3109.58it/s][A[A

 11%|█▏        | 2186/19171 [00:00<00:05, 3104.82it/s][A[A

 13%|█▎        | 2516/19171 [00:00<00:05, 3123.18it/s][A[A

 15%|█▍        | 2822/19171 [00:00<00:05, 3114.43it/s][A[A

 16%|█▋        | 3150/19171 [00:01<00:05, 3130.73it/s][A[A

 18%|█▊        | 3459/19171 [00:01<00:05, 3097.15it/s][A[A

 20%|█▉        | 3760/19171 [00:01<00:04, 3089.86it/s][A[A

 21%|██        | 4061/19171 [00:01<00:04, 3053.93it/s][A[A

 23%|██▎       | 4352/19171 [00:01<00:04, 3033.56it/s][A[A

 24%|██▍       | 4639/19171 [00:01<00:04, 3018.60it/s][A[A

 26%|██▌       | 4931/19171 [00:01<00:04, 3012.40it/s][A[A

 27%|██▋       | 5242/19171 [00:01<00:04, 3016.64it/s][A[A

 29%|██▉       | 5536/19171 [00:01<00:04, 2963.10it/s][A[A

 30%|███ 

In [15]:
xtrain_google.shape

(76680, 300)

In [16]:
xvalid_google.shape

(19171, 300)

In [22]:
scores = []
col = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
preds_google = np.zeros((valid_mes.shape[0], len(col)))

for i, class_name in enumerate(col):
    print('fit '+ class_name)
    classifier = SGDClassifier(alpha= 1e-05, max_iter= 10000, loss='log', penalty= 'l2', n_jobs=-1, random_state=42)

    cv_score = np.mean(cross_val_score(classifier, xtrain_google, train_l[class_name], cv=5, scoring='roc_auc'))
    scores.append(cv_score)
    print('CV score for class {} is {}'.format(class_name, cv_score))

    classifier.fit(xtrain_google, train_l[class_name])
    preds_google[:,i] = classifier.predict_proba(xvalid_google)[:,1] 
    
print('Total CV score is {}'.format(np.mean(scores)))


fit toxic
CV score for class toxic is 0.8010249179465745
fit severe_toxic
CV score for class severe_toxic is 0.9250436307925302
fit obscene
CV score for class obscene is 0.8458443652900014
fit threat
CV score for class threat is 0.8794754857125302
fit insult
CV score for class insult is 0.8432770129647181
fit identity_hate
CV score for class identity_hate is 0.8365719828043383
Total CV score is 0.8552062325851154


In [25]:
gc.collect()

0

In [26]:
# Generate Word vectors of test data
xtest_google = [sent2vec(x, embeddings_index_google) for x in tqdm(test['comment_text'])]
xtest_google = np.array(xtest_google)



  0%|          | 0/226998 [00:00<?, ?it/s][A[A

  0%|          | 1/226998 [00:00<26:15:24,  2.40it/s][A[A

  0%|          | 2/226998 [00:01<32:35:42,  1.93it/s][A[A

  0%|          | 3/226998 [00:01<30:20:40,  2.08it/s][A[A

  0%|          | 6/226998 [00:02<23:46:27,  2.65it/s][A[A

  0%|          | 7/226998 [00:02<22:14:41,  2.83it/s][A[A

  0%|          | 10/226998 [00:02<16:42:23,  3.77it/s][A[A

  0%|          | 11/226998 [00:02<15:48:00,  3.99it/s][A[A

  0%|          | 12/226998 [00:02<15:07:11,  4.17it/s][A[A

  0%|          | 15/226998 [00:03<12:44:37,  4.95it/s][A[A

  0%|          | 17/226998 [00:03<12:35:15,  5.01it/s][A[A

  0%|          | 19/226998 [00:03<11:38:20,  5.42it/s][A[A

  0%|          | 21/226998 [00:03<11:14:50,  5.61it/s][A[A

  0%|          | 23/226998 [00:03<10:35:51,  5.95it/s][A[A

  0%|          | 25/226998 [00:03<10:00:59,  6.29it/s][A[A

  0%|          | 27/226998 [00:04<9:43:35,  6.48it/s] [A[A

  0%|          | 32/226

  0%|          | 816/226998 [00:22<1:41:40, 37.07it/s][A[A

  0%|          | 825/226998 [00:22<1:41:02, 37.31it/s][A[A

  0%|          | 834/226998 [00:22<1:40:34, 37.48it/s][A[A

  0%|          | 852/226998 [00:22<1:39:07, 38.03it/s][A[A

  0%|          | 861/226998 [00:22<1:38:39, 38.20it/s][A[A

  0%|          | 872/226998 [00:22<1:37:53, 38.50it/s][A[A

  0%|          | 881/226998 [00:22<1:37:44, 38.55it/s][A[A

  0%|          | 889/226998 [00:22<1:37:16, 38.74it/s][A[A

  0%|          | 898/226998 [00:23<1:36:50, 38.91it/s][A[A

  0%|          | 910/226998 [00:23<1:35:55, 39.28it/s][A[A

  0%|          | 920/226998 [00:23<1:35:24, 39.49it/s][A[A

  0%|          | 932/226998 [00:23<1:34:34, 39.84it/s][A[A

  0%|          | 942/226998 [00:23<1:34:20, 39.93it/s][A[A

  0%|          | 951/226998 [00:23<1:34:08, 40.02it/s][A[A

  0%|          | 961/226998 [00:23<1:33:34, 40.26it/s][A[A

  0%|          | 976/226998 [00:23<1:32:31, 40.71it/s][A[A

  0%|   

  1%|          | 2075/226998 [00:39<1:12:13, 51.90it/s][A[A

  1%|          | 2086/226998 [00:40<1:12:02, 52.03it/s][A[A

  1%|          | 2094/226998 [00:40<1:11:58, 52.08it/s][A[A

  1%|          | 2111/226998 [00:40<1:11:38, 52.32it/s][A[A

  1%|          | 2120/226998 [00:40<1:11:33, 52.38it/s][A[A

  1%|          | 2134/226998 [00:40<1:11:16, 52.59it/s][A[A

  1%|          | 2144/226998 [00:40<1:11:09, 52.66it/s][A[A

  1%|          | 2157/226998 [00:40<1:10:55, 52.84it/s][A[A

  1%|          | 2168/226998 [00:40<1:10:43, 52.98it/s][A[A

  1%|          | 2179/226998 [00:41<1:10:35, 53.08it/s][A[A

  1%|          | 2190/226998 [00:41<1:10:24, 53.21it/s][A[A

  1%|          | 2201/226998 [00:41<1:10:15, 53.33it/s][A[A

  1%|          | 2211/226998 [00:41<1:10:08, 53.42it/s][A[A

  1%|          | 2225/226998 [00:41<1:09:52, 53.62it/s][A[A

  1%|          | 2236/226998 [00:41<1:09:44, 53.71it/s][A[A

  1%|          | 2248/226998 [00:41<1:09:33, 53.86it/s]

  2%|▏         | 3639/226998 [00:57<58:54, 63.20it/s][A[A

  2%|▏         | 3652/226998 [00:57<58:47, 63.32it/s][A[A

  2%|▏         | 3662/226998 [00:57<58:48, 63.30it/s][A[A

  2%|▏         | 3672/226998 [00:57<58:45, 63.35it/s][A[A

  2%|▏         | 3681/226998 [00:58<58:42, 63.39it/s][A[A

  2%|▏         | 3690/226998 [00:58<58:40, 63.43it/s][A[A

  2%|▏         | 3699/226998 [00:58<58:41, 63.41it/s][A[A

  2%|▏         | 3707/226998 [00:58<58:41, 63.41it/s][A[A

  2%|▏         | 3715/226998 [00:58<58:39, 63.44it/s][A[A

  2%|▏         | 3728/226998 [00:58<58:32, 63.56it/s][A[A

  2%|▏         | 3737/226998 [00:58<58:32, 63.57it/s][A[A

  2%|▏         | 3750/226998 [00:58<58:26, 63.67it/s][A[A

  2%|▏         | 3760/226998 [00:59<58:25, 63.69it/s][A[A

  2%|▏         | 3770/226998 [00:59<58:21, 63.74it/s][A[A

  2%|▏         | 3779/226998 [00:59<58:20, 63.76it/s][A[A

  2%|▏         | 3788/226998 [00:59<58:19, 63.78it/s][A[A

  2%|▏         | 3799/22

  2%|▏         | 4943/226998 [01:15<56:13, 65.82it/s][A[A

  2%|▏         | 4957/226998 [01:15<56:10, 65.89it/s][A[A

  2%|▏         | 4973/226998 [01:15<56:06, 65.95it/s][A[A

  2%|▏         | 4986/226998 [01:15<56:03, 66.01it/s][A[A

  2%|▏         | 4998/226998 [01:15<55:59, 66.08it/s][A[A

  2%|▏         | 5011/226998 [01:15<55:54, 66.17it/s][A[A

  2%|▏         | 5025/226998 [01:15<55:50, 66.25it/s][A[A

  2%|▏         | 5039/226998 [01:15<55:45, 66.34it/s][A[A

  2%|▏         | 5052/226998 [01:16<55:42, 66.39it/s][A[A

  2%|▏         | 5066/226998 [01:16<55:38, 66.48it/s][A[A

  2%|▏         | 5079/226998 [01:16<55:34, 66.56it/s][A[A

  2%|▏         | 5091/226998 [01:16<55:34, 66.55it/s][A[A

  2%|▏         | 5103/226998 [01:16<55:30, 66.62it/s][A[A

  2%|▏         | 5119/226998 [01:16<55:24, 66.73it/s][A[A

  2%|▏         | 5131/226998 [01:16<55:23, 66.75it/s][A[A

  2%|▏         | 5174/226998 [01:16<55:00, 67.21it/s][A[A

  2%|▏         | 5214/22

  4%|▎         | 8215/226998 [01:46<47:17, 77.10it/s][A[A

  4%|▎         | 8243/226998 [01:46<47:11, 77.25it/s][A[A

  4%|▎         | 8278/226998 [01:46<47:01, 77.51it/s][A[A

  4%|▎         | 8310/226998 [01:46<46:53, 77.73it/s][A[A

  4%|▎         | 8338/226998 [01:47<46:47, 77.89it/s][A[A

  4%|▎         | 8364/226998 [01:47<46:41, 78.06it/s][A[A

  4%|▎         | 8390/226998 [01:47<46:34, 78.23it/s][A[A

  4%|▎         | 8416/226998 [01:47<46:29, 78.36it/s][A[A

  4%|▎         | 8440/226998 [01:47<46:25, 78.47it/s][A[A

  4%|▎         | 8461/226998 [01:47<46:29, 78.35it/s][A[A

  4%|▎         | 8487/226998 [01:48<46:23, 78.51it/s][A[A

  4%|▎         | 8506/226998 [01:48<46:19, 78.60it/s][A[A

  4%|▍         | 8525/226998 [01:48<46:16, 78.70it/s][A[A

  4%|▍         | 8548/226998 [01:48<46:10, 78.84it/s][A[A

  4%|▍         | 8580/226998 [01:48<46:03, 79.05it/s][A[A

  4%|▍         | 8619/226998 [01:48<45:53, 79.32it/s][A[A

  4%|▍         | 8653/22

  6%|▌         | 12737/226998 [02:03<34:42, 102.89it/s][A[A

  6%|▌         | 12769/226998 [02:03<34:38, 103.06it/s][A[A

  6%|▌         | 12802/226998 [02:04<34:34, 103.24it/s][A[A

  6%|▌         | 12832/226998 [02:04<34:31, 103.36it/s][A[A

  6%|▌         | 12873/226998 [02:04<34:26, 103.61it/s][A[A

  6%|▌         | 12905/226998 [02:04<34:23, 103.75it/s][A[A

  6%|▌         | 12935/226998 [02:04<34:20, 103.89it/s][A[A

  6%|▌         | 12963/226998 [02:04<34:17, 104.01it/s][A[A

  6%|▌         | 12989/226998 [02:04<34:15, 104.12it/s][A[A

  6%|▌         | 13023/226998 [02:04<34:11, 104.31it/s][A[A

  6%|▌         | 13051/226998 [02:05<34:09, 104.38it/s][A[A

  6%|▌         | 13095/226998 [02:05<34:04, 104.64it/s][A[A

  6%|▌         | 13129/226998 [02:05<34:00, 104.82it/s][A[A

  6%|▌         | 13160/226998 [02:05<33:57, 104.95it/s][A[A

  6%|▌         | 13193/226998 [02:05<33:53, 105.12it/s][A[A

  6%|▌         | 13233/226998 [02:05<33:49, 105.35it/s]

  8%|▊         | 17335/226998 [02:20<28:16, 123.57it/s][A[A

  8%|▊         | 17376/226998 [02:20<28:13, 123.76it/s][A[A

  8%|▊         | 17414/226998 [02:20<28:11, 123.91it/s][A[A

  8%|▊         | 17449/226998 [02:20<28:09, 124.04it/s][A[A

  8%|▊         | 17487/226998 [02:20<28:06, 124.22it/s][A[A

  8%|▊         | 17521/226998 [02:20<28:04, 124.34it/s][A[A

  8%|▊         | 17552/226998 [02:21<28:02, 124.46it/s][A[A

  8%|▊         | 17582/226998 [02:21<28:00, 124.58it/s][A[A

  8%|▊         | 17612/226998 [02:21<27:59, 124.63it/s][A[A

  8%|▊         | 17643/226998 [02:21<27:57, 124.77it/s][A[A

  8%|▊         | 17670/226998 [02:21<27:59, 124.65it/s][A[A

  8%|▊         | 17692/226998 [02:21<27:58, 124.72it/s][A[A

  8%|▊         | 17717/226998 [02:21<27:56, 124.81it/s][A[A

  8%|▊         | 17739/226998 [02:22<27:56, 124.85it/s][A[A

  8%|▊         | 17797/226998 [02:22<27:51, 125.17it/s][A[A

  8%|▊         | 17829/226998 [02:22<27:49, 125.28it/s]

 10%|▉         | 22037/226998 [02:41<25:06, 136.05it/s][A[A

 10%|▉         | 22052/226998 [02:42<25:06, 136.00it/s][A[A

 10%|▉         | 22065/226998 [02:42<25:07, 135.97it/s][A[A

 10%|▉         | 22079/226998 [02:42<25:07, 135.97it/s][A[A

 10%|▉         | 22092/226998 [02:42<25:07, 135.94it/s][A[A

 10%|▉         | 22112/226998 [02:42<25:06, 135.98it/s][A[A

 10%|▉         | 22131/226998 [02:42<25:06, 136.01it/s][A[A

 10%|▉         | 22152/226998 [02:42<25:05, 136.03it/s][A[A

 10%|▉         | 22177/226998 [02:42<25:04, 136.10it/s][A[A

 10%|▉         | 22205/226998 [02:43<25:03, 136.18it/s][A[A

 10%|▉         | 22226/226998 [02:43<25:06, 135.88it/s][A[A

 10%|▉         | 22250/226998 [02:43<25:06, 135.95it/s][A[A

 10%|▉         | 22269/226998 [02:43<25:05, 135.94it/s][A[A

 10%|▉         | 22310/226998 [02:43<25:03, 136.10it/s][A[A

 10%|▉         | 22336/226998 [02:44<25:02, 136.18it/s][A[A

 10%|▉         | 22360/226998 [02:44<25:02, 136.24it/s]

 12%|█▏        | 26414/226998 [03:00<22:47, 146.66it/s][A[A

 12%|█▏        | 26469/226998 [03:00<22:45, 146.85it/s][A[A

 12%|█▏        | 26501/226998 [03:00<22:44, 146.94it/s][A[A

 12%|█▏        | 26535/226998 [03:00<22:43, 147.04it/s][A[A

 12%|█▏        | 26567/226998 [03:00<22:42, 147.13it/s][A[A

 12%|█▏        | 26598/226998 [03:00<22:42, 147.07it/s][A[A

 12%|█▏        | 26623/226998 [03:01<22:43, 147.00it/s][A[A

 12%|█▏        | 26644/226998 [03:01<22:42, 147.03it/s][A[A

 12%|█▏        | 26671/226998 [03:01<22:42, 147.06it/s][A[A

 12%|█▏        | 26691/226998 [03:01<22:41, 147.08it/s][A[A

 12%|█▏        | 26727/226998 [03:01<22:40, 147.18it/s][A[A

 12%|█▏        | 26767/226998 [03:01<22:39, 147.32it/s][A[A

 12%|█▏        | 26833/226998 [03:01<22:36, 147.60it/s][A[A

 12%|█▏        | 26873/226998 [03:01<22:35, 147.69it/s][A[A

 12%|█▏        | 26932/226998 [03:02<22:32, 147.93it/s][A[A

 12%|█▏        | 26973/226998 [03:02<22:30, 148.06it/s]

 14%|█▍        | 31977/226998 [03:16<20:00, 162.43it/s][A[A

 14%|█▍        | 32019/226998 [03:16<19:59, 162.56it/s][A[A

 14%|█▍        | 32061/226998 [03:17<19:58, 162.66it/s][A[A

 14%|█▍        | 32117/226998 [03:17<19:56, 162.85it/s][A[A

 14%|█▍        | 32164/226998 [03:17<19:55, 163.00it/s][A[A

 14%|█▍        | 32208/226998 [03:17<19:54, 163.14it/s][A[A

 14%|█▍        | 32254/226998 [03:17<19:52, 163.29it/s][A[A

 14%|█▍        | 32315/226998 [03:17<19:50, 163.48it/s][A[A

 14%|█▍        | 32359/226998 [03:17<19:49, 163.61it/s][A[A

 14%|█▍        | 32411/226998 [03:17<19:47, 163.79it/s][A[A

 14%|█▍        | 32468/226998 [03:17<19:46, 164.00it/s][A[A

 14%|█▍        | 32517/226998 [03:18<19:44, 164.15it/s][A[A

 14%|█▍        | 32565/226998 [03:18<19:43, 164.28it/s][A[A

 14%|█▍        | 32609/226998 [03:18<19:42, 164.40it/s][A[A

 14%|█▍        | 32666/226998 [03:18<19:40, 164.60it/s][A[A

 14%|█▍        | 32712/226998 [03:18<19:39, 164.74it/s]

 17%|█▋        | 38136/226998 [03:32<17:33, 179.35it/s][A[A

 17%|█▋        | 38180/226998 [03:32<17:32, 179.46it/s][A[A

 17%|█▋        | 38218/226998 [03:32<17:31, 179.55it/s][A[A

 17%|█▋        | 38255/226998 [03:32<17:30, 179.62it/s][A[A

 17%|█▋        | 38302/226998 [03:33<17:29, 179.76it/s][A[A

 17%|█▋        | 38341/226998 [03:33<17:29, 179.83it/s][A[A

 17%|█▋        | 38393/226998 [03:33<17:27, 179.99it/s][A[A

 17%|█▋        | 38434/226998 [03:33<17:27, 180.09it/s][A[A

 17%|█▋        | 38474/226998 [03:33<17:26, 180.17it/s][A[A

 17%|█▋        | 38512/226998 [03:34<17:27, 179.95it/s][A[A

 17%|█▋        | 38541/226998 [03:34<17:27, 179.97it/s][A[A

 17%|█▋        | 38568/226998 [03:34<17:27, 179.96it/s][A[A

 17%|█▋        | 38629/226998 [03:34<17:25, 180.15it/s][A[A

 17%|█▋        | 38672/226998 [03:34<17:24, 180.27it/s][A[A

 17%|█▋        | 38709/226998 [03:34<17:24, 180.34it/s][A[A

 17%|█▋        | 38767/226998 [03:34<17:22, 180.53it/s]

 20%|█▉        | 44650/226998 [03:51<15:44, 193.01it/s][A[A

 20%|█▉        | 44712/226998 [03:51<15:43, 193.19it/s][A[A

 20%|█▉        | 44763/226998 [03:51<15:43, 193.24it/s][A[A

 20%|█▉        | 44806/226998 [03:51<15:42, 193.32it/s][A[A

 20%|█▉        | 44867/226998 [03:51<15:41, 193.49it/s][A[A

 20%|█▉        | 44943/226998 [03:51<15:39, 193.73it/s][A[A

 20%|█▉        | 44998/226998 [03:52<15:38, 193.83it/s][A[A

 20%|█▉        | 45068/226998 [03:52<15:37, 194.05it/s][A[A

 20%|█▉        | 45123/226998 [03:52<15:36, 194.11it/s][A[A

 20%|█▉        | 45196/226998 [03:52<15:35, 194.33it/s][A[A

 20%|█▉        | 45265/226998 [03:52<15:34, 194.55it/s][A[A

 20%|█▉        | 45322/226998 [03:53<15:34, 194.38it/s][A[A

 20%|██        | 45408/226998 [03:53<15:32, 194.66it/s][A[A

 20%|██        | 45464/226998 [03:53<15:31, 194.82it/s][A[A

 20%|██        | 45519/226998 [03:53<15:30, 194.96it/s][A[A

 20%|██        | 45572/226998 [03:53<15:29, 195.10it/s]

 23%|██▎       | 51374/226998 [04:08<14:10, 206.45it/s][A[A

 23%|██▎       | 51405/226998 [04:08<14:10, 206.48it/s][A[A

 23%|██▎       | 51437/226998 [04:09<14:10, 206.42it/s][A[A

 23%|██▎       | 51486/226998 [04:09<14:09, 206.54it/s][A[A

 23%|██▎       | 51518/226998 [04:09<14:09, 206.58it/s][A[A

 23%|██▎       | 51556/226998 [04:09<14:09, 206.63it/s][A[A

 23%|██▎       | 51589/226998 [04:09<14:08, 206.68it/s][A[A

 23%|██▎       | 51626/226998 [04:09<14:08, 206.75it/s][A[A

 23%|██▎       | 51660/226998 [04:09<14:08, 206.75it/s][A[A

 23%|██▎       | 51702/226998 [04:09<14:07, 206.83it/s][A[A

 23%|██▎       | 51734/226998 [04:16<14:30, 201.36it/s][A[A

 23%|██▎       | 51784/226998 [04:17<14:29, 201.48it/s][A[A

 23%|██▎       | 51814/226998 [04:17<14:30, 201.29it/s][A[A

 23%|██▎       | 51838/226998 [04:17<14:30, 201.28it/s][A[A

 23%|██▎       | 51859/226998 [04:18<14:31, 201.00it/s][A[A

 23%|██▎       | 51876/226998 [04:18<14:32, 200.77it/s]

 25%|██▍       | 56542/226998 [04:32<13:41, 207.44it/s][A[A

 25%|██▍       | 56597/226998 [04:32<13:41, 207.47it/s][A[A

 25%|██▍       | 56660/226998 [04:32<13:40, 207.62it/s][A[A

 25%|██▍       | 56709/226998 [04:33<13:39, 207.69it/s][A[A

 25%|██▌       | 56754/226998 [04:33<13:39, 207.74it/s][A[A

 25%|██▌       | 56794/226998 [04:33<13:39, 207.81it/s][A[A

 25%|██▌       | 56855/226998 [04:33<13:38, 207.94it/s][A[A

 25%|██▌       | 56900/226998 [04:33<13:37, 208.02it/s][A[A

 25%|██▌       | 56956/226998 [04:33<13:36, 208.14it/s][A[A

 25%|██▌       | 57001/226998 [04:33<13:36, 208.20it/s][A[A

 25%|██▌       | 57044/226998 [04:33<13:36, 208.26it/s][A[A

 25%|██▌       | 57084/226998 [04:34<13:35, 208.30it/s][A[A

 25%|██▌       | 57120/226998 [04:34<13:35, 208.35it/s][A[A

 25%|██▌       | 57162/226998 [04:34<13:34, 208.43it/s][A[A

 25%|██▌       | 57212/226998 [04:34<13:34, 208.54it/s][A[A

 25%|██▌       | 57272/226998 [04:34<13:33, 208.67it/s]

 28%|██▊       | 62970/226998 [04:48<12:30, 218.54it/s][A[A

 28%|██▊       | 63009/226998 [04:48<12:30, 218.57it/s][A[A

 28%|██▊       | 63075/226998 [04:48<12:29, 218.69it/s][A[A

 28%|██▊       | 63114/226998 [04:48<12:29, 218.63it/s][A[A

 28%|██▊       | 63162/226998 [04:48<12:29, 218.67it/s][A[A

 28%|██▊       | 63193/226998 [04:49<12:31, 218.05it/s][A[A

 28%|██▊       | 63235/226998 [04:49<12:30, 218.11it/s][A[A

 28%|██▊       | 63264/226998 [04:50<12:30, 218.12it/s][A[A

 28%|██▊       | 63308/226998 [04:50<12:30, 218.19it/s][A[A

 28%|██▊       | 63349/226998 [04:50<12:29, 218.26it/s][A[A

 28%|██▊       | 63413/226998 [04:50<12:28, 218.40it/s][A[A

 28%|██▊       | 63460/226998 [04:50<12:28, 218.48it/s][A[A

 28%|██▊       | 63518/226998 [04:50<12:27, 218.61it/s][A[A

 28%|██▊       | 63566/226998 [04:50<12:27, 218.65it/s][A[A

 28%|██▊       | 63609/226998 [04:50<12:27, 218.68it/s][A[A

 28%|██▊       | 63688/226998 [04:50<12:26, 218.88it/s]

 30%|███       | 68663/226998 [05:06<11:46, 224.01it/s][A[A

 30%|███       | 68703/226998 [05:06<11:46, 224.06it/s][A[A

 30%|███       | 68740/226998 [05:06<11:46, 223.98it/s][A[A

 30%|███       | 68800/226998 [05:07<11:45, 224.10it/s][A[A

 30%|███       | 68839/226998 [05:07<11:45, 224.15it/s][A[A

 30%|███       | 68910/226998 [05:07<11:44, 224.30it/s][A[A

 30%|███       | 68957/226998 [05:07<11:44, 224.36it/s][A[A

 30%|███       | 69001/226998 [05:07<11:44, 224.42it/s][A[A

 30%|███       | 69043/226998 [05:07<11:43, 224.45it/s][A[A

 30%|███       | 69087/226998 [05:07<11:43, 224.51it/s][A[A

 30%|███       | 69126/226998 [05:07<11:43, 224.56it/s][A[A

 30%|███       | 69184/226998 [05:07<11:42, 224.67it/s][A[A

 31%|███       | 69236/226998 [05:08<11:41, 224.77it/s][A[A

 31%|███       | 69313/226998 [05:08<11:41, 224.93it/s][A[A

 31%|███       | 69367/226998 [05:08<11:40, 224.96it/s][A[A

 31%|███       | 69440/226998 [05:08<11:39, 225.09it/s]

 33%|███▎      | 75134/226998 [05:22<10:52, 232.68it/s][A[A

 33%|███▎      | 75179/226998 [05:23<10:52, 232.70it/s][A[A

 33%|███▎      | 75220/226998 [05:23<10:52, 232.77it/s][A[A

 33%|███▎      | 75268/226998 [05:23<10:51, 232.84it/s][A[A

 33%|███▎      | 75311/226998 [05:23<10:51, 232.88it/s][A[A

 33%|███▎      | 75353/226998 [05:23<10:50, 232.95it/s][A[A

 33%|███▎      | 75401/226998 [05:23<10:50, 233.03it/s][A[A

 33%|███▎      | 75445/226998 [05:23<10:50, 233.02it/s][A[A

 33%|███▎      | 75497/226998 [05:23<10:49, 233.11it/s][A[A

 33%|███▎      | 75557/226998 [05:23<10:49, 233.22it/s][A[A

 33%|███▎      | 75604/226998 [05:24<10:48, 233.27it/s][A[A

 33%|███▎      | 75654/226998 [05:24<10:48, 233.35it/s][A[A

 33%|███▎      | 75698/226998 [05:24<10:48, 233.40it/s][A[A

 33%|███▎      | 75741/226998 [05:24<10:48, 233.36it/s][A[A

 33%|███▎      | 75793/226998 [05:24<10:47, 233.43it/s][A[A

 33%|███▎      | 75832/226998 [05:24<10:47, 233.46it/s]

 36%|███▌      | 81398/226998 [05:38<10:06, 240.15it/s][A[A

 36%|███▌      | 81471/226998 [05:39<10:05, 240.29it/s][A[A

 36%|███▌      | 81522/226998 [05:39<10:05, 240.15it/s][A[A

 36%|███▌      | 81565/226998 [05:39<10:05, 240.21it/s][A[A

 36%|███▌      | 81606/226998 [05:39<10:05, 240.25it/s][A[A

 36%|███▌      | 81646/226998 [05:39<10:04, 240.30it/s][A[A

 36%|███▌      | 81703/226998 [05:39<10:04, 240.40it/s][A[A

 36%|███▌      | 81759/226998 [05:39<10:03, 240.49it/s][A[A

 36%|███▌      | 81807/226998 [05:40<10:03, 240.54it/s][A[A

 36%|███▌      | 81854/226998 [05:40<10:03, 240.61it/s][A[A

 36%|███▌      | 81932/226998 [05:40<10:02, 240.76it/s][A[A

 36%|███▌      | 82018/226998 [05:40<10:01, 240.95it/s][A[A

 36%|███▌      | 82082/226998 [05:40<10:01, 241.06it/s][A[A

 36%|███▌      | 82164/226998 [05:40<10:00, 241.23it/s][A[A

 36%|███▌      | 82234/226998 [05:40<09:59, 241.33it/s][A[A

 36%|███▋      | 82303/226998 [05:40<09:59, 241.45it/s]

 39%|███▉      | 88228/226998 [05:55<09:18, 248.39it/s][A[A

 39%|███▉      | 88274/226998 [05:55<09:18, 248.42it/s][A[A

 39%|███▉      | 88329/226998 [05:55<09:18, 248.50it/s][A[A

 39%|███▉      | 88374/226998 [05:55<09:17, 248.54it/s][A[A

 39%|███▉      | 88425/226998 [05:55<09:17, 248.61it/s][A[A

 39%|███▉      | 88477/226998 [05:55<09:17, 248.69it/s][A[A

 39%|███▉      | 88524/226998 [05:55<09:16, 248.74it/s][A[A

 39%|███▉      | 88580/226998 [05:56<09:16, 248.82it/s][A[A

 39%|███▉      | 88629/226998 [05:56<09:16, 248.86it/s][A[A

 39%|███▉      | 88673/226998 [05:56<09:15, 248.90it/s][A[A

 39%|███▉      | 88715/226998 [05:56<09:15, 248.93it/s][A[A

 39%|███▉      | 88766/226998 [05:56<09:15, 249.00it/s][A[A

 39%|███▉      | 88809/226998 [05:56<09:14, 249.02it/s][A[A

 39%|███▉      | 88849/226998 [05:56<09:14, 249.02it/s][A[A

 39%|███▉      | 88887/226998 [05:56<09:14, 249.05it/s][A[A

 39%|███▉      | 88947/226998 [05:57<09:14, 249.14it/s]

 41%|████▏     | 94102/226998 [06:11<08:45, 253.03it/s][A[A

 41%|████▏     | 94144/226998 [06:12<08:44, 253.07it/s][A[A

 41%|████▏     | 94185/226998 [06:12<08:44, 253.11it/s][A[A

 42%|████▏     | 94237/226998 [06:12<08:44, 253.17it/s][A[A

 42%|████▏     | 94279/226998 [06:12<08:44, 253.21it/s][A[A

 42%|████▏     | 94320/226998 [06:12<08:43, 253.25it/s][A[A

 42%|████▏     | 94362/226998 [06:12<08:43, 253.27it/s][A[A

 42%|████▏     | 94400/226998 [06:12<08:43, 253.24it/s][A[A

 42%|████▏     | 94445/226998 [06:12<08:43, 253.29it/s][A[A

 42%|████▏     | 94499/226998 [06:12<08:42, 253.36it/s][A[A

 42%|████▏     | 94559/226998 [06:13<08:42, 253.46it/s][A[A

 42%|████▏     | 94614/226998 [06:13<08:42, 253.53it/s][A[A

 42%|████▏     | 94666/226998 [06:13<08:41, 253.60it/s][A[A

 42%|████▏     | 94715/226998 [06:13<08:41, 253.64it/s][A[A

 42%|████▏     | 94760/226998 [06:13<08:41, 253.66it/s][A[A

 42%|████▏     | 94801/226998 [06:13<08:41, 253.55it/s]

 44%|████▍     | 100890/226998 [06:28<08:06, 259.40it/s][A[A

 44%|████▍     | 100949/226998 [06:29<08:05, 259.47it/s][A[A

 44%|████▍     | 101006/226998 [06:29<08:05, 259.54it/s][A[A

 45%|████▍     | 101061/226998 [06:29<08:05, 259.57it/s][A[A

 45%|████▍     | 101109/226998 [06:29<08:04, 259.62it/s][A[A

 45%|████▍     | 101155/226998 [06:29<08:04, 259.66it/s][A[A

 45%|████▍     | 101201/226998 [06:29<08:04, 259.70it/s][A[A

 45%|████▍     | 101247/226998 [06:29<08:04, 259.75it/s][A[A

 45%|████▍     | 101291/226998 [06:29<08:03, 259.74it/s][A[A

 45%|████▍     | 101329/226998 [06:30<08:03, 259.76it/s][A[A

 45%|████▍     | 101368/226998 [06:30<08:03, 259.79it/s][A[A

 45%|████▍     | 101427/226998 [06:30<08:03, 259.87it/s][A[A

 45%|████▍     | 101470/226998 [06:30<08:03, 259.89it/s][A[A

 45%|████▍     | 101517/226998 [06:30<08:02, 259.95it/s][A[A

 45%|████▍     | 101576/226998 [06:30<08:02, 260.03it/s][A[A

 45%|████▍     | 101623/226998 [06:30<08

 47%|████▋     | 107111/226998 [06:44<07:32, 264.91it/s][A[A

 47%|████▋     | 107155/226998 [06:44<07:32, 264.92it/s][A[A

 47%|████▋     | 107207/226998 [06:44<07:32, 264.98it/s][A[A

 47%|████▋     | 107263/226998 [06:44<07:31, 265.05it/s][A[A

 47%|████▋     | 107312/226998 [06:44<07:31, 265.10it/s][A[A

 47%|████▋     | 107365/226998 [06:44<07:31, 265.17it/s][A[A

 47%|████▋     | 107414/226998 [06:45<07:30, 265.20it/s][A[A

 47%|████▋     | 107460/226998 [06:45<07:30, 265.23it/s][A[A

 47%|████▋     | 107503/226998 [06:45<07:30, 265.24it/s][A[A

 47%|████▋     | 107541/226998 [06:45<07:30, 265.26it/s][A[A

 47%|████▋     | 107579/226998 [06:45<07:30, 265.28it/s][A[A

 47%|████▋     | 107619/226998 [06:45<07:29, 265.31it/s][A[A

 47%|████▋     | 107663/226998 [06:45<07:29, 265.35it/s][A[A

 47%|████▋     | 107719/226998 [06:45<07:29, 265.43it/s][A[A

 47%|████▋     | 107763/226998 [06:45<07:29, 265.45it/s][A[A

 47%|████▋     | 107811/226998 [06:46<07

 50%|█████     | 113736/226998 [07:00<06:58, 270.49it/s][A[A

 50%|█████     | 113780/226998 [07:00<06:58, 270.54it/s][A[A

 50%|█████     | 113822/226998 [07:00<06:58, 270.47it/s][A[A

 50%|█████     | 113864/226998 [07:00<06:58, 270.50it/s][A[A

 50%|█████     | 113936/226998 [07:01<06:57, 270.61it/s][A[A

 50%|█████     | 113984/226998 [07:01<06:57, 270.62it/s][A[A

 50%|█████     | 114040/226998 [07:01<06:57, 270.69it/s][A[A

 50%|█████     | 114093/226998 [07:01<06:57, 270.75it/s][A[A

 50%|█████     | 114143/226998 [07:01<06:56, 270.80it/s][A[A

 50%|█████     | 114192/226998 [07:01<06:56, 270.82it/s][A[A

 50%|█████     | 114236/226998 [07:01<06:56, 270.84it/s][A[A

 50%|█████     | 114293/226998 [07:01<06:56, 270.90it/s][A[A

 50%|█████     | 114337/226998 [07:02<06:55, 270.91it/s][A[A

 50%|█████     | 114379/226998 [07:02<06:55, 270.93it/s][A[A

 50%|█████     | 114418/226998 [07:02<06:55, 270.96it/s][A[A

 50%|█████     | 114457/226998 [07:02<06

 53%|█████▎    | 119797/226998 [07:16<06:30, 274.69it/s][A[A

 53%|█████▎    | 119866/226998 [07:16<06:29, 274.78it/s][A[A

 53%|█████▎    | 119914/226998 [07:16<06:29, 274.83it/s][A[A

 53%|█████▎    | 119962/226998 [07:16<06:29, 274.87it/s][A[A

 53%|█████▎    | 120018/226998 [07:16<06:29, 274.92it/s][A[A

 53%|█████▎    | 120065/226998 [07:16<06:28, 274.96it/s][A[A

 53%|█████▎    | 120118/226998 [07:16<06:28, 275.02it/s][A[A

 53%|█████▎    | 120165/226998 [07:16<06:28, 275.06it/s][A[A

 53%|█████▎    | 120212/226998 [07:17<06:28, 275.06it/s][A[A

 53%|█████▎    | 120254/226998 [07:17<06:28, 275.09it/s][A[A

 53%|█████▎    | 120306/226998 [07:17<06:27, 275.14it/s][A[A

 53%|█████▎    | 120363/226998 [07:17<06:27, 275.21it/s][A[A

 53%|█████▎    | 120411/226998 [07:17<06:27, 275.23it/s][A[A

 53%|█████▎    | 120455/226998 [07:17<06:27, 275.25it/s][A[A

 53%|█████▎    | 120500/226998 [07:17<06:26, 275.29it/s][A[A

 53%|█████▎    | 120548/226998 [07:17<06

 56%|█████▌    | 126867/226998 [07:31<05:56, 281.24it/s][A[A

 56%|█████▌    | 126914/226998 [07:31<05:55, 281.26it/s][A[A

 56%|█████▌    | 126959/226998 [07:31<05:55, 281.29it/s][A[A

 56%|█████▌    | 127002/226998 [07:31<05:55, 281.31it/s][A[A

 56%|█████▌    | 127044/226998 [07:31<05:55, 281.33it/s][A[A

 56%|█████▌    | 127131/226998 [07:31<05:54, 281.46it/s][A[A

 56%|█████▌    | 127187/226998 [07:31<05:54, 281.52it/s][A[A

 56%|█████▌    | 127241/226998 [07:31<05:54, 281.56it/s][A[A

 56%|█████▌    | 127292/226998 [07:32<05:54, 281.61it/s][A[A

 56%|█████▌    | 127348/226998 [07:32<05:53, 281.67it/s][A[A

 56%|█████▌    | 127415/226998 [07:32<05:53, 281.75it/s][A[A

 56%|█████▌    | 127472/226998 [07:32<05:53, 281.81it/s][A[A

 56%|█████▌    | 127526/226998 [07:32<05:52, 281.84it/s][A[A

 56%|█████▌    | 127579/226998 [07:32<05:52, 281.89it/s][A[A

 56%|█████▌    | 127630/226998 [07:32<05:52, 281.94it/s][A[A

 56%|█████▌    | 127680/226998 [07:32<05

 59%|█████▉    | 133412/226998 [07:46<05:27, 286.11it/s][A[A

 59%|█████▉    | 133478/226998 [07:46<05:26, 286.19it/s][A[A

 59%|█████▉    | 133531/226998 [07:46<05:26, 286.25it/s][A[A

 59%|█████▉    | 133591/226998 [07:46<05:26, 286.30it/s][A[A

 59%|█████▉    | 133644/226998 [07:46<05:26, 286.35it/s][A[A

 59%|█████▉    | 133695/226998 [07:46<05:25, 286.37it/s][A[A

 59%|█████▉    | 133742/226998 [07:47<05:25, 286.37it/s][A[A

 59%|█████▉    | 133789/226998 [07:47<05:25, 286.42it/s][A[A

 59%|█████▉    | 133847/226998 [07:47<05:25, 286.48it/s][A[A

 59%|█████▉    | 133895/226998 [07:47<05:24, 286.51it/s][A[A

 59%|█████▉    | 133946/226998 [07:47<05:24, 286.55it/s][A[A

 59%|█████▉    | 134010/226998 [07:47<05:24, 286.62it/s][A[A

 59%|█████▉    | 134061/226998 [07:47<05:24, 286.64it/s][A[A

 59%|█████▉    | 134111/226998 [07:47<05:24, 286.68it/s][A[A

 59%|█████▉    | 134161/226998 [07:47<05:23, 286.72it/s][A[A

 59%|█████▉    | 134214/226998 [07:48<05

 62%|██████▏   | 141145/226998 [08:01<04:52, 293.10it/s][A[A

 62%|██████▏   | 141193/226998 [08:01<04:52, 293.14it/s][A[A

 62%|██████▏   | 141241/226998 [08:01<04:52, 293.16it/s][A[A

 62%|██████▏   | 141295/226998 [08:01<04:52, 293.21it/s][A[A

 62%|██████▏   | 141342/226998 [08:02<04:52, 293.23it/s][A[A

 62%|██████▏   | 141386/226998 [08:02<04:51, 293.24it/s][A[A

 62%|██████▏   | 141428/226998 [08:02<04:51, 293.27it/s][A[A

 62%|██████▏   | 141478/226998 [08:02<04:51, 293.31it/s][A[A

 62%|██████▏   | 141522/226998 [08:02<04:51, 293.32it/s][A[A

 62%|██████▏   | 141565/226998 [08:02<04:51, 293.34it/s][A[A

 62%|██████▏   | 141626/226998 [08:02<04:50, 293.41it/s][A[A

 62%|██████▏   | 141672/226998 [08:02<04:50, 293.43it/s][A[A

 62%|██████▏   | 141725/226998 [08:02<04:50, 293.48it/s][A[A

 62%|██████▏   | 141777/226998 [08:03<04:50, 293.52it/s][A[A

 62%|██████▏   | 141824/226998 [08:03<04:50, 293.55it/s][A[A

 63%|██████▎   | 141885/226998 [08:03<04

 65%|██████▌   | 147734/226998 [08:16<04:26, 297.47it/s][A[A

 65%|██████▌   | 147787/226998 [08:16<04:26, 297.52it/s][A[A

 65%|██████▌   | 147834/226998 [08:16<04:26, 297.53it/s][A[A

 65%|██████▌   | 147878/226998 [08:16<04:25, 297.55it/s][A[A

 65%|██████▌   | 147932/226998 [08:17<04:25, 297.59it/s][A[A

 65%|██████▌   | 147978/226998 [08:17<04:25, 297.62it/s][A[A

 65%|██████▌   | 148022/226998 [08:17<04:25, 297.64it/s][A[A

 65%|██████▌   | 148083/226998 [08:17<04:25, 297.70it/s][A[A

 65%|██████▌   | 148135/226998 [08:17<04:24, 297.74it/s][A[A

 65%|██████▌   | 148186/226998 [08:17<04:24, 297.79it/s][A[A

 65%|██████▌   | 148251/226998 [08:17<04:24, 297.85it/s][A[A

 65%|██████▌   | 148306/226998 [08:17<04:24, 297.90it/s][A[A

 65%|██████▌   | 148364/226998 [08:17<04:23, 297.95it/s][A[A

 65%|██████▌   | 148417/226998 [08:18<04:23, 297.97it/s][A[A

 65%|██████▌   | 148491/226998 [08:18<04:23, 298.06it/s][A[A

 65%|██████▌   | 148546/226998 [08:18<04

 69%|██████▊   | 155803/226998 [08:32<03:54, 304.06it/s][A[A

 69%|██████▊   | 155869/226998 [08:32<03:53, 304.12it/s][A[A

 69%|██████▊   | 155921/226998 [08:32<03:53, 304.16it/s][A[A

 69%|██████▊   | 155975/226998 [08:32<03:53, 304.21it/s][A[A

 69%|██████▊   | 156028/226998 [08:32<03:53, 304.20it/s][A[A

 69%|██████▉   | 156080/226998 [08:33<03:53, 304.25it/s][A[A

 69%|██████▉   | 156129/226998 [08:33<03:52, 304.28it/s][A[A

 69%|██████▉   | 156177/226998 [08:33<03:52, 304.29it/s][A[A

 69%|██████▉   | 156242/226998 [08:33<03:52, 304.35it/s][A[A

 69%|██████▉   | 156296/226998 [08:33<03:52, 304.39it/s][A[A

 69%|██████▉   | 156345/226998 [08:33<03:52, 304.42it/s][A[A

 69%|██████▉   | 156393/226998 [08:33<03:51, 304.40it/s][A[A

 69%|██████▉   | 156434/226998 [08:34<03:52, 304.15it/s][A[A

 69%|██████▉   | 156465/226998 [08:34<03:52, 303.97it/s][A[A

 69%|██████▉   | 156494/226998 [08:34<03:51, 303.95it/s][A[A

 69%|██████▉   | 156518/226998 [08:35<03

 71%|███████   | 160269/226998 [08:52<03:41, 301.23it/s][A[A

 71%|███████   | 160324/226998 [08:52<03:41, 301.28it/s][A[A

 71%|███████   | 160377/226998 [08:52<03:41, 301.32it/s][A[A

 71%|███████   | 160426/226998 [08:52<03:40, 301.35it/s][A[A

 71%|███████   | 160495/226998 [08:52<03:40, 301.42it/s][A[A

 71%|███████   | 160549/226998 [08:52<03:40, 301.45it/s][A[A

 71%|███████   | 160601/226998 [08:52<03:40, 301.47it/s][A[A

 71%|███████   | 160671/226998 [08:52<03:39, 301.55it/s][A[A

 71%|███████   | 160729/226998 [08:52<03:39, 301.59it/s][A[A

 71%|███████   | 160783/226998 [08:53<03:39, 301.61it/s][A[A

 71%|███████   | 160842/226998 [08:53<03:39, 301.66it/s][A[A

 71%|███████   | 160901/226998 [08:53<03:39, 301.71it/s][A[A

 71%|███████   | 160955/226998 [08:53<03:38, 301.75it/s][A[A

 71%|███████   | 161009/226998 [08:53<03:38, 301.79it/s][A[A

 71%|███████   | 161098/226998 [08:53<03:38, 301.90it/s][A[A

 71%|███████   | 161159/226998 [08:53<03

 74%|███████▍  | 167438/226998 [09:07<03:14, 305.56it/s][A[A

 74%|███████▍  | 167540/226998 [09:08<03:14, 305.69it/s][A[A

 74%|███████▍  | 167624/226998 [09:08<03:14, 305.79it/s][A[A

 74%|███████▍  | 167715/226998 [09:08<03:13, 305.90it/s][A[A

 74%|███████▍  | 167806/226998 [09:08<03:13, 306.01it/s][A[A

 74%|███████▍  | 167892/226998 [09:08<03:13, 306.06it/s][A[A

 74%|███████▍  | 167967/226998 [09:08<03:12, 306.13it/s][A[A

 74%|███████▍  | 168036/226998 [09:08<03:12, 306.17it/s][A[A

 74%|███████▍  | 168118/226998 [09:08<03:12, 306.26it/s][A[A

 74%|███████▍  | 168196/226998 [09:09<03:11, 306.31it/s][A[A

 74%|███████▍  | 168278/226998 [09:09<03:11, 306.38it/s][A[A

 74%|███████▍  | 168378/226998 [09:09<03:11, 306.51it/s][A[A

 74%|███████▍  | 168450/226998 [09:09<03:10, 306.57it/s][A[A

 74%|███████▍  | 168519/226998 [09:09<03:10, 306.64it/s][A[A

 74%|███████▍  | 168586/226998 [09:09<03:10, 306.70it/s][A[A

 74%|███████▍  | 168694/226998 [09:09<03

 77%|███████▋  | 175295/226998 [09:23<02:46, 311.14it/s][A[A

 77%|███████▋  | 175361/226998 [09:23<02:45, 311.20it/s][A[A

 77%|███████▋  | 175417/226998 [09:23<02:45, 311.24it/s][A[A

 77%|███████▋  | 175492/226998 [09:23<02:45, 311.30it/s][A[A

 77%|███████▋  | 175550/226998 [09:23<02:45, 311.33it/s][A[A

 77%|███████▋  | 175621/226998 [09:23<02:44, 311.40it/s][A[A

 77%|███████▋  | 175679/226998 [09:24<02:44, 311.38it/s][A[A

 77%|███████▋  | 175734/226998 [09:24<02:44, 311.42it/s][A[A

 77%|███████▋  | 175784/226998 [09:24<02:44, 311.44it/s][A[A

 77%|███████▋  | 175852/226998 [09:24<02:44, 311.50it/s][A[A

 77%|███████▋  | 175906/226998 [09:24<02:44, 311.53it/s][A[A

 78%|███████▊  | 175957/226998 [09:24<02:43, 311.55it/s][A[A

 78%|███████▊  | 176005/226998 [09:24<02:43, 311.55it/s][A[A

 78%|███████▊  | 176054/226998 [09:25<02:43, 311.58it/s][A[A

 78%|███████▊  | 176120/226998 [09:25<02:43, 311.64it/s][A[A

 78%|███████▊  | 176186/226998 [09:25<02

 81%|████████  | 182792/226998 [09:38<02:20, 315.75it/s][A[A

 81%|████████  | 182846/226998 [09:39<02:19, 315.79it/s][A[A

 81%|████████  | 182902/226998 [09:39<02:19, 315.83it/s][A[A

 81%|████████  | 182950/226998 [09:39<02:19, 315.85it/s][A[A

 81%|████████  | 183014/226998 [09:39<02:19, 315.91it/s][A[A

 81%|████████  | 183080/226998 [09:39<02:18, 315.96it/s][A[A

 81%|████████  | 183135/226998 [09:39<02:18, 316.00it/s][A[A

 81%|████████  | 183190/226998 [09:39<02:18, 315.99it/s][A[A

 81%|████████  | 183242/226998 [09:39<02:18, 316.02it/s][A[A

 81%|████████  | 183302/226998 [09:39<02:18, 316.07it/s][A[A

 81%|████████  | 183359/226998 [09:40<02:18, 316.11it/s][A[A

 81%|████████  | 183411/226998 [09:40<02:17, 316.14it/s][A[A

 81%|████████  | 183483/226998 [09:40<02:17, 316.20it/s][A[A

 81%|████████  | 183537/226998 [09:40<02:17, 316.15it/s][A[A

 81%|████████  | 183581/226998 [09:40<02:17, 316.14it/s][A[A

 81%|████████  | 183627/226998 [09:40<02

 84%|████████▍ | 191295/226998 [09:54<01:50, 322.01it/s][A[A

 84%|████████▍ | 191371/226998 [09:54<01:50, 322.07it/s][A[A

 84%|████████▍ | 191442/226998 [09:54<01:50, 322.12it/s][A[A

 84%|████████▍ | 191523/226998 [09:54<01:50, 322.20it/s][A[A

 84%|████████▍ | 191594/226998 [09:54<01:49, 322.26it/s][A[A

 84%|████████▍ | 191664/226998 [09:54<01:49, 322.32it/s][A[A

 84%|████████▍ | 191733/226998 [09:54<01:49, 322.35it/s][A[A

 84%|████████▍ | 191794/226998 [09:54<01:49, 322.39it/s][A[A

 85%|████████▍ | 191893/226998 [09:55<01:48, 322.50it/s][A[A

 85%|████████▍ | 191963/226998 [09:55<01:48, 322.56it/s][A[A

 85%|████████▍ | 192067/226998 [09:55<01:48, 322.68it/s][A[A

 85%|████████▍ | 192146/226998 [09:55<01:47, 322.75it/s][A[A

 85%|████████▍ | 192223/226998 [09:55<01:47, 322.82it/s][A[A

 85%|████████▍ | 192297/226998 [09:55<01:47, 322.87it/s][A[A

 85%|████████▍ | 192366/226998 [09:55<01:47, 322.90it/s][A[A

 85%|████████▍ | 192436/226998 [09:55<01

 88%|████████▊ | 198814/226998 [10:09<01:26, 326.28it/s][A[A

 88%|████████▊ | 198870/226998 [10:09<01:26, 326.29it/s][A[A

 88%|████████▊ | 198932/226998 [10:09<01:26, 326.33it/s][A[A

 88%|████████▊ | 198985/226998 [10:09<01:25, 326.34it/s][A[A

 88%|████████▊ | 199034/226998 [10:09<01:25, 326.31it/s][A[A

 88%|████████▊ | 199075/226998 [10:10<01:25, 326.31it/s][A[A

 88%|████████▊ | 199137/226998 [10:10<01:25, 326.35it/s][A[A

 88%|████████▊ | 199182/226998 [10:10<01:25, 326.37it/s][A[A

 88%|████████▊ | 199236/226998 [10:10<01:25, 326.41it/s][A[A

 88%|████████▊ | 199296/226998 [10:10<01:24, 326.45it/s][A[A

 88%|████████▊ | 199347/226998 [10:10<01:24, 326.47it/s][A[A

 88%|████████▊ | 199410/226998 [10:10<01:24, 326.52it/s][A[A

 88%|████████▊ | 199471/226998 [10:10<01:24, 326.56it/s][A[A

 88%|████████▊ | 199525/226998 [10:10<01:24, 326.58it/s][A[A

 88%|████████▊ | 199602/226998 [10:11<01:23, 326.65it/s][A[A

 88%|████████▊ | 199660/226998 [10:11<01

 91%|█████████ | 206392/226998 [10:25<01:02, 330.10it/s][A[A

 91%|█████████ | 206450/226998 [10:25<01:02, 330.14it/s][A[A

 91%|█████████ | 206506/226998 [10:25<01:02, 330.16it/s][A[A

 91%|█████████ | 206557/226998 [10:25<01:01, 330.15it/s][A[A

 91%|█████████ | 206628/226998 [10:25<01:01, 330.21it/s][A[A

 91%|█████████ | 206685/226998 [10:25<01:01, 330.24it/s][A[A

 91%|█████████ | 206737/226998 [10:25<01:01, 330.26it/s][A[A

 91%|█████████ | 206794/226998 [10:26<01:01, 330.30it/s][A[A

 91%|█████████ | 206846/226998 [10:26<01:01, 330.33it/s][A[A

 91%|█████████ | 206914/226998 [10:26<01:00, 330.38it/s][A[A

 91%|█████████ | 206973/226998 [10:26<01:00, 330.42it/s][A[A

 91%|█████████ | 207029/226998 [10:26<01:00, 330.45it/s][A[A

 91%|█████████ | 207088/226998 [10:26<01:00, 330.49it/s][A[A

 91%|█████████▏| 207145/226998 [10:26<01:00, 330.53it/s][A[A

 91%|█████████▏| 207202/226998 [10:26<00:59, 330.57it/s][A[A

 91%|█████████▏| 207281/226998 [10:26<00

 95%|█████████▍| 215323/226998 [10:40<00:34, 336.04it/s][A[A

 95%|█████████▍| 215397/226998 [10:40<00:34, 336.10it/s][A[A

 95%|█████████▍| 215492/226998 [10:40<00:34, 336.19it/s][A[A

 95%|█████████▍| 215569/226998 [10:41<00:33, 336.24it/s][A[A

 95%|█████████▍| 215642/226998 [10:41<00:33, 336.29it/s][A[A

 95%|█████████▌| 215721/226998 [10:41<00:33, 336.36it/s][A[A

 95%|█████████▌| 215810/226998 [10:41<00:33, 336.44it/s][A[A

 95%|█████████▌| 215887/226998 [10:41<00:33, 336.50it/s][A[A

 95%|█████████▌| 216012/226998 [10:41<00:32, 336.65it/s][A[A

 95%|█████████▌| 216116/226998 [10:41<00:32, 336.75it/s][A[A

 95%|█████████▌| 216209/226998 [10:41<00:32, 336.82it/s][A[A

 95%|█████████▌| 216304/226998 [10:42<00:31, 336.91it/s][A[A

 95%|█████████▌| 216388/226998 [10:42<00:31, 336.95it/s][A[A

 95%|█████████▌| 216465/226998 [10:42<00:31, 337.03it/s][A[A

 95%|█████████▌| 216539/226998 [10:42<00:31, 337.04it/s][A[A

 95%|█████████▌| 216626/226998 [10:42<00

 99%|█████████▊| 223634/226998 [10:56<00:09, 340.90it/s][A[A

 99%|█████████▊| 223713/226998 [10:56<00:09, 340.97it/s][A[A

 99%|█████████▊| 223771/226998 [10:56<00:09, 341.00it/s][A[A

 99%|█████████▊| 223828/226998 [10:56<00:09, 341.00it/s][A[A

 99%|█████████▊| 223879/226998 [10:56<00:09, 341.00it/s][A[A

 99%|█████████▊| 223924/226998 [10:56<00:09, 341.00it/s][A[A

 99%|█████████▊| 223981/226998 [10:56<00:08, 341.04it/s][A[A

 99%|█████████▊| 224029/226998 [10:56<00:08, 341.01it/s][A[A

 99%|█████████▊| 224095/226998 [10:57<00:08, 341.06it/s][A[A

 99%|█████████▊| 224148/226998 [10:57<00:08, 341.08it/s][A[A

 99%|█████████▉| 224195/226998 [10:57<00:08, 341.07it/s][A[A

 99%|█████████▉| 224268/226998 [10:57<00:08, 341.13it/s][A[A

 99%|█████████▉| 224320/226998 [10:57<00:07, 341.15it/s][A[A

 99%|█████████▉| 224373/226998 [10:57<00:07, 341.16it/s][A[A

 99%|█████████▉| 224431/226998 [10:57<00:07, 341.21it/s][A[A

 99%|█████████▉| 224495/226998 [10:57<00

In [28]:
testpreds_google = np.zeros((test.shape[0], len(col)))
for i, class_name in enumerate(col):
    print('fit '+ class_name)
    testpreds_google[:,i] = classifier.predict_proba(xtest_google)[:,1] 

fit toxic
fit severe_toxic
fit obscene
fit threat
fit insult
fit identity_hate


In [34]:
submission = pd.concat([submid, pd.DataFrame(testpreds_google, columns = col)], axis=1)
submission.to_csv('submissions/sample_submission_google_sgd.csv', index=False)