In [1]:
import pickle
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest,f_classif
from sklearn.decomposition import PCA
from pprint import pprint
from sklearn.metrics import f1_score,r2_score,classification_report,roc_auc_score
from sklearn.model_selection import ParameterGrid
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier,RandomForestRegressor
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix,f1_score,precision_score,recall_score,accuracy_score
import itertools
from sklearn.model_selection import ParameterGrid, cross_val_predict, GroupKFold,GridSearchCV,StratifiedKFold
from sklearn import preprocessing
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime
from joblib import Parallel,delayed

def svar(X):
    n = float(len(X))
    svar=(sum([(x-np.mean(X))**2 for x in X]) / n)* n/(n-1.)
    return svar

def CronbachAlpha(itemscores):
    itemvars = [svar(item) for item in itemscores]
    tscores = [0] * len(itemscores[0])
    for item in itemscores:
        for i in range(len(item)):
            tscores[i]+= item[i]
    nitems = len(itemscores)
    Calpha=nitems/(nitems-1.) * (1-sum(itemvars)/ svar(tscores))
    return Calpha

from scipy.stats import iqr,skew,kurtosis,pearsonr
from scipy.stats import variation,moment

def get_features1(x):
    temp = []
    temp.append(x[-1])
    temp.append(x[0])
    temp.append(x[len(x)//2])
    temp.append(iqr(x))
    temp.append(skew(x))
    temp.append(kurtosis(x))
    temp.append(variation(x))
    temp.append(iqr(x[:len(x)//2]))
    temp.append(skew(x[:len(x)//2]))
    temp.append(kurtosis(x[:len(x)//2]))
    temp.append(variation(x[:len(x)//2]))
    temp.append(iqr(x[len(x)//2:]))
    temp.append(skew(x[len(x)//2:]))
    temp.append(kurtosis(x[len(x)//2:]))
    temp.append(variation(x[len(x)//2:]))
    return np.array(temp)

# def get_features()

def get_features(temp,user):
    if temp.shape[0]<5:
        return -1,-1,-1,-1,-1
    if temp['time'].values[-1]-temp['time'].values[0]>1000:
        return -1,-1,-1,-1,-1
    if np.isnan(temp['score'].values[-1]):
        return -1,-1,-1,-1,-1
    a = temp['stress_likelihood'].values
    b = temp['all_scores'].values[-1]
    c = user
    hour = datetime.fromtimestamp(temp['time'].values[0]).hour
    if hour<8:
        d = 0
    elif hour>8 and hour < 16:
        d = 1
    else:
        d = 2
    e = user+temp['day'].values[-1]
    return a,b,c,d,e

def get_user_data(user_data,user):
    X_date = []
    X = []
    y = []
    groups = []
    days = []
    for day in np.unique(user_data['day'].values):
        user_day_data =  user_data[user_data.day.isin([day])].reset_index(drop=True)
        ema_data = user_day_data.dropna().reset_index(drop=True)
        if ema_data.shape[0]==0:
            continue
        user_day_data_values = user_day_data[['time','stress_likelihood','imputed']].values
        user_day_data_values = user_day_data_values[user_day_data_values[:,0].argsort()]
        for i,row in ema_data.iterrows():
            if np.abs(user_day_data_values[:,0]-row['time']).min()>180:
                continue
            index = np.abs(user_day_data_values[:,0]-row['time']).argmin()
            if index<=5 or len(user_day_data_values[(index-10):(index+1),1])<5:
                continue
            X.append(user_day_data_values[(index-10):(index+1),1])
            y.append(row['all_scores'])
            hour = datetime.fromtimestamp(row['time']).hour
            if hour<8:
                X_date.append(0)
            elif hour>8 and hour < 16:
                X_date.append(1)
            else:
                X_date.append(2)
            groups.append(user)
            days.append(user+day)
    return X,y,groups,X_date,days

def get_data(all_stress):
    X_date,X,y,groups,days = [],[],[],[],[]
    final = [get_user_data(all_stress[all_stress.user.isin([user])],user) 
                                          for user in np.unique(all_stress['user'].values)]
    for a in final:
        X.extend(a[0])
        y.extend(a[1])
        groups.extend(a[2])
        X_date.extend(a[3])
        days.extend(a[4])
    return X,y,groups,X_date,days

In [7]:
import warnings
import pickle
warnings.filterwarnings('ignore')
from sklearn.preprocessing import StandardScaler,OneHotEncoder
all_stress_left = pickle.load(open('../data/stress_ema_md2k_aa_rice_left_ppg.p','rb'))
# all_stress_left = all_stress_left[all_stress_left.imputed.isin([0])]
all_stress_right = pickle.load(open('../data/stress_ema_md2k_aa_rice_right1_ppg.p','rb'))
# all_stress_right = all_stress_right[all_stress_right.imputed.isin([0])]

In [8]:
X2,y2,groups2,X_date2,days2 = get_data(all_stress_left)
X3,y3,groups3,X_date3,days3 = get_data(all_stress_right)
Xf,yf,groupsf,X_datef,daysf = X2+X3,y2+y3,groups2+groups3,X_date2+X_date3,days2+days3

In [9]:
pickle.dump([Xf,yf,groupsf,X_datef,daysf],open('../data/ema_only_imputed_previous_way.p','wb'))

In [15]:
from sklearn import linear_model
Xf,yf,groupsf,X_datef,daysf = pickle.load(open('../data/ema_only_imputed_previous_way.p','rb'))
X,y1,groups1,X_date_final = np.array(Xf),np.array(yf),np.array(groupsf),OneHotEncoder().fit_transform(np.array(X_datef).reshape(-1,1))
Xfeatures = np.array([get_features1(x) for x in X])
all_y = []
all_y_pred = []
all_groups = []
for user in np.unique(groups1):
    index = np.where(groups1==user)[0]
    X1,y,groups = Xfeatures[index],y1[index],groups1[index]
#     print(y[0])
#     labels = []
#     temp = np.array([np.mean(a) for a in y])
#     temp_mean = np.mean(temp)
#     index2 = np.where(temp>temp_mean)[0]
#     index1 = np.where(temp<=temp_mean)[0]
#     temp[index2] = 1
#     temp[index1] = 0
#     labels = temp
#     labels = np.int64(np.array(labels))
    if len(y)<20:
        continue
    from sklearn.cluster import KMeans
    kmeans = KMeans(2, random_state=0)
    print(y.shape)
    labels = kmeans.fit(y).predict(y)
    if len(np.unique(labels))==1:
        continue
    if len(labels[labels==1])<3:
        continue
    if len(labels[labels==0])<3:
        continue
    
    paramGrid = {
#             'rf__kernel':['linear'],
            'rf__C': np.logspace(-3,3,10),
#             'rf__gamma': np.logspace(-6,3,2),
            'rf__class_weight':[{0:1,1:4/2},{0:1,1:3/2},{0:1,1:5/2},{0:1,1:1}],
            'pca__n_components':[3,4]
        }
    y = labels
    if len(labels[labels==1])<10:
        n = len(labels[labels==1])
    elif len(labels[labels==0])<10:
        n = len(labels[labels==0])
    else:
        n = 10
    clf = Pipeline([('pca',PCA()),('rf',linear_model.LogisticRegression())])
    grid_search = GridSearchCV(clf, paramGrid, n_jobs=-1,cv=StratifiedKFold(n_splits=n).split(X1, y),
                               scoring='accuracy',verbose=1, return_train_score=False)
    grid_search.fit(X1,y)
    from sklearn.metrics import classification_report
    clf = grid_search.best_estimator_
    y_pred = cross_val_predict(clf,X1,y,cv=StratifiedKFold(n_splits=n).split(X1, y))
    all_y.extend(list(y))
    all_y_pred.extend(list(y_pred))
    all_groups.extend(list(groups))
    print(confusion_matrix(y,y_pred),classification_report(y,y_pred))

(20, 5)
(67, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[10 17]
 [ 7 33]]               precision    recall  f1-score   support

           0       0.59      0.37      0.45        27
           1       0.66      0.82      0.73        40

   micro avg       0.64      0.64      0.64        67
   macro avg       0.62      0.60      0.59        67
weighted avg       0.63      0.64      0.62        67

(59, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[55  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.93      1.00      0.96        55
           1       0.00      0.00      0.00         4

   micro avg       0.93      0.93      0.93        59
   macro avg       0.47      0.50      0.48        59
weighted avg       0.87      0.93      0.90        59

(24, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  4]
 [ 0 20]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.83      1.00      0.91        20

   micro avg       0.83      0.83      0.83        24
   macro avg       0.42      0.50      0.45        24
weighted avg       0.69      0.83      0.76        24

(43, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.9s finished


[[ 0 15]
 [ 0 28]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.65      1.00      0.79        28

   micro avg       0.65      0.65      0.65        43
   macro avg       0.33      0.50      0.39        43
weighted avg       0.42      0.65      0.51        43

(21, 5)
(23, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 1  5]
 [ 1 16]]               precision    recall  f1-score   support

           0       0.50      0.17      0.25         6
           1       0.76      0.94      0.84        17

   micro avg       0.74      0.74      0.74        23
   macro avg       0.63      0.55      0.55        23
weighted avg       0.69      0.74      0.69        23

(82, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[57  1]
 [21  3]]               precision    recall  f1-score   support

           0       0.73      0.98      0.84        58
           1       0.75      0.12      0.21        24

   micro avg       0.73      0.73      0.73        82
   macro avg       0.74      0.55      0.53        82
weighted avg       0.74      0.73      0.66        82

(50, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished


[[46  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.92      1.00      0.96        46
           1       0.00      0.00      0.00         4

   micro avg       0.92      0.92      0.92        50
   macro avg       0.46      0.50      0.48        50
weighted avg       0.85      0.92      0.88        50

(24, 5)
(32, 5)
(37, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done  48 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.5s finished


[[31  0]
 [ 6  0]]               precision    recall  f1-score   support

           0       0.84      1.00      0.91        31
           1       0.00      0.00      0.00         6

   micro avg       0.84      0.84      0.84        37
   macro avg       0.42      0.50      0.46        37
weighted avg       0.70      0.84      0.76        37

(43, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[ 3 18]
 [ 1 21]]               precision    recall  f1-score   support

           0       0.75      0.14      0.24        21
           1       0.54      0.95      0.69        22

   micro avg       0.56      0.56      0.56        43
   macro avg       0.64      0.55      0.46        43
weighted avg       0.64      0.56      0.47        43

(26, 5)
(60, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 0 28]
 [ 0 32]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        28
           1       0.53      1.00      0.70        32

   micro avg       0.53      0.53      0.53        60
   macro avg       0.27      0.50      0.35        60
weighted avg       0.28      0.53      0.37        60

(44, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[16 11]
 [ 6 11]]               precision    recall  f1-score   support

           0       0.73      0.59      0.65        27
           1       0.50      0.65      0.56        17

   micro avg       0.61      0.61      0.61        44
   macro avg       0.61      0.62      0.61        44
weighted avg       0.64      0.61      0.62        44

(47, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[27  8]
 [ 0 12]]               precision    recall  f1-score   support

           0       1.00      0.77      0.87        35
           1       0.60      1.00      0.75        12

   micro avg       0.83      0.83      0.83        47
   macro avg       0.80      0.89      0.81        47
weighted avg       0.90      0.83      0.84        47

(30, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   8 tasks      | elapsed:    0.2s


[[24  2]
 [ 2  2]]               precision    recall  f1-score   support

           0       0.92      0.92      0.92        26
           1       0.50      0.50      0.50         4

   micro avg       0.87      0.87      0.87        30
   macro avg       0.71      0.71      0.71        30
weighted avg       0.87      0.87      0.87        30

(44, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[27  3]
 [10  4]]               precision    recall  f1-score   support

           0       0.73      0.90      0.81        30
           1       0.57      0.29      0.38        14

   micro avg       0.70      0.70      0.70        44
   macro avg       0.65      0.59      0.59        44
weighted avg       0.68      0.70      0.67        44

(88, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[84  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.95      1.00      0.98        84
           1       0.00      0.00      0.00         4

   micro avg       0.95      0.95      0.95        88
   macro avg       0.48      0.50      0.49        88
weighted avg       0.91      0.95      0.93        88

(34, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  4]
 [ 0 30]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.88      1.00      0.94        30

   micro avg       0.88      0.88      0.88        34
   macro avg       0.44      0.50      0.47        34
weighted avg       0.78      0.88      0.83        34

(57, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[24  8]
 [17  8]]               precision    recall  f1-score   support

           0       0.59      0.75      0.66        32
           1       0.50      0.32      0.39        25

   micro avg       0.56      0.56      0.56        57
   macro avg       0.54      0.54      0.52        57
weighted avg       0.55      0.56      0.54        57

(44, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 0 14]
 [ 0 30]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.68      1.00      0.81        30

   micro avg       0.68      0.68      0.68        44
   macro avg       0.34      0.50      0.41        44
weighted avg       0.46      0.68      0.55        44

(40, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[11  9]
 [ 4 16]]               precision    recall  f1-score   support

           0       0.73      0.55      0.63        20
           1       0.64      0.80      0.71        20

   micro avg       0.68      0.68      0.68        40
   macro avg       0.69      0.68      0.67        40
weighted avg       0.69      0.68      0.67        40

(45, 5)
Fitting 7 folds for each of 80 candidates, totalling 560 fits


[Parallel(n_jobs=-1)]: Done 560 out of 560 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  7]
 [ 0 38]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.84      1.00      0.92        38

   micro avg       0.84      0.84      0.84        45
   macro avg       0.42      0.50      0.46        45
weighted avg       0.71      0.84      0.77        45

(37, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[31  0]
 [ 6  0]]               precision    recall  f1-score   support

           0       0.84      1.00      0.91        31
           1       0.00      0.00      0.00         6

   micro avg       0.84      0.84      0.84        37
   macro avg       0.42      0.50      0.46        37
weighted avg       0.70      0.84      0.76        37

(43, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   7 tasks      | elapsed:    0.1s


[[ 8 14]
 [ 3 18]]               precision    recall  f1-score   support

           0       0.73      0.36      0.48        22
           1       0.56      0.86      0.68        21

   micro avg       0.60      0.60      0.60        43
   macro avg       0.64      0.61      0.58        43
weighted avg       0.65      0.60      0.58        43

(26, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  4]
 [ 0 22]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.85      1.00      0.92        22

   micro avg       0.85      0.85      0.85        26
   macro avg       0.42      0.50      0.46        26
weighted avg       0.72      0.85      0.78        26

(94, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[16 14]
 [14 50]]               precision    recall  f1-score   support

           0       0.53      0.53      0.53        30
           1       0.78      0.78      0.78        64

   micro avg       0.70      0.70      0.70        94
   macro avg       0.66      0.66      0.66        94
weighted avg       0.70      0.70      0.70        94

(44, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done  22 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 3 16]
 [ 1 24]]               precision    recall  f1-score   support

           0       0.75      0.16      0.26        19
           1       0.60      0.96      0.74        25

   micro avg       0.61      0.61      0.61        44
   macro avg       0.68      0.56      0.50        44
weighted avg       0.66      0.61      0.53        44

(47, 5)
(20, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  6]
 [ 0 14]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.70      1.00      0.82        14

   micro avg       0.70      0.70      0.70        20
   macro avg       0.35      0.50      0.41        20
weighted avg       0.49      0.70      0.58        20

(88, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 464 tasks      | elapsed:    0.9s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[33 15]
 [26 14]]               precision    recall  f1-score   support

           0       0.56      0.69      0.62        48
           1       0.48      0.35      0.41        40

   micro avg       0.53      0.53      0.53        88
   macro avg       0.52      0.52      0.51        88
weighted avg       0.52      0.53      0.52        88

(24, 5)
Fitting 3 folds for each of 80 candidates, totalling 240 fits


[Parallel(n_jobs=-1)]: Done 240 out of 240 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  3]
 [ 0 21]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         3
           1       0.88      1.00      0.93        21

   micro avg       0.88      0.88      0.88        24
   macro avg       0.44      0.50      0.47        24
weighted avg       0.77      0.88      0.82        24

(51, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  6]
 [ 0 45]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         6
           1       0.88      1.00      0.94        45

   micro avg       0.88      0.88      0.88        51
   macro avg       0.44      0.50      0.47        51
weighted avg       0.78      0.88      0.83        51

(55, 5)
(32, 5)
(38, 5)
Fitting 6 folds for each of 80 candidates, totalling 480 fits


[Parallel(n_jobs=-1)]: Done 480 out of 480 | elapsed:    0.6s finished


[[28  4]
 [ 2  4]]               precision    recall  f1-score   support

           0       0.93      0.88      0.90        32
           1       0.50      0.67      0.57         6

   micro avg       0.84      0.84      0.84        38
   macro avg       0.72      0.77      0.74        38
weighted avg       0.86      0.84      0.85        38

(110, 5)
(67, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  4]
 [ 0 63]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         4
           1       0.94      1.00      0.97        63

   micro avg       0.94      0.94      0.94        67
   macro avg       0.47      0.50      0.48        67
weighted avg       0.88      0.94      0.91        67

(47, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[21 10]
 [ 6 10]]               precision    recall  f1-score   support

           0       0.78      0.68      0.72        31
           1       0.50      0.62      0.56        16

   micro avg       0.66      0.66      0.66        47
   macro avg       0.64      0.65      0.64        47
weighted avg       0.68      0.66      0.67        47

(35, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 0 14]
 [ 0 21]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.60      1.00      0.75        21

   micro avg       0.60      0.60      0.60        35
   macro avg       0.30      0.50      0.37        35
weighted avg       0.36      0.60      0.45        35

(69, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[65  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.94      1.00      0.97        65
           1       0.00      0.00      0.00         4

   micro avg       0.94      0.94      0.94        69
   macro avg       0.47      0.50      0.49        69
weighted avg       0.89      0.94      0.91        69

(46, 5)
(72, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 0 33]
 [ 1 38]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        33
           1       0.54      0.97      0.69        39

   micro avg       0.53      0.53      0.53        72
   macro avg       0.27      0.49      0.35        72
weighted avg       0.29      0.53      0.37        72

(55, 5)
(22, 5)
(93, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done  13 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[89  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.96      1.00      0.98        89
           1       0.00      0.00      0.00         4

   micro avg       0.96      0.96      0.96        93
   macro avg       0.48      0.50      0.49        93
weighted avg       0.92      0.96      0.94        93

(24, 5)
(36, 5)
Fitting 7 folds for each of 80 candidates, totalling 560 fits


[Parallel(n_jobs=-1)]: Done 560 out of 560 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[26  3]
 [ 5  2]]               precision    recall  f1-score   support

           0       0.84      0.90      0.87        29
           1       0.40      0.29      0.33         7

   micro avg       0.78      0.78      0.78        36
   macro avg       0.62      0.59      0.60        36
weighted avg       0.75      0.78      0.76        36

(47, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[43  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.91      1.00      0.96        43
           1       0.00      0.00      0.00         4

   micro avg       0.91      0.91      0.91        47
   macro avg       0.46      0.50      0.48        47
weighted avg       0.84      0.91      0.87        47

(34, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[30  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.88      1.00      0.94        30
           1       0.00      0.00      0.00         4

   micro avg       0.88      0.88      0.88        34
   macro avg       0.44      0.50      0.47        34
weighted avg       0.78      0.88      0.83        34

(40, 5)
Fitting 8 folds for each of 80 candidates, totalling 640 fits


[Parallel(n_jobs=-1)]: Done 640 out of 640 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[ 0  8]
 [ 0 32]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.80      1.00      0.89        32

   micro avg       0.80      0.80      0.80        40
   macro avg       0.40      0.50      0.44        40
weighted avg       0.64      0.80      0.71        40

(53, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[40  0]
 [13  0]]               precision    recall  f1-score   support

           0       0.75      1.00      0.86        40
           1       0.00      0.00      0.00        13

   micro avg       0.75      0.75      0.75        53
   macro avg       0.38      0.50      0.43        53
weighted avg       0.57      0.75      0.65        53

(26, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[20  2]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.83      0.91      0.87        22
           1       0.00      0.00      0.00         4

   micro avg       0.77      0.77      0.77        26
   macro avg       0.42      0.45      0.43        26
weighted avg       0.71      0.77      0.74        26

(34, 5)
Fitting 8 folds for each of 80 candidates, totalling 640 fits


[Parallel(n_jobs=-1)]: Done 640 out of 640 | elapsed:    0.9s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[21  5]
 [ 5  3]]               precision    recall  f1-score   support

           0       0.81      0.81      0.81        26
           1       0.38      0.38      0.38         8

   micro avg       0.71      0.71      0.71        34
   macro avg       0.59      0.59      0.59        34
weighted avg       0.71      0.71      0.71        34

(42, 5)
Fitting 8 folds for each of 80 candidates, totalling 640 fits


[Parallel(n_jobs=-1)]: Done 640 out of 640 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.


[[ 3  5]
 [ 2 32]]               precision    recall  f1-score   support

           0       0.60      0.38      0.46         8
           1       0.86      0.94      0.90        34

   micro avg       0.83      0.83      0.83        42
   macro avg       0.73      0.66      0.68        42
weighted avg       0.81      0.83      0.82        42

(24, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.0s


[[20  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.83      1.00      0.91        20
           1       0.00      0.00      0.00         4

   micro avg       0.83      0.83      0.83        24
   macro avg       0.42      0.50      0.45        24
weighted avg       0.69      0.83      0.76        24

(32, 5)
Fitting 4 folds for each of 80 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.1s


[[28  0]
 [ 4  0]]               precision    recall  f1-score   support

           0       0.88      1.00      0.93        28
           1       0.00      0.00      0.00         4

   micro avg       0.88      0.88      0.88        32
   macro avg       0.44      0.50      0.47        32
weighted avg       0.77      0.88      0.82        32

(82, 5)
Fitting 10 folds for each of 80 candidates, totalling 800 fits
[[ 0 14]
 [ 0 68]]               precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.83      1.00      0.91        68

   micro avg       0.83      0.83      0.83        82
   macro avg       0.41      0.50      0.45        82
weighted avg       0.69      0.83      0.75        82



[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:    1.0s finished


In [6]:
print(confusion_matrix(all_y,all_y_pred),classification_report(all_y,all_y_pred))

[[836 338]
 [297 547]]               precision    recall  f1-score   support

           0       0.74      0.71      0.72      1174
           1       0.62      0.65      0.63       844

   micro avg       0.69      0.69      0.69      2018
   macro avg       0.68      0.68      0.68      2018
weighted avg       0.69      0.69      0.69      2018



In [14]:
print(confusion_matrix(all_y,all_y_pred),classification_report(all_y,all_y_pred))

[[458 278]
 [126 775]]               precision    recall  f1-score   support

           0       0.78      0.62      0.69       736
           1       0.74      0.86      0.79       901

   micro avg       0.75      0.75      0.75      1637
   macro avg       0.76      0.74      0.74      1637
weighted avg       0.76      0.75      0.75      1637



In [12]:
print(confusion_matrix(all_y,all_y_pred),classification_report(all_y,all_y_pred))
# y.shape

[[1146  445]
 [ 406  697]]               precision    recall  f1-score   support

           0       0.74      0.72      0.73      1591
           1       0.61      0.63      0.62      1103

   micro avg       0.68      0.68      0.68      2694
   macro avg       0.67      0.68      0.68      2694
weighted avg       0.69      0.68      0.68      2694



In [18]:
print(confusion_matrix(all_y,all_y_pred),classification_report(all_y,all_y_pred))

[[941 334]
 [199 785]]               precision    recall  f1-score   support

           0       0.83      0.74      0.78      1275
           1       0.70      0.80      0.75       984

   micro avg       0.76      0.76      0.76      2259
   macro avg       0.76      0.77      0.76      2259
weighted avg       0.77      0.76      0.77      2259



In [None]:
y1.shape

In [None]:
labels