# Predict new locations for UBS wealth management branch offices.

Union Bank of Switzerland (UBS) is looking to apply Machine Learning in predicting most likely new locations/zipcodes be opened with UBS wealth management branches in the USA

### keywords: data wrangling, PCA, under-sampling, cross-validation, cross-prediction, LogisticRegression, Naive Bayes, ensemble learning 

In [1]:
#Import libraries
import numpy as np
import pandas as pd
import random
import copy
from sklearn.model_selection import KFold, cross_val_score,cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
from collections import Counter

  _nan_object_mask = _nan_object_array != _nan_object_array


# Data wrangling

In [2]:
#Import original dataset
original_dataset=pd.read_csv('UBS_original_data.csv')
dataset=copy.deepcopy(original_dataset)
dataset.head()

Unnamed: 0,ZipCode,Population,HouseholdsPerZipCode,WhitePopulation,BlackPopulation,HispanicPopulation,AsianPopulation,HawaiianPopulation,IndianPopulation,OtherPopulation,...,DeliveryTotal,PopulationEstimate,LandArea,WaterArea,BoxCount,SFDU,MFDU,CityDeliveryIndicator,MedicareCBSAType,MarketRatingAreaID
0,501,0,0,0,0,0,0,0,0,0,...,1,0,0.0,0.0,0,0,0,N,Metro,8
1,501,0,0,0,0,0,0,0,0,0,...,1,0,0.0,0.0,0,0,0,N,Metro,8
2,544,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.0,0,0,0,N,Metro,8
3,544,0,0,0,0,0,0,0,0,0,...,0,0,0.0,0.0,0,0,0,N,Metro,8
4,601,18570,6525,17479,663,18486,7,10,113,558,...,5074,11342,64.348,0.309,831,2376,1206,Y,Micro,1


In [3]:
#Remove the rows where the population are 0
dataset = dataset[dataset.Population != 0]
dataset.head()

Unnamed: 0,ZipCode,Population,HouseholdsPerZipCode,WhitePopulation,BlackPopulation,HispanicPopulation,AsianPopulation,HawaiianPopulation,IndianPopulation,OtherPopulation,...,DeliveryTotal,PopulationEstimate,LandArea,WaterArea,BoxCount,SFDU,MFDU,CityDeliveryIndicator,MedicareCBSAType,MarketRatingAreaID
4,601,18570,6525,17479,663,18486,7,10,113,558,...,5074,11342,64.348,0.309,831,2376,1206,Y,Micro,1
5,601,18570,6525,17479,663,18486,7,10,113,558,...,5074,11342,64.348,0.309,831,2376,1206,Y,Micro,1
6,601,18570,6525,17479,663,18486,7,10,113,558,...,5074,11342,64.348,0.309,831,2376,1206,Y,Micro,1
7,601,18570,6525,17479,663,18486,7,10,113,558,...,5074,11342,64.348,0.309,831,2376,1206,Y,Micro,1
8,602,41520,15002,36828,2860,41265,42,32,291,2634,...,11165,24000,30.613,1.717,1502,5420,821,Y,Metro,1


In [4]:
#Reset the row index for later use
dataset.index=range(len(dataset))
#Replace the empty cell with the NaN
for j in range(dataset.columns.size):
    dataset.iloc[:,j].replace(r'\s+', np.nan, regex=True, inplace=True)

In [5]:
#Checking for missing values(NaN) in each feature
feature_nan_value=dataset.isnull().sum()
print(feature_nan_value)

ZipCode                            0
Population                         0
HouseholdsPerZipCode               0
WhitePopulation                    0
BlackPopulation                    0
HispanicPopulation                 0
AsianPopulation                    0
HawaiianPopulation                 0
IndianPopulation                   0
OtherPopulation                    0
MalePopulation                     0
FemalePopulation                   0
PersonsPerHousehold                0
AverageHouseValue                  0
IncomePerHousehold                 0
MedianAge                          0
MedianAgeMale                      0
MedianAgeFemale                    0
CityType                           0
NumberOfBusinesses                 0
NumberOfEmployees                  0
BusinessFirstQuarterPayroll        0
BusinessAnnualPayroll              0
BusinessEmploymentFlag         62853
GrowthRank                         0
GrowingCountiesA                   0
GrowingCountiesB                   0
G

In [6]:
#Drop those features with over 50% of the values being NaN 
dataset.drop(['BusinessEmploymentFlag'],axis=1,inplace=True)

In [7]:
#Fill the nan with its next value for the feature "MedicareCBSAType" 
dataset.MedicareCBSAType.fillna(method='bfill',inplace=True)

In [8]:
#Encode categorical features with dummy variables
dataset=pd.get_dummies(dataset)  

In [9]:
#Group by zip code, and select max value of each group
dataset=dataset.groupby('ZipCode',as_index=False).max()

In [10]:
#Input label/target data
#the zipcodes already opened with branches are labeled with "1", otherwise "0"
dataset_label=pd.read_csv('UBS_data_label.csv')
dataset_label.head()

Unnamed: 0,ZipCode,UBS_Open_Branch
0,1144,1
1,1608,1
2,1945,1
3,1960,1
4,2109,1


In [11]:
#Final clean data with the label
#Final_dataset=pd.merge(dataset,dataset_label.drop(['UBS_Branches'],axis=1), on='ZipCode',how='left')
Final_dataset=pd.merge(dataset,dataset_label, on='ZipCode',how='left')
Final_dataset.UBS_Open_Branch.fillna(0,inplace=True)
Final_dataset.to_csv('UBS_clean_data.csv',index=False)

In [12]:
# Import clean dataset
original_dataset = pd.read_csv('UBS_clean_data.csv')
original_dataset.head()

Unnamed: 0,ZipCode,Population,HouseholdsPerZipCode,WhitePopulation,BlackPopulation,HispanicPopulation,AsianPopulation,HawaiianPopulation,IndianPopulation,OtherPopulation,...,CityType_C,CityType_N,CityType_P,CityType_U,CityType_Z,CityDeliveryIndicator_N,CityDeliveryIndicator_Y,MedicareCBSAType_Metro,MedicareCBSAType_Micro,UBS_Open_Branch
0,601,18570,6525,17479,663,18486,7,10,113,558,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
1,602,41520,15002,36828,2860,41265,42,32,291,2634,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
2,603,54689,21161,46501,5042,53877,135,35,313,4177,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
3,606,6615,2404,5979,371,6575,3,9,35,323,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0
4,610,29016,10836,24510,2654,28789,57,31,200,2494,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0


## Principal Component Analysis (PCA)

In [13]:
# Import StandardScaler
from sklearn.preprocessing import StandardScaler 

In [14]:
# Scale the features and set the values to a new variable
scaler = StandardScaler() 
original_dataset_features = scaler.fit_transform(original_dataset.drop(['ZipCode','UBS_Open_Branch'],axis=1))

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [15]:
# Import PCA class
from sklearn.decomposition import PCA

In [16]:
#Find the suitable number of PCA components 
pca = PCA(random_state=7)
pca.fit(original_dataset_features)
#Get explained variance ratios from PCA using all features
exp_variance = pca.fit(original_dataset_features).explained_variance_ratio_

In [17]:
# Calculate the cumulative explained variance
cum_exp_variance = np.cumsum(exp_variance)

In [18]:
#Determine the number of components by 90% 
#of the cumulative explained variance  
n_components = len(cum_exp_variance[cum_exp_variance<0.9])
print(n_components)

19


In [19]:
#Perform PCA with the chosen number of components and project data onto components
pca = PCA(n_components)#, random_state=10)
pca.fit(original_dataset_features)
PCA_dataset_features= pca.transform(original_dataset_features)
PCA_dataset_features=pd.DataFrame(PCA_dataset_features)

In [20]:
#Add label and zip code index
PCA_dataset=PCA_dataset_features
PCA_dataset['UBS_Open_Branch']=original_dataset['UBS_Open_Branch']
PCA_dataset['ZipCode']=original_dataset['ZipCode']
#Add prediction coloumn
PCA_dataset['y_pred_prob']=0.0

In [21]:
#Number of records in class 0 and class 1
print('The size of class 1: {}'.format(len(PCA_dataset[PCA_dataset.UBS_Open_Branch == 0])))
print('The size of class 0: {}'.format(len(PCA_dataset[PCA_dataset.UBS_Open_Branch != 0]))) 

The size of class 1: 32695
The size of class 0: 264


## Binary classification problem on a highly imbalanced dataset

In [22]:
# Minority dataset
minority_dataset=PCA_dataset[PCA_dataset.UBS_Open_Branch != 0]
# Majority dataset
majority_dataset = PCA_dataset[PCA_dataset.UBS_Open_Branch == 0]

## Undersampling & cross-prediction 

For this specific problem, the information of zipcodes in majority class are requied for model training, on the other hand, any zipcode in majority class is possbile to be opened with a new branch.
Therefore, regular model training and prediction process lead to "data leakage", which refers to a mistake made by the creator of a machine learning model, in which they accidentally share information between the test and training data-sets, memorizes the training set data, and is easily able to correctly output the labels/values for those test data-set examples.
In order to avoid "data leakage", we employ an undersampling & cross-prediction scheme.

Divide the majority dataset into m=122 subsets, each subset size is n=268, we randomly select one subset of majority dataset, and combine it with the minority set as a train set to train the model with cross-validation, the cross-validation score is recored as the weight which is used in the final stage. 
Keep the remaining part of the majority dataset as the test set, predict the test set with the trained model.
<img src="cross_prediction2-1.png">
Repeat above process until all the m=122 subsets of majority dataset have been employed for model training. 
Finally calculate the weighted average of the predictions for each zipcode, 
those zipcodes with the top predictions are the ones to be opend wth the new branches most likely.

In [23]:

Monte_Carlo=2

monte_All_ZipCode_prediction=PCA_dataset.loc[:,['ZipCode','y_pred_prob']]
monte_All_ZipCode_prediction.sort_values('ZipCode',inplace=True,ascending=False)

for r in range(Monte_Carlo):
    print(r)
    
    #Shuffle the indeces of Majority dataset
    majority_index=list(majority_dataset.index)
    #random.seed(2337)
    random.shuffle(majority_index)

    #Parameters initialization

    simulation_rounds=122

    #Size of each subset in majority set
    subsetsize=268

    #Indeces of elements in each subset of majority set
    majority_subset_index=[]

    #Cross-validation score initialization
    Model_performance=pd.DataFrame(np.zeros((122,1)),columns=['Accuracy'])

    #Obtain indeces of elements in each subset of majority set
    for i in range(122):
        majority_subset_index.append(majority_index[i*subsetsize:(i+1)*subsetsize])

    #Initialize the final weighted prediction of each zipcode
    All_ZipCode_prediction=PCA_dataset.loc[:,['ZipCode','y_pred_prob']]

    #Initialize the prediction of each zipcode in a single round cross-prediction
    temp_single_round_prediction=PCA_dataset.loc[:,['ZipCode','y_pred_prob']] 

    #Train model with K-Fold cross validation
    kf = KFold(n_splits=10)#, random_state=10)

    #Classification model ensembling

    #Logistic regression
    logreg= LogisticRegression(solver='liblinear')#random_state=10
    #Naive bayes
    NB = GaussianNB()
    
#Cross prediction

    for j in range(simulation_rounds):

        #A subset of majoriry data set is combined with the minority set to form the train set
        majority_datasubset=majority_dataset.loc[majority_subset_index[j],:]
        dataset_train=majority_datasubset.append(minority_dataset,ignore_index=False)    
        X_train= dataset_train.iloc[:,1:-3].values ##features
        y_train = dataset_train.UBS_Open_Branch.values##labels


        #Train models using KFold cv
        #logit_score =cross_val_score(logreg, X_train, y_train, cv=kf,scoring='accuracy')
        #NB_score = cross_val_score(NB, X_train, y_train, cv=kf,scoring='accuracy')
        logit_score = cross_validate(logreg, X_train, y_train,  scoring='accuracy', cv=kf)
        NB_score=cross_validate(NB, X_train, y_train,  scoring='accuracy', cv=kf)
        ave_cv_logit_train_score=np.mean(logit_score['train_score'])
        ave_cv_logit_test_score=np.mean(logit_score['test_score'])
        ave_cv_NB_train_score=np.mean(NB_score['train_score'])
        ave_cv_NB_test_score=np.mean(NB_score['test_score'])
        
        #Select the average test score of the model with less likely overfitting
        #as cross validation score
        if (ave_cv_logit_train_score-ave_cv_logit_test_score)/ave_cv_logit_test_score>(ave_cv_NB_train_score-ave_cv_NB_test_score)/ave_cv_NB_test_score:
            cross_validation_score = np.mean(NB_score['test_score'])
        else:
            cross_validation_score = np.mean(logit_score['test_score'])
            
        #Predict the test set
        #The remaining part of the majority dataset which is not used in training process
        #is the test set   
        X_test=majority_dataset.drop(majority_subset_index[j],axis=0).iloc[:,1:-3].values
        X_test_ZipCode=majority_dataset.drop(majority_subset_index[j],axis=0).loc[:,['ZipCode','y_pred_prob']]

        #Select the model of less likely overfitting 
        #to predict the test set
        if (ave_cv_logit_train_score-ave_cv_logit_test_score)/ave_cv_logit_test_score>(ave_cv_NB_train_score-ave_cv_NB_test_score)/ave_cv_NB_test_score:
            NB.fit(X_train, y_train)           
            y_probas= NB.predict_proba(X_test)
        else:
            logreg.fit(X_train, y_train) 
            y_probas= logreg.predict_proba(X_test)

        X_test_ZipCode['y_pred_prob']=y_probas[:,1]

        #Merge the predictions on Zipcode
        single_round_predictions=pd.merge(temp_single_round_prediction,X_test_ZipCode, on='ZipCode',how='left')

        #Replace nan with 0
        single_round_predictions.iloc[:,-1].fillna(0,inplace=True)


        #Weighted averaging the prediction probability with the 
        #Cross validation score from each model
        All_ZipCode_prediction['y_pred_prob']+=single_round_predictions.iloc[:,-1]*cross_validation_score 
        Model_performance.iloc[j,:]=cross_validation_score
        ##print('simulation round {}: cross validation score is {}'.format(j,cross_validation_score)) 
        ##print('simu_round {}: \n c-v train_score is {} \n c-v test_score is {}'.format(j,np.around(scores['train_score'],decimals=3),np.around(scores['test_score'],decimals=3))) 
        print('Cross-validation for {}-th subset using logit: \n logit c-v train_score mean is {} \n logit c-v test_score mean is {}'.format(j,np.mean(logit_score['train_score']),np.mean(logit_score['test_score'])))
        print('Cross-validation for {}-th subset using NB: \n NB c-v train_score mean is {} \n NB c-v test_score mean is {}'.format(j,np.mean(NB_score['train_score']),np.mean(NB_score['test_score'])))
    All_ZipCode_prediction.sort_values('ZipCode',inplace=True,ascending=False)
    All_ZipCode_prediction['y_pred_prob']=All_ZipCode_prediction['y_pred_prob']/Model_performance.iloc[:,0].sum()
    
    monte_All_ZipCode_prediction['y_pred_prob']+= All_ZipCode_prediction['y_pred_prob']
    # print(All_ZipCode_prediction.y_pred_prob.head(10))
    # print(monte_All_ZipCode_prediction.y_pred_prob.head(10))

0
Cross-validation for 0-th subset using logit: 
 logit c-v train_score mean is 0.8796979411430719 
 logit c-v test_score mean is 0.7929769392033543
Cross-validation for 0-th subset using NB: 
 NB c-v train_score mean is 0.7391348782767446 
 NB c-v test_score mean is 0.706219426974144
Cross-validation for 1-th subset using logit: 
 logit c-v train_score mean is 0.8974506686699103 
 logit c-v test_score mean is 0.8494060097833683
Cross-validation for 1-th subset using NB: 
 NB c-v train_score mean is 0.7232658694455848 
 NB c-v test_score mean is 0.6836128581411601




Cross-validation for 2-th subset using logit: 
 logit c-v train_score mean is 0.8840868790454313 
 logit c-v test_score mean is 0.8119147449336129
Cross-validation for 2-th subset using NB: 
 NB c-v train_score mean is 0.7393423362828767 
 NB c-v test_score mean is 0.7081062194269742




Cross-validation for 3-th subset using logit: 
 logit c-v train_score mean is 0.895364733012465 
 logit c-v test_score mean is 0.845737246680643
Cross-validation for 3-th subset using NB: 
 NB c-v train_score mean is 0.730993789362427 
 NB c-v test_score mean is 0.6647449336128581
Cross-validation for 4-th subset using logit: 
 logit c-v train_score mean is 0.8757304705584332 
 logit c-v test_score mean is 0.7931167016072675
Cross-validation for 4-th subset using NB: 
 NB c-v train_score mean is 0.6961076510512662 
 NB c-v test_score mean is 0.6476589797344514




Cross-validation for 5-th subset using logit: 
 logit c-v train_score mean is 0.8924384832417609 
 logit c-v test_score mean is 0.8307477288609364
Cross-validation for 5-th subset using NB: 
 NB c-v train_score mean is 0.7401778461054673 
 NB c-v test_score mean is 0.6909503843466107
Cross-validation for 6-th subset using logit: 
 logit c-v train_score mean is 0.8895109232099649 
 logit c-v test_score mean is 0.8304332634521314
Cross-validation for 6-th subset using NB: 
 NB c-v train_score mean is 0.7468597409177069 
 NB c-v test_score mean is 0.7156184486373165
Cross-validation for 7-th subset using logit: 
 logit c-v train_score mean is 0.8684165931464609 
 logit c-v test_score mean is 0.8003843466107616
Cross-validation for 7-th subset using NB: 
 NB c-v train_score mean is 0.6910876040565682 
 NB c-v test_score mean is 0.613591893780573




Cross-validation for 8-th subset using logit: 
 logit c-v train_score mean is 0.8874258610599138 
 logit c-v test_score mean is 0.8250524109014675
Cross-validation for 8-th subset using NB: 
 NB c-v train_score mean is 0.727224168202584 
 NB c-v test_score mean is 0.6815513626834382
Cross-validation for 9-th subset using logit: 
 logit c-v train_score mean is 0.8993295830749208 
 logit c-v test_score mean is 0.8418588399720477
Cross-validation for 9-th subset using NB: 
 NB c-v train_score mean is 0.7299447069819447 
 NB c-v test_score mean is 0.691090146750524




Cross-validation for 10-th subset using logit: 
 logit c-v train_score mean is 0.8961963120517817 
 logit c-v test_score mean is 0.8438155136268344
Cross-validation for 10-th subset using NB: 
 NB c-v train_score mean is 0.7370454485897223 
 NB c-v test_score mean is 0.7043675751222921
Cross-validation for 11-th subset using logit: 
 logit c-v train_score mean is 0.9005808824171696 
 logit c-v test_score mean is 0.8473794549266248
Cross-validation for 11-th subset using NB: 
 NB c-v train_score mean is 0.744771621491776 
 NB c-v test_score mean is 0.7021313766596786
Cross-validation for 12-th subset using logit: 
 logit c-v train_score mean is 0.9089377276578647 
 logit c-v test_score mean is 0.8494409503843465
Cross-validation for 12-th subset using NB: 
 NB c-v train_score mean is 0.7748547793957076 
 NB c-v test_score mean is 0.7286163522012579




Cross-validation for 13-th subset using logit: 
 logit c-v train_score mean is 0.8895174745154218 
 logit c-v test_score mean is 0.8138364779874214
Cross-validation for 13-th subset using NB: 
 NB c-v train_score mean is 0.7391296372323792 
 NB c-v test_score mean is 0.7023060796645703
Cross-validation for 14-th subset using logit: 
 logit c-v train_score mean is 0.8736379835955311 
 logit c-v test_score mean is 0.8078965758211041
Cross-validation for 14-th subset using NB: 
 NB c-v train_score mean is 0.6971431940671377 
 NB c-v test_score mean is 0.6323200559049615
Cross-validation for 15-th subset using logit: 
 logit c-v train_score mean is 0.898704588534342 
 logit c-v test_score mean is 0.8438504542278128
Cross-validation for 15-th subset using NB: 
 NB c-v train_score mean is 0.7368340598003162 
 NB c-v test_score mean is 0.7041229909154436




Cross-validation for 16-th subset using logit: 
 logit c-v train_score mean is 0.9037163372087944 
 logit c-v test_score mean is 0.8588399720475192
Cross-validation for 16-th subset using NB: 
 NB c-v train_score mean is 0.7403844306042051 
 NB c-v test_score mean is 0.7044025157232705
Cross-validation for 17-th subset using logit: 
 logit c-v train_score mean is 0.8874267345673081 
 logit c-v test_score mean is 0.8288609364081063
Cross-validation for 17-th subset using NB: 
 NB c-v train_score mean is 0.7261873149256208 
 NB c-v test_score mean is 0.6911250873515025
Cross-validation for 18-th subset using logit: 
 logit c-v train_score mean is 0.8916034101728669 
 logit c-v test_score mean is 0.8250873515024459
Cross-validation for 18-th subset using NB: 
 NB c-v train_score mean is 0.7257645373468087 
 NB c-v test_score mean is 0.6834381551362683




Cross-validation for 19-th subset using logit: 
 logit c-v train_score mean is 0.8893069592334101 
 logit c-v test_score mean is 0.8269042627533194
Cross-validation for 19-th subset using NB: 
 NB c-v train_score mean is 0.7403848673579022 
 NB c-v test_score mean is 0.6968204053109713
Cross-validation for 20-th subset using logit: 
 logit c-v train_score mean is 0.9076838077934332 
 logit c-v test_score mean is 0.8551362683438155
Cross-validation for 20-th subset using NB: 
 NB c-v train_score mean is 0.7727596719106227 
 NB c-v test_score mean is 0.7154786862334033
Cross-validation for 21-th subset using logit: 
 logit c-v train_score mean is 0.9028830111546894 
 logit c-v test_score mean is 0.8287561146051712
Cross-validation for 21-th subset using NB: 
 NB c-v train_score mean is 0.7351647871699234 
 NB c-v test_score mean is 0.6872816212438854




Cross-validation for 22-th subset using logit: 
 logit c-v train_score mean is 0.8857583354443095 
 logit c-v test_score mean is 0.8137665967854648
Cross-validation for 22-th subset using NB: 
 NB c-v train_score mean is 0.737880958412313 
 NB c-v test_score mean is 0.6815164220824598
Cross-validation for 23-th subset using logit: 
 logit c-v train_score mean is 0.899330893336012 
 logit c-v test_score mean is 0.8455276030747729
Cross-validation for 23-th subset using NB: 
 NB c-v train_score mean is 0.7391348782767446 
 NB c-v test_score mean is 0.6910901467505242
Cross-validation for 24-th subset using logit: 
 logit c-v train_score mean is 0.8801159144312157 
 logit c-v test_score mean is 0.819392033542977
Cross-validation for 24-th subset using NB: 
 NB c-v train_score mean is 0.7552122186214307 
 NB c-v test_score mean is 0.7081761006289309




Cross-validation for 25-th subset using logit: 
 logit c-v train_score mean is 0.88930390195753 
 logit c-v test_score mean is 0.8285814116002795
Cross-validation for 25-th subset using NB: 
 NB c-v train_score mean is 0.7222159135577082 
 NB c-v test_score mean is 0.689203354297694
Cross-validation for 26-th subset using logit: 
 logit c-v train_score mean is 0.9045527205387793 
 logit c-v test_score mean is 0.8475890985324949
Cross-validation for 26-th subset using NB: 
 NB c-v train_score mean is 0.7581406521606205 
 NB c-v test_score mean is 0.6947589098532495
Cross-validation for 27-th subset using logit: 
 logit c-v train_score mean is 0.8982853049851067 
 logit c-v test_score mean is 0.8456673654786864
Cross-validation for 27-th subset using NB: 
 NB c-v train_score mean is 0.7731785187061608 
 NB c-v test_score mean is 0.7267994409503843




Cross-validation for 28-th subset using logit: 
 logit c-v train_score mean is 0.8968239271145431 
 logit c-v test_score mean is 0.8155835080363382
Cross-validation for 28-th subset using NB: 
 NB c-v train_score mean is 0.7347494344039622 
 NB c-v test_score mean is 0.6816561844863732
Cross-validation for 29-th subset using logit: 
 logit c-v train_score mean is 0.8851320306426395 
 logit c-v test_score mean is 0.8231306778476588
Cross-validation for 29-th subset using NB: 
 NB c-v train_score mean is 0.7048815960727107 
 NB c-v test_score mean is 0.6206848357791754
Cross-validation for 30-th subset using logit: 
 logit c-v train_score mean is 0.8782335059966282 
 logit c-v test_score mean is 0.8080363382250175
Cross-validation for 30-th subset using NB: 
 NB c-v train_score mean is 0.7535455665132205 
 NB c-v test_score mean is 0.7174353598881901




Cross-validation for 31-th subset using logit: 
 logit c-v train_score mean is 0.8916034101728669 
 logit c-v test_score mean is 0.8138364779874214
Cross-validation for 31-th subset using NB: 
 NB c-v train_score mean is 0.7232601916475223 
 NB c-v test_score mean is 0.6798043326345213
Cross-validation for 32-th subset using logit: 
 logit c-v train_score mean is 0.9081026545889712 
 logit c-v test_score mean is 0.8569182389937108
Cross-validation for 32-th subset using NB: 
 NB c-v train_score mean is 0.744564600239341 
 NB c-v test_score mean is 0.7062893081761007
Cross-validation for 33-th subset using logit: 
 logit c-v train_score mean is 0.8909757951101056 
 logit c-v test_score mean is 0.826764500349406
Cross-validation for 33-th subset using NB: 
 NB c-v train_score mean is 0.7211707619605001 
 NB c-v test_score mean is 0.6834381551362683




Cross-validation for 34-th subset using logit: 
 logit c-v train_score mean is 0.9068465509560539 
 logit c-v test_score mean is 0.8437106918238992
Cross-validation for 34-th subset using NB: 
 NB c-v train_score mean is 0.738088416418445 
 NB c-v test_score mean is 0.7060796645702306
Cross-validation for 35-th subset using logit: 
 logit c-v train_score mean is 0.8803273032206217 
 logit c-v test_score mean is 0.8082809224318659
Cross-validation for 35-th subset using NB: 
 NB c-v train_score mean is 0.7220040880146051 
 NB c-v test_score mean is 0.6627533193570929
Cross-validation for 36-th subset using logit: 
 logit c-v train_score mean is 0.8934810143167864 
 logit c-v test_score mean is 0.8306079664570231
Cross-validation for 36-th subset using NB: 
 NB c-v train_score mean is 0.7443562687258148 
 NB c-v test_score mean is 0.6985674353598882




Cross-validation for 37-th subset using logit: 
 logit c-v train_score mean is 0.8972414636489898 
 logit c-v test_score mean is 0.8248777078965759
Cross-validation for 37-th subset using NB: 
 NB c-v train_score mean is 0.732867462723072 
 NB c-v test_score mean is 0.6779874213836478
Cross-validation for 38-th subset using logit: 
 logit c-v train_score mean is 0.8880521658615841 
 logit c-v test_score mean is 0.8098183088749128
Cross-validation for 38-th subset using NB: 
 NB c-v train_score mean is 0.7341244398633835 
 NB c-v test_score mean is 0.6892382948986724
Cross-validation for 39-th subset using logit: 
 logit c-v train_score mean is 0.9001681501733912 
 logit c-v test_score mean is 0.8401118099231308
Cross-validation for 39-th subset using NB: 
 NB c-v train_score mean is 0.7301508547269853 
 NB c-v test_score mean is 0.6926974143955277




Cross-validation for 40-th subset using logit: 
 logit c-v train_score mean is 0.9135310662904761 
 logit c-v test_score mean is 0.8418588399720475
Cross-validation for 40-th subset using NB: 
 NB c-v train_score mean is 0.7334937675247422 
 NB c-v test_score mean is 0.7042627533193571
Cross-validation for 41-th subset using logit: 
 logit c-v train_score mean is 0.9072675815200777 
 logit c-v test_score mean is 0.8514325646401119
Cross-validation for 41-th subset using NB: 
 NB c-v train_score mean is 0.7462321258549454 
 NB c-v test_score mean is 0.6986722571628231
Cross-validation for 42-th subset using logit: 
 logit c-v train_score mean is 0.8845026685650893 
 logit c-v test_score mean is 0.806219426974144
Cross-validation for 42-th subset using NB: 
 NB c-v train_score mean is 0.7234707069295341 
 NB c-v test_score mean is 0.6796296296296297




Cross-validation for 43-th subset using logit: 
 logit c-v train_score mean is 0.8982879255072893 
 logit c-v test_score mean is 0.847519217330538
Cross-validation for 43-th subset using NB: 
 NB c-v train_score mean is 0.7506236842794874 
 NB c-v test_score mean is 0.7267994409503842
Cross-validation for 44-th subset using logit: 
 logit c-v train_score mean is 0.8878447078554519 
 logit c-v test_score mean is 0.8363382250174702
Cross-validation for 44-th subset using NB: 
 NB c-v train_score mean is 0.7299473275041273 
 NB c-v test_score mean is 0.6948637316561844
Cross-validation for 45-th subset using logit: 
 logit c-v train_score mean is 0.8692577807671142 
 logit c-v test_score mean is 0.7856394129979035
Cross-validation for 45-th subset using NB: 
 NB c-v train_score mean is 0.7021566897563788 
 NB c-v test_score mean is 0.6306429070580014




Cross-validation for 46-th subset using logit: 
 logit c-v train_score mean is 0.8907670268428823 
 logit c-v test_score mean is 0.8211390635918938
Cross-validation for 46-th subset using NB: 
 NB c-v train_score mean is 0.7395497942890087 
 NB c-v test_score mean is 0.7002795248078267
Cross-validation for 47-th subset using logit: 
 logit c-v train_score mean is 0.8855443261327206 
 logit c-v test_score mean is 0.8285115303983229
Cross-validation for 47-th subset using NB: 
 NB c-v train_score mean is 0.7378800849049189 
 NB c-v test_score mean is 0.7004192872117401
Cross-validation for 48-th subset using logit: 
 logit c-v train_score mean is 0.884294337051563 
 logit c-v test_score mean is 0.8270440251572329
Cross-validation for 48-th subset using NB: 
 NB c-v train_score mean is 0.7222159135577082 
 NB c-v test_score mean is 0.6778127183787561




Cross-validation for 49-th subset using logit: 
 logit c-v train_score mean is 0.9030904691608216 
 logit c-v test_score mean is 0.8343466107617052
Cross-validation for 49-th subset using NB: 
 NB c-v train_score mean is 0.7539631030476672 
 NB c-v test_score mean is 0.7249825296995108
Cross-validation for 50-th subset using logit: 
 logit c-v train_score mean is 0.8872197133148731 
 logit c-v test_score mean is 0.8232704402515724
Cross-validation for 50-th subset using NB: 
 NB c-v train_score mean is 0.7414335129846875 
 NB c-v test_score mean is 0.7024458420684836




Cross-validation for 51-th subset using logit: 
 logit c-v train_score mean is 0.8813672137734645 
 logit c-v test_score mean is 0.8211390635918938
Cross-validation for 51-th subset using NB: 
 NB c-v train_score mean is 0.7403901084022676 
 NB c-v test_score mean is 0.6986722571628232
Cross-validation for 52-th subset using logit: 
 logit c-v train_score mean is 0.8995392248495383 
 logit c-v test_score mean is 0.8399021663172606
Cross-validation for 52-th subset using NB: 
 NB c-v train_score mean is 0.7643984591329566 
 NB c-v test_score mean is 0.702271139063592
Cross-validation for 53-th subset using logit: 
 logit c-v train_score mean is 0.8943195814152567 
 logit c-v test_score mean is 0.8363032844164919
Cross-validation for 53-th subset using NB: 
 NB c-v train_score mean is 0.7324464321590483 
 NB c-v test_score mean is 0.702375960866527




Cross-validation for 54-th subset using logit: 
 logit c-v train_score mean is 0.908309239087709 
 logit c-v test_score mean is 0.85104821802935
Cross-validation for 54-th subset using NB: 
 NB c-v train_score mean is 0.750621937264699 
 NB c-v test_score mean is 0.7154088050314465
Cross-validation for 55-th subset using logit: 
 logit c-v train_score mean is 0.8859653566967445 
 logit c-v test_score mean is 0.823235499650594
Cross-validation for 55-th subset using NB: 
 NB c-v train_score mean is 0.7205470776810126 
 NB c-v test_score mean is 0.6664570230607967
Cross-validation for 56-th subset using logit: 
 logit c-v train_score mean is 0.9047597417912143 
 logit c-v test_score mean is 0.8531446540880504
Cross-validation for 56-th subset using NB: 
 NB c-v train_score mean is 0.7687869602816189 
 NB c-v test_score mean is 0.7304332634521313




Cross-validation for 57-th subset using logit: 
 logit c-v train_score mean is 0.8801185349533984 
 logit c-v test_score mean is 0.7741090146750524
Cross-validation for 57-th subset using NB: 
 NB c-v train_score mean is 0.7211729457289856 
 NB c-v test_score mean is 0.6854996505939902
Cross-validation for 58-th subset using logit: 
 logit c-v train_score mean is 0.900167276665997 
 logit c-v test_score mean is 0.83256464011181
Cross-validation for 58-th subset using NB: 
 NB c-v train_score mean is 0.7426861225880278 
 NB c-v test_score mean is 0.7082459818308876




Cross-validation for 59-th subset using logit: 
 logit c-v train_score mean is 0.9007944549750613 
 logit c-v test_score mean is 0.8229559748427672
Cross-validation for 59-th subset using NB: 
 NB c-v train_score mean is 0.7320332631615727 
 NB c-v test_score mean is 0.7004192872117401
Cross-validation for 60-th subset using logit: 
 logit c-v train_score mean is 0.8972405901415955 
 logit c-v test_score mean is 0.8455625436757511
Cross-validation for 60-th subset using NB: 
 NB c-v train_score mean is 0.75604641818293 
 NB c-v test_score mean is 0.7117749825296995




Cross-validation for 61-th subset using logit: 
 logit c-v train_score mean is 0.8824114918632787 
 logit c-v test_score mean is 0.8117749825296997
Cross-validation for 61-th subset using NB: 
 NB c-v train_score mean is 0.7094793022422935 
 NB c-v test_score mean is 0.6665269042627534
Cross-validation for 62-th subset using logit: 
 logit c-v train_score mean is 0.8913976991815236 
 logit c-v test_score mean is 0.8270090845562544
Cross-validation for 62-th subset using NB: 
 NB c-v train_score mean is 0.7412256182248583 
 NB c-v test_score mean is 0.6948287910552061




Cross-validation for 63-th subset using logit: 
 logit c-v train_score mean is 0.8939024816345069 
 logit c-v test_score mean is 0.840146750524109
Cross-validation for 63-th subset using NB: 
 NB c-v train_score mean is 0.7706741730068745 
 NB c-v test_score mean is 0.7381201956673655
Cross-validation for 64-th subset using logit: 
 logit c-v train_score mean is 0.8987028415195535 
 logit c-v test_score mean is 0.840041928721174
Cross-validation for 64-th subset using NB: 
 NB c-v train_score mean is 0.7336999152697828 
 NB c-v test_score mean is 0.7022012578616352
Cross-validation for 65-th subset using logit: 
 logit c-v train_score mean is 0.8901394117801207 
 logit c-v test_score mean is 0.8380503144654089
Cross-validation for 65-th subset using NB: 
 NB c-v train_score mean is 0.7531275932250766 
 NB c-v test_score mean is 0.6946191474493362




Cross-validation for 66-th subset using logit: 
 logit c-v train_score mean is 0.900581755924564 
 logit c-v test_score mean is 0.8380852550663871
Cross-validation for 66-th subset using NB: 
 NB c-v train_score mean is 0.7570950638097151 
 NB c-v test_score mean is 0.7002795248078267
Cross-validation for 67-th subset using logit: 
 logit c-v train_score mean is 0.9005839396930495 
 logit c-v test_score mean is 0.8475890985324949
Cross-validation for 67-th subset using NB: 
 NB c-v train_score mean is 0.7084341506450852 
 NB c-v test_score mean is 0.6685534591194968




Cross-validation for 68-th subset using logit: 
 logit c-v train_score mean is 0.8746826984390422 
 logit c-v test_score mean is 0.8099231306778476
Cross-validation for 68-th subset using NB: 
 NB c-v train_score mean is 0.7195006158227131 
 NB c-v test_score mean is 0.6719426974143955
Cross-validation for 69-th subset using logit: 
 logit c-v train_score mean is 0.9068487347245396 
 logit c-v test_score mean is 0.8474493361285814
Cross-validation for 69-th subset using NB: 
 NB c-v train_score mean is 0.7552192066805846 
 NB c-v test_score mean is 0.7212788259958071
Cross-validation for 70-th subset using logit: 
 logit c-v train_score mean is 0.8987054620417361 
 logit c-v test_score mean is 0.8512578616352202
Cross-validation for 70-th subset using NB: 
 NB c-v train_score mean is 0.7349560189027 
 NB c-v test_score mean is 0.6949336128581411




Cross-validation for 71-th subset using logit: 
 logit c-v train_score mean is 0.8824132388780672 
 logit c-v test_score mean is 0.8173654786862334
Cross-validation for 71-th subset using NB: 
 NB c-v train_score mean is 0.7167840078266263 
 NB c-v test_score mean is 0.6626834381551363
Cross-validation for 72-th subset using logit: 
 logit c-v train_score mean is 0.8853394886487713 
 logit c-v test_score mean is 0.8345562543675751
Cross-validation for 72-th subset using NB: 
 NB c-v train_score mean is 0.7303626802700885 
 NB c-v test_score mean is 0.6929419986023759
Cross-validation for 73-th subset using logit: 
 logit c-v train_score mean is 0.8899328272813831 
 logit c-v test_score mean is 0.81554856743536
Cross-validation for 73-th subset using NB: 
 NB c-v train_score mean is 0.7130270525239996 
 NB c-v test_score mean is 0.6778825995807127




Cross-validation for 74-th subset using logit: 
 logit c-v train_score mean is 0.8938994243586272 
 logit c-v test_score mean is 0.8154786862334031
Cross-validation for 74-th subset using NB: 
 NB c-v train_score mean is 0.7401726050611018 
 NB c-v test_score mean is 0.7020614954577219
Cross-validation for 75-th subset using logit: 
 logit c-v train_score mean is 0.8740585774058578 
 logit c-v test_score mean is 0.8061495457721872
Cross-validation for 75-th subset using NB: 
 NB c-v train_score mean is 0.7230509866266018 
 NB c-v test_score mean is 0.6590845562543676
Cross-validation for 76-th subset using logit: 
 logit c-v train_score mean is 0.8926459412478926 
 logit c-v test_score mean is 0.8211041229909155
Cross-validation for 76-th subset using NB: 
 NB c-v train_score mean is 0.7525004149160123 
 NB c-v test_score mean is 0.7137665967854647




Cross-validation for 77-th subset using logit: 
 logit c-v train_score mean is 0.8826211336378963 
 logit c-v test_score mean is 0.8211390635918938
Cross-validation for 77-th subset using NB: 
 NB c-v train_score mean is 0.7468610511787983 
 NB c-v test_score mean is 0.7024109014675053
Cross-validation for 78-th subset using logit: 
 logit c-v train_score mean is 0.8924380464880635 
 logit c-v test_score mean is 0.8305730258560449
Cross-validation for 78-th subset using NB: 
 NB c-v train_score mean is 0.7253491845808475 
 NB c-v test_score mean is 0.6720475192173304
Cross-validation for 79-th subset using logit: 
 logit c-v train_score mean is 0.9156226797459841 
 logit c-v test_score mean is 0.8495108315863034
Cross-validation for 79-th subset using NB: 
 NB c-v train_score mean is 0.7433098068675152 
 NB c-v test_score mean is 0.7061146051712089




Cross-validation for 80-th subset using logit: 
 logit c-v train_score mean is 0.9053873568539759 
 logit c-v test_score mean is 0.8475192173305383
Cross-validation for 80-th subset using NB: 
 NB c-v train_score mean is 0.715531398223286 
 NB c-v test_score mean is 0.6778825995807127
Cross-validation for 81-th subset using logit: 
 logit c-v train_score mean is 0.8771931586900884 
 logit c-v test_score mean is 0.8157931516422083
Cross-validation for 81-th subset using NB: 
 NB c-v train_score mean is 0.7165774233278885 
 NB c-v test_score mean is 0.6684835779175402
Cross-validation for 82-th subset using logit: 
 logit c-v train_score mean is 0.9049711305806205 
 logit c-v test_score mean is 0.8400768693221524
Cross-validation for 82-th subset using NB: 
 NB c-v train_score mean is 0.7474952175470165 
 NB c-v test_score mean is 0.7176100628930817




Cross-validation for 83-th subset using logit: 
 logit c-v train_score mean is 0.8934827613315747 
 logit c-v test_score mean is 0.8362683438155136
Cross-validation for 83-th subset using NB: 
 NB c-v train_score mean is 0.742053703234598 
 NB c-v test_score mean is 0.7172606568832984
Cross-validation for 84-th subset using logit: 
 logit c-v train_score mean is 0.8999589451524708 
 logit c-v test_score mean is 0.821278825995807
Cross-validation for 84-th subset using NB: 
 NB c-v train_score mean is 0.7059250006551305 
 NB c-v test_score mean is 0.6683438155136269
Cross-validation for 85-th subset using logit: 
 logit c-v train_score mean is 0.8968252373756345 
 logit c-v test_score mean is 0.8306079664570231
Cross-validation for 85-th subset using NB: 
 NB c-v train_score mean is 0.7261803268664669 
 NB c-v test_score mean is 0.6966107617051013




Cross-validation for 86-th subset using logit: 
 logit c-v train_score mean is 0.8936915295987982 
 logit c-v test_score mean is 0.8457023060796646
Cross-validation for 86-th subset using NB: 
 NB c-v train_score mean is 0.7558472584970433 
 NB c-v test_score mean is 0.7214185883997204
Cross-validation for 87-th subset using logit: 
 logit c-v train_score mean is 0.8819974493584087 
 logit c-v test_score mean is 0.8100978336827394
Cross-validation for 87-th subset using NB: 
 NB c-v train_score mean is 0.69422655287777 
 NB c-v test_score mean is 0.6457372466806429
Cross-validation for 88-th subset using logit: 
 logit c-v train_score mean is 0.8857539679073383 
 logit c-v test_score mean is 0.8211740041928722
Cross-validation for 88-th subset using NB: 
 NB c-v train_score mean is 0.7322385373992192 
 NB c-v test_score mean is 0.6910202655485674




Cross-validation for 89-th subset using logit: 
 logit c-v train_score mean is 0.8811610660284239 
 logit c-v test_score mean is 0.8079315164220826
Cross-validation for 89-th subset using NB: 
 NB c-v train_score mean is 0.710932381792612 
 NB c-v test_score mean is 0.6775331935709294
Cross-validation for 90-th subset using logit: 
 logit c-v train_score mean is 0.8851246058297884 
 logit c-v test_score mean is 0.8343116701607268
Cross-validation for 90-th subset using NB: 
 NB c-v train_score mean is 0.7138634358539846 
 NB c-v test_score mean is 0.6611111111111112
Cross-validation for 91-th subset using logit: 
 logit c-v train_score mean is 0.9147841126475136 
 logit c-v test_score mean is 0.8607267645003495
Cross-validation for 91-th subset using NB: 
 NB c-v train_score mean is 0.7397576890488378 
 NB c-v test_score mean is 0.6759608665269042




Cross-validation for 92-th subset using logit: 
 logit c-v train_score mean is 0.8953621124902822 
 logit c-v test_score mean is 0.8382250174703005
Cross-validation for 92-th subset using NB: 
 NB c-v train_score mean is 0.7391313842471676 
 NB c-v test_score mean is 0.6874213836477988
Cross-validation for 93-th subset using logit: 
 logit c-v train_score mean is 0.8709248696290214 
 logit c-v test_score mean is 0.7914395527603075
Cross-validation for 93-th subset using NB: 
 NB c-v train_score mean is 0.7142757313440659 
 NB c-v test_score mean is 0.6552410901467505
Cross-validation for 94-th subset using logit: 
 logit c-v train_score mean is 0.8880508556004927 
 logit c-v test_score mean is 0.8212438853948288
Cross-validation for 94-th subset using NB: 
 NB c-v train_score mean is 0.7291078868982626 
 NB c-v test_score mean is 0.6815513626834382




Cross-validation for 95-th subset using logit: 
 logit c-v train_score mean is 0.8694687328028232 
 logit c-v test_score mean is 0.7893431167016074
Cross-validation for 95-th subset using NB: 
 NB c-v train_score mean is 0.6871271215310837 
 NB c-v test_score mean is 0.6251222921034241
Cross-validation for 96-th subset using logit: 
 logit c-v train_score mean is 0.886171504441785 
 logit c-v test_score mean is 0.8060447239692523
Cross-validation for 96-th subset using NB: 
 NB c-v train_score mean is 0.7370419545601454 
 NB c-v test_score mean is 0.6813766596785465
Cross-validation for 97-th subset using logit: 
 logit c-v train_score mean is 0.907265397751592 
 logit c-v test_score mean is 0.8476240391334731
Cross-validation for 97-th subset using NB: 
 NB c-v train_score mean is 0.7537517142582612 
 NB c-v test_score mean is 0.7003843466107617




Cross-validation for 98-th subset using logit: 
 logit c-v train_score mean is 0.9116534621465571 
 logit c-v test_score mean is 0.8568832983927324
Cross-validation for 98-th subset using NB: 
 NB c-v train_score mean is 0.7871686131323102 
 NB c-v test_score mean is 0.7437456324248777
Cross-validation for 99-th subset using logit: 
 logit c-v train_score mean is 0.8836658484814073 
 logit c-v test_score mean is 0.8136268343815513
Cross-validation for 99-th subset using NB: 
 NB c-v train_score mean is 0.7019527257798238 
 NB c-v test_score mean is 0.6624737945492664
Cross-validation for 100-th subset using logit: 
 logit c-v train_score mean is 0.8888894226989631 
 logit c-v test_score mean is 0.8155835080363382
Cross-validation for 100-th subset using NB: 
 NB c-v train_score mean is 0.7368340598003162 
 NB c-v test_score mean is 0.6835080363382249




Cross-validation for 101-th subset using logit: 
 logit c-v train_score mean is 0.8861719411954823 
 logit c-v test_score mean is 0.8118448637316563
Cross-validation for 101-th subset using NB: 
 NB c-v train_score mean is 0.7209611201858823 
 NB c-v test_score mean is 0.6684136967155835
Cross-validation for 102-th subset using logit: 
 logit c-v train_score mean is 0.8947366811960065 
 logit c-v test_score mean is 0.8269042627533194
Cross-validation for 102-th subset using NB: 
 NB c-v train_score mean is 0.7151125514277478 
 NB c-v test_score mean is 0.6910552061495459
Cross-validation for 103-th subset using logit: 
 logit c-v train_score mean is 0.8890955704440039 
 logit c-v test_score mean is 0.8266946191474493
Cross-validation for 103-th subset using NB: 
 NB c-v train_score mean is 0.746653156418969 
 NB c-v test_score mean is 0.6889937106918238




Cross-validation for 104-th subset using logit: 
 logit c-v train_score mean is 0.8978668949432655 
 logit c-v test_score mean is 0.8248777078965759
Cross-validation for 104-th subset using NB: 
 NB c-v train_score mean is 0.7163690918143623 
 NB c-v test_score mean is 0.6911949685534592
Cross-validation for 105-th subset using logit: 
 logit c-v train_score mean is 0.8970344423965548 
 logit c-v test_score mean is 0.8193570929419985
Cross-validation for 105-th subset using NB: 
 NB c-v train_score mean is 0.7305688280151292 
 NB c-v test_score mean is 0.6966107617051014
Cross-validation for 106-th subset using logit: 
 logit c-v train_score mean is 0.9189629720215582 
 logit c-v test_score mean is 0.8719426974143956
Cross-validation for 106-th subset using NB: 
 NB c-v train_score mean is 0.7598103615447105 
 NB c-v test_score mean is 0.732459818308875




Cross-validation for 107-th subset using logit: 
 logit c-v train_score mean is 0.8886784706632541 
 logit c-v test_score mean is 0.8211390635918938
Cross-validation for 107-th subset using NB: 
 NB c-v train_score mean is 0.7324477424201397 
 NB c-v test_score mean is 0.687141858839972
Cross-validation for 108-th subset using logit: 
 logit c-v train_score mean is 0.8834557699530926 
 logit c-v test_score mean is 0.8250524109014675
Cross-validation for 108-th subset using NB: 
 NB c-v train_score mean is 0.7347476873891737 
 NB c-v test_score mean is 0.6987071977638016
Cross-validation for 109-th subset using logit: 
 logit c-v train_score mean is 0.8918117416863934 
 logit c-v test_score mean is 0.847519217330538
Cross-validation for 109-th subset using NB: 
 NB c-v train_score mean is 0.7263930259169644 
 NB c-v test_score mean is 0.6740041928721174




Cross-validation for 110-th subset using logit: 
 logit c-v train_score mean is 0.9003764816869178 
 logit c-v test_score mean is 0.83445143256464
Cross-validation for 110-th subset using NB: 
 NB c-v train_score mean is 0.7339143613350687 
 NB c-v test_score mean is 0.700524109014675
Cross-validation for 111-th subset using logit: 
 logit c-v train_score mean is 0.8621522348686682 
 logit c-v test_score mean is 0.7705450733752619
Cross-validation for 111-th subset using NB: 
 NB c-v train_score mean is 0.672502423983019 
 NB c-v test_score mean is 0.609853249475891
Cross-validation for 112-th subset using logit: 
 logit c-v train_score mean is 0.8878442711017549 
 logit c-v test_score mean is 0.8251222921034242
Cross-validation for 112-th subset using NB: 
 NB c-v train_score mean is 0.700075995143299 
 NB c-v test_score mean is 0.6515373864430469




Cross-validation for 113-th subset using logit: 
 logit c-v train_score mean is 0.8897214384919767 
 logit c-v test_score mean is 0.8194619147449336
Cross-validation for 113-th subset using NB: 
 NB c-v train_score mean is 0.7357906552178964 
 NB c-v test_score mean is 0.7024807826694619
Cross-validation for 114-th subset using logit: 
 logit c-v train_score mean is 0.8853386151413772 
 logit c-v test_score mean is 0.8249475890985325
Cross-validation for 114-th subset using NB: 
 NB c-v train_score mean is 0.703623745425005 
 NB c-v test_score mean is 0.6533542976939203
Cross-validation for 115-th subset using logit: 
 logit c-v train_score mean is 0.8751024187419747 
 logit c-v test_score mean is 0.8100279524807826
Cross-validation for 115-th subset using NB: 
 NB c-v train_score mean is 0.7167848813340205 
 NB c-v test_score mean is 0.673969252271139




Cross-validation for 116-th subset using logit: 
 logit c-v train_score mean is 0.8991216883150918 
 logit c-v test_score mean is 0.853214535290007
Cross-validation for 116-th subset using NB: 
 NB c-v train_score mean is 0.7320297691319958 
 NB c-v test_score mean is 0.6967155835080363
Cross-validation for 117-th subset using logit: 
 logit c-v train_score mean is 0.90872764912955 
 logit c-v test_score mean is 0.8624737945492663
Cross-validation for 117-th subset using NB: 
 NB c-v train_score mean is 0.7556367432150313 
 NB c-v test_score mean is 0.7155835080363383
Cross-validation for 118-th subset using logit: 
 logit c-v train_score mean is 0.8957827063006087 
 logit c-v test_score mean is 0.8307826694619148
Cross-validation for 118-th subset using NB: 
 NB c-v train_score mean is 0.7224233715638404 
 NB c-v test_score mean is 0.6645352900069882




Cross-validation for 119-th subset using logit: 
 logit c-v train_score mean is 0.8961976223128729 
 logit c-v test_score mean is 0.8399720475192174
Cross-validation for 119-th subset using NB: 
 NB c-v train_score mean is 0.7334880897266796 
 NB c-v test_score mean is 0.6927672955974843
Cross-validation for 120-th subset using logit: 
 logit c-v train_score mean is 0.8746875027297107 
 logit c-v test_score mean is 0.783717679944095
Cross-validation for 120-th subset using NB: 
 NB c-v train_score mean is 0.7086376778679432 
 NB c-v test_score mean is 0.6552760307477289
Cross-validation for 121-th subset using logit: 
 logit c-v train_score mean is 0.8968356095892211 
 logit c-v test_score mean is 0.843466107617051
Cross-validation for 121-th subset using NB: 
 NB c-v train_score mean is 0.7574796277290948 
 NB c-v test_score mean is 0.7077568134171908
1




Cross-validation for 0-th subset using logit: 
 logit c-v train_score mean is 0.8953616757365852 
 logit c-v test_score mean is 0.8362683438155136
Cross-validation for 0-th subset using NB: 
 NB c-v train_score mean is 0.7382980581930627 
 NB c-v test_score mean is 0.7136617749825296
Cross-validation for 1-th subset using logit: 
 logit c-v train_score mean is 0.9118587363842036 
 logit c-v test_score mean is 0.8682040531097135
Cross-validation for 1-th subset using NB: 
 NB c-v train_score mean is 0.7554218603960483 
 NB c-v test_score mean is 0.7230957372466806
Cross-validation for 2-th subset using logit: 
 logit c-v train_score mean is 0.8943195814152565 
 logit c-v test_score mean is 0.8270090845562544
Cross-validation for 2-th subset using NB: 
 NB c-v train_score mean is 0.7451887212725257 
 NB c-v test_score mean is 0.6947239692522711




Cross-validation for 3-th subset using logit: 
 logit c-v train_score mean is 0.8893043387112272 
 logit c-v test_score mean is 0.8361635220125786
Cross-validation for 3-th subset using NB: 
 NB c-v train_score mean is 0.7531262829639853 
 NB c-v test_score mean is 0.713696715583508
Cross-validation for 4-th subset using logit: 
 logit c-v train_score mean is 0.8934810143167862 
 logit c-v test_score mean is 0.8306429070580013
Cross-validation for 4-th subset using NB: 
 NB c-v train_score mean is 0.7399673308234554 
 NB c-v test_score mean is 0.7099231306778476
Cross-validation for 5-th subset using logit: 
 logit c-v train_score mean is 0.8928573300372987 
 logit c-v test_score mean is 0.8419287211740041
Cross-validation for 5-th subset using NB: 
 NB c-v train_score mean is 0.7481158445506242 
 NB c-v test_score mean is 0.6928721174004193




Cross-validation for 6-th subset using logit: 
 logit c-v train_score mean is 0.896823490360846 
 logit c-v test_score mean is 0.8344514325646403
Cross-validation for 6-th subset using NB: 
 NB c-v train_score mean is 0.7053021898830373 
 NB c-v test_score mean is 0.6685883997204752
Cross-validation for 7-th subset using logit: 
 logit c-v train_score mean is 0.8803233724373477 
 logit c-v test_score mean is 0.8041229909154437
Cross-validation for 7-th subset using NB: 
 NB c-v train_score mean is 0.7305688280151292 
 NB c-v test_score mean is 0.6721523410202656
Cross-validation for 8-th subset using logit: 
 logit c-v train_score mean is 0.8872179663000846 
 logit c-v test_score mean is 0.8117749825296995
Cross-validation for 8-th subset using NB: 
 NB c-v train_score mean is 0.7103130650500958 
 NB c-v test_score mean is 0.657058001397624




Cross-validation for 9-th subset using logit: 
 logit c-v train_score mean is 0.9012115547558108 
 logit c-v test_score mean is 0.819392033542977
Cross-validation for 9-th subset using NB: 
 NB c-v train_score mean is 0.7270188939649375 
 NB c-v test_score mean is 0.6946191474493362
Cross-validation for 10-th subset using logit: 
 logit c-v train_score mean is 0.8932779238476254 
 logit c-v test_score mean is 0.8270440251572326
Cross-validation for 10-th subset using NB: 
 NB c-v train_score mean is 0.7176230116787938 
 NB c-v test_score mean is 0.679664570230608
Cross-validation for 11-th subset using logit: 
 logit c-v train_score mean is 0.8851294101204565 
 logit c-v test_score mean is 0.8101327742837177
Cross-validation for 11-th subset using NB: 
 NB c-v train_score mean is 0.6981970807382885 
 NB c-v test_score mean is 0.6438853948287911




Cross-validation for 12-th subset using logit: 
 logit c-v train_score mean is 0.9032996741817421 
 logit c-v test_score mean is 0.8438155136268344
Cross-validation for 12-th subset using NB: 
 NB c-v train_score mean is 0.7353757392056325 
 NB c-v test_score mean is 0.7024458420684836
Cross-validation for 13-th subset using logit: 
 logit c-v train_score mean is 0.8934823245778775 
 logit c-v test_score mean is 0.8438155136268344
Cross-validation for 13-th subset using NB: 
 NB c-v train_score mean is 0.7464443881517457 
 NB c-v test_score mean is 0.7005590496156533
Cross-validation for 14-th subset using logit: 
 logit c-v train_score mean is 0.8982874887535923 
 logit c-v test_score mean is 0.840041928721174
Cross-validation for 14-th subset using NB: 
 NB c-v train_score mean is 0.7274338099772015 
 NB c-v test_score mean is 0.6909503843466107




Cross-validation for 15-th subset using logit: 
 logit c-v train_score mean is 0.8964055170727022 
 logit c-v test_score mean is 0.8248777078965759
Cross-validation for 15-th subset using NB: 
 NB c-v train_score mean is 0.7395480472742202 
 NB c-v test_score mean is 0.7115653389238294
Cross-validation for 16-th subset using logit: 
 logit c-v train_score mean is 0.9009975454442222 
 logit c-v test_score mean is 0.839937106918239
Cross-validation for 16-th subset using NB: 
 NB c-v train_score mean is 0.7428883395497943 
 NB c-v test_score mean is 0.7097134870719776




Cross-validation for 17-th subset using logit: 
 logit c-v train_score mean is 0.8790738201098872 
 logit c-v test_score mean is 0.8042976939203355
Cross-validation for 17-th subset using NB: 
 NB c-v train_score mean is 0.713858194809619 
 NB c-v test_score mean is 0.6890635918937806
Cross-validation for 18-th subset using logit: 
 logit c-v train_score mean is 0.9032966169058622 
 logit c-v test_score mean is 0.8455974842767295
Cross-validation for 18-th subset using NB: 
 NB c-v train_score mean is 0.7466479153746036 
 NB c-v test_score mean is 0.70020964360587
Cross-validation for 19-th subset using logit: 
 logit c-v train_score mean is 0.9062219931691722 
 logit c-v test_score mean is 0.8569182389937108
Cross-validation for 19-th subset using NB: 
 NB c-v train_score mean is 0.7562604274945187 
 NB c-v test_score mean is 0.7230607966457022




Cross-validation for 20-th subset using logit: 
 logit c-v train_score mean is 0.8911858736384204 
 logit c-v test_score mean is 0.8494060097833682
Cross-validation for 20-th subset using NB: 
 NB c-v train_score mean is 0.755631938924363 
 NB c-v test_score mean is 0.7248427672955975
Cross-validation for 21-th subset using logit: 
 logit c-v train_score mean is 0.9045553410609622 
 logit c-v test_score mean is 0.8532494758909854
Cross-validation for 21-th subset using NB: 
 NB c-v train_score mean is 0.750621937264699 
 NB c-v test_score mean is 0.7118798043326345
Cross-validation for 22-th subset using logit: 
 logit c-v train_score mean is 0.8755217022912098 
 logit c-v test_score mean is 0.8080363382250175
Cross-validation for 22-th subset using NB: 
 NB c-v train_score mean is 0.7328696464915576 
 NB c-v test_score mean is 0.6890985324947589




Cross-validation for 23-th subset using logit: 
 logit c-v train_score mean is 0.8851285366130626 
 logit c-v test_score mean is 0.81554856743536
Cross-validation for 23-th subset using NB: 
 NB c-v train_score mean is 0.7205392161144644 
 NB c-v test_score mean is 0.6568832983927324
Cross-validation for 24-th subset using logit: 
 logit c-v train_score mean is 0.8955717542648998 
 logit c-v test_score mean is 0.8287561146051712
Cross-validation for 24-th subset using NB: 
 NB c-v train_score mean is 0.7819459124221486 
 NB c-v test_score mean is 0.7174353598881901
Cross-validation for 25-th subset using logit: 
 logit c-v train_score mean is 0.8993313300897092 
 logit c-v test_score mean is 0.8305380852550664
Cross-validation for 25-th subset using NB: 
 NB c-v train_score mean is 0.7266052882137648 
 NB c-v test_score mean is 0.6968204053109714




Cross-validation for 26-th subset using logit: 
 logit c-v train_score mean is 0.8805308304434798 
 logit c-v test_score mean is 0.8006638714185886
Cross-validation for 26-th subset using NB: 
 NB c-v train_score mean is 0.7257627903320202 
 NB c-v test_score mean is 0.6607267645003494
Cross-validation for 27-th subset using logit: 
 logit c-v train_score mean is 0.894945886216927 
 logit c-v test_score mean is 0.8476240391334731
Cross-validation for 27-th subset using NB: 
 NB c-v train_score mean is 0.7520837518889597 
 NB c-v test_score mean is 0.7211390635918937
Cross-validation for 28-th subset using logit: 
 logit c-v train_score mean is 0.8941077558721535 
 logit c-v test_score mean is 0.8342417889587701
Cross-validation for 28-th subset using NB: 
 NB c-v train_score mean is 0.7961377870563673 
 NB c-v test_score mean is 0.698846960167715




Cross-validation for 29-th subset using logit: 
 logit c-v train_score mean is 0.8895126702247534 
 logit c-v test_score mean is 0.8248427672955975
Cross-validation for 29-th subset using NB: 
 NB c-v train_score mean is 0.7476961242476918 
 NB c-v test_score mean is 0.709748427672956
Cross-validation for 30-th subset using logit: 
 logit c-v train_score mean is 0.8842930267904718 
 logit c-v test_score mean is 0.8080363382250175
Cross-validation for 30-th subset using NB: 
 NB c-v train_score mean is 0.7263908421484788 
 NB c-v test_score mean is 0.6646750524109015
Cross-validation for 31-th subset using logit: 
 logit c-v train_score mean is 0.8976633677204078 
 logit c-v test_score mean is 0.8421034241788959
Cross-validation for 31-th subset using NB: 
 NB c-v train_score mean is 0.7579375616914598 
 NB c-v test_score mean is 0.7083508036338224




Cross-validation for 32-th subset using logit: 
 logit c-v train_score mean is 0.8955686969890199 
 logit c-v test_score mean is 0.8287561146051712
Cross-validation for 32-th subset using NB: 
 NB c-v train_score mean is 0.7284772145596212 
 NB c-v test_score mean is 0.6836477987421384
Cross-validation for 33-th subset using logit: 
 logit c-v train_score mean is 0.9058027096199369 
 logit c-v test_score mean is 0.8569881201956674
Cross-validation for 33-th subset using NB: 
 NB c-v train_score mean is 0.742054139988295 
 NB c-v test_score mean is 0.7174004192872118
Cross-validation for 34-th subset using logit: 
 logit c-v train_score mean is 0.8805347612267538 
 logit c-v test_score mean is 0.8249475890985325
Cross-validation for 34-th subset using NB: 
 NB c-v train_score mean is 0.7136537940793669 
 NB c-v test_score mean is 0.6703703703703704




Cross-validation for 35-th subset using logit: 
 logit c-v train_score mean is 0.8703003118421397 
 logit c-v test_score mean is 0.8024458420684837
Cross-validation for 35-th subset using NB: 
 NB c-v train_score mean is 0.6910932818546309 
 NB c-v test_score mean is 0.6591544374563243
Cross-validation for 36-th subset using logit: 
 logit c-v train_score mean is 0.9078938863217478 
 logit c-v test_score mean is 0.8495457721872816
Cross-validation for 36-th subset using NB: 
 NB c-v train_score mean is 0.7257680313763857 
 NB c-v test_score mean is 0.6835429769392034
Cross-validation for 37-th subset using logit: 
 logit c-v train_score mean is 0.8943191446615597 
 logit c-v test_score mean is 0.8250174703004891
Cross-validation for 37-th subset using NB: 
 NB c-v train_score mean is 0.720966797983945 
 NB c-v test_score mean is 0.6873515024458421




Cross-validation for 38-th subset using logit: 
 logit c-v train_score mean is 0.8957822695469119 
 logit c-v test_score mean is 0.8400069881201956
Cross-validation for 38-th subset using NB: 
 NB c-v train_score mean is 0.745201387129742 
 NB c-v test_score mean is 0.6947938504542278
Cross-validation for 39-th subset using logit: 
 logit c-v train_score mean is 0.8907644063206994 
 logit c-v test_score mean is 0.8419287211740043
Cross-validation for 39-th subset using NB: 
 NB c-v train_score mean is 0.7073881255404826 
 NB c-v test_score mean is 0.6589447938504542
Cross-validation for 40-th subset using logit: 
 logit c-v train_score mean is 0.900374297918432 
 logit c-v test_score mean is 0.8341719077568135
Cross-validation for 40-th subset using NB: 
 NB c-v train_score mean is 0.7293140346433034 
 NB c-v test_score mean is 0.6890286512928022




Cross-validation for 41-th subset using logit: 
 logit c-v train_score mean is 0.9003773551943117 
 logit c-v test_score mean is 0.8495108315863034
Cross-validation for 41-th subset using NB: 
 NB c-v train_score mean is 0.7585590622024615 
 NB c-v test_score mean is 0.7232005590496156
Cross-validation for 42-th subset using logit: 
 logit c-v train_score mean is 0.8769804596395907 
 logit c-v test_score mean is 0.7947589098532495
Cross-validation for 42-th subset using NB: 
 NB c-v train_score mean is 0.7144862466260776 
 NB c-v test_score mean is 0.6571278825995808
Cross-validation for 43-th subset using logit: 
 logit c-v train_score mean is 0.893480577563089 
 logit c-v test_score mean is 0.8363382250174702
Cross-validation for 43-th subset using NB: 
 NB c-v train_score mean is 0.7497859906884112 
 NB c-v test_score mean is 0.7079315164220825




Cross-validation for 44-th subset using logit: 
 logit c-v train_score mean is 0.8918121784400906 
 logit c-v test_score mean is 0.8324947589098531
Cross-validation for 44-th subset using NB: 
 NB c-v train_score mean is 0.7462395506677965 
 NB c-v test_score mean is 0.7004192872117401
Cross-validation for 45-th subset using logit: 
 logit c-v train_score mean is 0.8734357666337645 
 logit c-v test_score mean is 0.8025157232704403
Cross-validation for 45-th subset using NB: 
 NB c-v train_score mean is 0.6779456853102261 
 NB c-v test_score mean is 0.6306429070580013
Cross-validation for 46-th subset using logit: 
 logit c-v train_score mean is 0.9012111180021138 
 logit c-v test_score mean is 0.8344164919636619
Cross-validation for 46-th subset using NB: 
 NB c-v train_score mean is 0.7420528297272037 
 NB c-v test_score mean is 0.7041928721174003




Cross-validation for 47-th subset using logit: 
 logit c-v train_score mean is 0.8859636096819561 
 logit c-v test_score mean is 0.8042976939203355
Cross-validation for 47-th subset using NB: 
 NB c-v train_score mean is 0.7217979402695643 
 NB c-v test_score mean is 0.666491963661775
Cross-validation for 48-th subset using logit: 
 logit c-v train_score mean is 0.8951533442230589 
 logit c-v test_score mean is 0.8381551362683437
Cross-validation for 48-th subset using NB: 
 NB c-v train_score mean is 0.7151125514277478 
 NB c-v test_score mean is 0.6702655485674354
Cross-validation for 49-th subset using logit: 
 logit c-v train_score mean is 0.8989129200478683 
 logit c-v test_score mean is 0.8399720475192174
Cross-validation for 49-th subset using NB: 
 NB c-v train_score mean is 0.7376752474209695 
 NB c-v test_score mean is 0.689203354297694




Cross-validation for 50-th subset using logit: 
 logit c-v train_score mean is 0.8899319537739887 
 logit c-v test_score mean is 0.8325995807127884
Cross-validation for 50-th subset using NB: 
 NB c-v train_score mean is 0.7255562058332824 
 NB c-v test_score mean is 0.679769392033543
Cross-validation for 51-th subset using logit: 
 logit c-v train_score mean is 0.9022527755697451 
 logit c-v test_score mean is 0.8455974842767295
Cross-validation for 51-th subset using NB: 
 NB c-v train_score mean is 0.7800770433521719 
 NB c-v test_score mean is 0.7082809224318658
Cross-validation for 52-th subset using logit: 
 logit c-v train_score mean is 0.8811597557673327 
 logit c-v test_score mean is 0.7872816212438855
Cross-validation for 52-th subset using NB: 
 NB c-v train_score mean is 0.6979835081803968 
 NB c-v test_score mean is 0.6363382250174704




Cross-validation for 53-th subset using logit: 
 logit c-v train_score mean is 0.9045492265092024 
 logit c-v test_score mean is 0.8549615653389238
Cross-validation for 53-th subset using NB: 
 NB c-v train_score mean is 0.7376669491007242 
 NB c-v test_score mean is 0.7079315164220825
Cross-validation for 54-th subset using logit: 
 logit c-v train_score mean is 0.8867982459971524 
 logit c-v test_score mean is 0.8192872117400419
Cross-validation for 54-th subset using NB: 
 NB c-v train_score mean is 0.7345358618460706 
 NB c-v test_score mean is 0.687246680642907
Cross-validation for 55-th subset using logit: 
 logit c-v train_score mean is 0.8949450127095325 
 logit c-v test_score mean is 0.8419636617749825
Cross-validation for 55-th subset using NB: 
 NB c-v train_score mean is 0.7337042828067541 
 NB c-v test_score mean is 0.7004891684136967




Cross-validation for 56-th subset using logit: 
 logit c-v train_score mean is 0.8953603654754938 
 logit c-v test_score mean is 0.8172955974842768
Cross-validation for 56-th subset using NB: 
 NB c-v train_score mean is 0.7385050794454975 
 NB c-v test_score mean is 0.6986373165618448
Cross-validation for 57-th subset using logit: 
 logit c-v train_score mean is 0.8893030284501358 
 logit c-v test_score mean is 0.8380852550663871
Cross-validation for 57-th subset using NB: 
 NB c-v train_score mean is 0.727228535739555 
 NB c-v test_score mean is 0.6720475192173305
Cross-validation for 58-th subset using logit: 
 logit c-v train_score mean is 0.893062167521248 
 logit c-v test_score mean is 0.8210691823899371
Cross-validation for 58-th subset using NB: 
 NB c-v train_score mean is 0.7418541067950141 
 NB c-v test_score mean is 0.7099930118798043




Cross-validation for 59-th subset using logit: 
 logit c-v train_score mean is 0.8782361265188111 
 logit c-v test_score mean is 0.8157582110412299
Cross-validation for 59-th subset using NB: 
 NB c-v train_score mean is 0.7236807854578489 
 NB c-v test_score mean is 0.6646750524109015
Cross-validation for 60-th subset using logit: 
 logit c-v train_score mean is 0.9032992374280451 
 logit c-v test_score mean is 0.826764500349406
Cross-validation for 60-th subset using NB: 
 NB c-v train_score mean is 0.7466531564189691 
 NB c-v test_score mean is 0.7098881900768694
Cross-validation for 61-th subset using logit: 
 logit c-v train_score mean is 0.9014194495156401 
 logit c-v test_score mean is 0.8419986023759609
Cross-validation for 61-th subset using NB: 
 NB c-v train_score mean is 0.7499947589556345 
 NB c-v test_score mean is 0.7117749825296996




Cross-validation for 62-th subset using logit: 
 logit c-v train_score mean is 0.8997471196093676 
 logit c-v test_score mean is 0.8362683438155136
Cross-validation for 62-th subset using NB: 
 NB c-v train_score mean is 0.7380871061573536 
 NB c-v test_score mean is 0.6871767994409503
Cross-validation for 63-th subset using logit: 
 logit c-v train_score mean is 0.8880539128763726 
 logit c-v test_score mean is 0.8344863731656185
Cross-validation for 63-th subset using NB: 
 NB c-v train_score mean is 0.7437264698945677 
 NB c-v test_score mean is 0.6929419986023759
Cross-validation for 64-th subset using logit: 
 logit c-v train_score mean is 0.894945886216927 
 logit c-v test_score mean is 0.8419287211740041
Cross-validation for 64-th subset using NB: 
 NB c-v train_score mean is 0.7362051344764634 
 NB c-v test_score mean is 0.6852201257861635




Cross-validation for 65-th subset using logit: 
 logit c-v train_score mean is 0.8813737650789214 
 logit c-v test_score mean is 0.8081761006289309
Cross-validation for 65-th subset using NB: 
 NB c-v train_score mean is 0.7113617106768808 
 NB c-v test_score mean is 0.6513277428371768
Cross-validation for 66-th subset using logit: 
 logit c-v train_score mean is 0.8888902962063575 
 logit c-v test_score mean is 0.7967505241090147
Cross-validation for 66-th subset using NB: 
 NB c-v train_score mean is 0.7201247368558975 
 NB c-v test_score mean is 0.6588749126484975
Cross-validation for 67-th subset using logit: 
 logit c-v train_score mean is 0.895991037814135 
 logit c-v test_score mean is 0.836198462613557
Cross-validation for 67-th subset using NB: 
 NB c-v train_score mean is 0.7311946960631023 
 NB c-v test_score mean is 0.6797344514325647




Cross-validation for 68-th subset using logit: 
 logit c-v train_score mean is 0.8938998611123242 
 logit c-v test_score mean is 0.8212438853948287
Cross-validation for 68-th subset using NB: 
 NB c-v train_score mean is 0.7374559970650152 
 NB c-v test_score mean is 0.6759958071278827
Cross-validation for 69-th subset using logit: 
 logit c-v train_score mean is 0.8890973174587924 
 logit c-v test_score mean is 0.8154786862334031
Cross-validation for 69-th subset using NB: 
 NB c-v train_score mean is 0.7167835710729291 
 NB c-v test_score mean is 0.6720475192173306
Cross-validation for 70-th subset using logit: 
 logit c-v train_score mean is 0.9055969986285934 
 logit c-v test_score mean is 0.8512928022361985
Cross-validation for 70-th subset using NB: 
 NB c-v train_score mean is 0.7547955555943782 
 NB c-v test_score mean is 0.6929419986023759




Cross-validation for 71-th subset using logit: 
 logit c-v train_score mean is 0.8851267895982741 
 logit c-v test_score mean is 0.8211740041928721
Cross-validation for 71-th subset using NB: 
 NB c-v train_score mean is 0.7251395428062299 
 NB c-v test_score mean is 0.6667714884696017
Cross-validation for 72-th subset using logit: 
 logit c-v train_score mean is 0.9010001659664049 
 logit c-v test_score mean is 0.8437805730258561
Cross-validation for 72-th subset using NB: 
 NB c-v train_score mean is 0.7303570024720261 
 NB c-v test_score mean is 0.6814814814814815
Cross-validation for 73-th subset using logit: 
 logit c-v train_score mean is 0.9032983639206508 
 logit c-v test_score mean is 0.8475890985324949
Cross-validation for 73-th subset using NB: 
 NB c-v train_score mean is 0.7867506398441664 
 NB c-v test_score mean is 0.7417889587700908




Cross-validation for 74-th subset using logit: 
 logit c-v train_score mean is 0.878861557813087 
 logit c-v test_score mean is 0.7969601677148848
Cross-validation for 74-th subset using NB: 
 NB c-v train_score mean is 0.7197041430455708 
 NB c-v test_score mean is 0.6738644304682041
Cross-validation for 75-th subset using logit: 
 logit c-v train_score mean is 0.8886841484613168 
 logit c-v test_score mean is 0.8327742837176799
Cross-validation for 75-th subset using NB: 
 NB c-v train_score mean is 0.7284846393724724 
 NB c-v test_score mean is 0.6930468204053111
Cross-validation for 76-th subset using logit: 
 logit c-v train_score mean is 0.878654536560652 
 logit c-v test_score mean is 0.8062893081761006
Cross-validation for 76-th subset using NB: 
 NB c-v train_score mean is 0.7121880486718319 
 NB c-v test_score mean is 0.6589098532494758




Cross-validation for 77-th subset using logit: 
 logit c-v train_score mean is 0.9076868650693128 
 logit c-v test_score mean is 0.8513976240391334
Cross-validation for 77-th subset using NB: 
 NB c-v train_score mean is 0.7474912867637424 
 NB c-v test_score mean is 0.7099231306778476
Cross-validation for 78-th subset using logit: 
 logit c-v train_score mean is 0.8870091980328614 
 logit c-v test_score mean is 0.8229909154437456
Cross-validation for 78-th subset using NB: 
 NB c-v train_score mean is 0.7151103676592623 
 NB c-v test_score mean is 0.679769392033543
Cross-validation for 79-th subset using logit: 
 logit c-v train_score mean is 0.9012093709873256 
 logit c-v test_score mean is 0.853109713487072
Cross-validation for 79-th subset using NB: 
 NB c-v train_score mean is 0.7401778461054673 
 NB c-v test_score mean is 0.7173654786862335




Cross-validation for 80-th subset using logit: 
 logit c-v train_score mean is 0.8878416505795721 
 logit c-v test_score mean is 0.8230607966457022
Cross-validation for 80-th subset using NB: 
 NB c-v train_score mean is 0.7374651688926547 
 NB c-v test_score mean is 0.6797344514325646
Cross-validation for 81-th subset using logit: 
 logit c-v train_score mean is 0.8886793441706484 
 logit c-v test_score mean is 0.8154786862334034
Cross-validation for 81-th subset using NB: 
 NB c-v train_score mean is 0.7115639276386474 
 NB c-v test_score mean is 0.6701257861635221
Cross-validation for 82-th subset using logit: 
 logit c-v train_score mean is 0.8832483119469607 
 logit c-v test_score mean is 0.7985674353598882
Cross-validation for 82-th subset using NB: 
 NB c-v train_score mean is 0.7042522339951608 
 NB c-v test_score mean is 0.6570230607966457




Cross-validation for 83-th subset using logit: 
 logit c-v train_score mean is 0.9005821926782611 
 logit c-v test_score mean is 0.8418588399720475
Cross-validation for 83-th subset using NB: 
 NB c-v train_score mean is 0.7441488107196828 
 NB c-v test_score mean is 0.7137665967854647
Cross-validation for 84-th subset using logit: 
 logit c-v train_score mean is 0.8872170927926906 
 logit c-v test_score mean is 0.8193570929419985
Cross-validation for 84-th subset using NB: 
 NB c-v train_score mean is 0.7376717533913925 
 NB c-v test_score mean is 0.7004891684136967
Cross-validation for 85-th subset using logit: 
 logit c-v train_score mean is 0.8934831980852719 
 logit c-v test_score mean is 0.8249475890985325
Cross-validation for 85-th subset using NB: 
 NB c-v train_score mean is 0.7470698194460217 
 NB c-v test_score mean is 0.7118099231306778




Cross-validation for 86-th subset using logit: 
 logit c-v train_score mean is 0.9012098077410226 
 logit c-v test_score mean is 0.8344164919636616
Cross-validation for 86-th subset using NB: 
 NB c-v train_score mean is 0.7257662843615972 
 NB c-v test_score mean is 0.6852900069881203
Cross-validation for 87-th subset using logit: 
 logit c-v train_score mean is 0.8987006577510679 
 logit c-v test_score mean is 0.8361635220125786
Cross-validation for 87-th subset using NB: 
 NB c-v train_score mean is 0.7345371721071619 
 NB c-v test_score mean is 0.6837176799440952
Cross-validation for 88-th subset using logit: 
 logit c-v train_score mean is 0.8819952655899232 
 logit c-v test_score mean is 0.8024109014675054
Cross-validation for 88-th subset using NB: 
 NB c-v train_score mean is 0.6990312802997878 
 NB c-v test_score mean is 0.6627882599580712




Cross-validation for 89-th subset using logit: 
 logit c-v train_score mean is 0.9053877936076729 
 logit c-v test_score mean is 0.8438155136268343
Cross-validation for 89-th subset using NB: 
 NB c-v train_score mean is 0.7479053292686123 
 NB c-v test_score mean is 0.7193570929419988
Cross-validation for 90-th subset using logit: 
 logit c-v train_score mean is 0.8901411587949092 
 logit c-v test_score mean is 0.8344863731656185
Cross-validation for 90-th subset using NB: 
 NB c-v train_score mean is 0.7385090102287716 
 NB c-v test_score mean is 0.674074074074074
Cross-validation for 91-th subset using logit: 
 logit c-v train_score mean is 0.9055943781064109 
 logit c-v test_score mean is 0.836198462613557
Cross-validation for 91-th subset using NB: 
 NB c-v train_score mean is 0.7322381006455221 
 NB c-v test_score mean is 0.6966107617051013




Cross-validation for 92-th subset using logit: 
 logit c-v train_score mean is 0.8836697792646815 
 logit c-v test_score mean is 0.8232005590496158
Cross-validation for 92-th subset using NB: 
 NB c-v train_score mean is 0.701131628829238 
 NB c-v test_score mean is 0.6344514325646401
Cross-validation for 93-th subset using logit: 
 logit c-v train_score mean is 0.894109939640639 
 logit c-v test_score mean is 0.8289308176100629
Cross-validation for 93-th subset using NB: 
 NB c-v train_score mean is 0.7759042985298871 
 NB c-v test_score mean is 0.7288958770090845
Cross-validation for 94-th subset using logit: 
 logit c-v train_score mean is 0.882201850088661 
 logit c-v test_score mean is 0.8248427672955975
Cross-validation for 94-th subset using NB: 
 NB c-v train_score mean is 0.7336990417623885 
 NB c-v test_score mean is 0.6965059399021664




Cross-validation for 95-th subset using logit: 
 logit c-v train_score mean is 0.8847136206007985 
 logit c-v test_score mean is 0.8194619147449336
Cross-validation for 95-th subset using NB: 
 NB c-v train_score mean is 0.7117766266891449 
 NB c-v test_score mean is 0.676135569531796
Cross-validation for 96-th subset using logit: 
 logit c-v train_score mean is 0.8801167879386099 
 logit c-v test_score mean is 0.8138714185883998
Cross-validation for 96-th subset using NB: 
 NB c-v train_score mean is 0.715738856229418 
 NB c-v test_score mean is 0.6815164220824599




Cross-validation for 97-th subset using logit: 
 logit c-v train_score mean is 0.8895126702247534 
 logit c-v test_score mean is 0.8154088050314465
Cross-validation for 97-th subset using NB: 
 NB c-v train_score mean is 0.7115599968553734 
 NB c-v test_score mean is 0.6664570230607967
Cross-validation for 98-th subset using logit: 
 logit c-v train_score mean is 0.8723884312680706 
 logit c-v test_score mean is 0.7873165618448638
Cross-validation for 98-th subset using NB: 
 NB c-v train_score mean is 0.7128209047789589 
 NB c-v test_score mean is 0.6610761705101329




Cross-validation for 99-th subset using logit: 
 logit c-v train_score mean is 0.8924376097343665 
 logit c-v test_score mean is 0.8286512928022362
Cross-validation for 99-th subset using NB: 
 NB c-v train_score mean is 0.7357897817105022 
 NB c-v test_score mean is 0.6928721174004193
Cross-validation for 100-th subset using logit: 
 logit c-v train_score mean is 0.895572191018597 
 logit c-v test_score mean is 0.836408106219427
Cross-validation for 100-th subset using NB: 
 NB c-v train_score mean is 0.7426852490806335 
 NB c-v test_score mean is 0.7024109014675053
Cross-validation for 101-th subset using logit: 
 logit c-v train_score mean is 0.8928568932836016 
 logit c-v test_score mean is 0.8250174703004891
Cross-validation for 101-th subset using NB: 
 NB c-v train_score mean is 0.7186672897686079 
 NB c-v test_score mean is 0.6667016072676449




Cross-validation for 102-th subset using logit: 
 logit c-v train_score mean is 0.8811584455062412 
 logit c-v test_score mean is 0.806009783368274
Cross-validation for 102-th subset using NB: 
 NB c-v train_score mean is 0.6990277862702108 
 NB c-v test_score mean is 0.6494758909853249
Cross-validation for 103-th subset using logit: 
 logit c-v train_score mean is 0.8759396755793538 
 logit c-v test_score mean is 0.8025157232704403
Cross-validation for 103-th subset using NB: 
 NB c-v train_score mean is 0.7364160865121724 
 NB c-v test_score mean is 0.6852900069881203




Cross-validation for 104-th subset using logit: 
 logit c-v train_score mean is 0.8713459001930453 
 logit c-v test_score mean is 0.787456324248777
Cross-validation for 104-th subset using NB: 
 NB c-v train_score mean is 0.6946397218752457 
 NB c-v test_score mean is 0.6495457721872816
Cross-validation for 105-th subset using logit: 
 logit c-v train_score mean is 0.9020453175636133 
 logit c-v test_score mean is 0.8437456324248778
Cross-validation for 105-th subset using NB: 
 NB c-v train_score mean is 0.7821559909504634 
 NB c-v test_score mean is 0.7343116701607266
Cross-validation for 106-th subset using logit: 
 logit c-v train_score mean is 0.8746857557149221 
 logit c-v test_score mean is 0.7912997903563941
Cross-validation for 106-th subset using NB: 
 NB c-v train_score mean is 0.7107288545697539 
 NB c-v test_score mean is 0.66659678546471




Cross-validation for 107-th subset using logit: 
 logit c-v train_score mean is 0.9010019129811934 
 logit c-v test_score mean is 0.8475890985324949
Cross-validation for 107-th subset using NB: 
 NB c-v train_score mean is 0.7345402293830418 
 NB c-v test_score mean is 0.7062543675751223
Cross-validation for 108-th subset using logit: 
 logit c-v train_score mean is 0.9074746027725127 
 logit c-v test_score mean is 0.8382250174703005
Cross-validation for 108-th subset using NB: 
 NB c-v train_score mean is 0.7481158445506242 
 NB c-v test_score mean is 0.7042976939203355
Cross-validation for 109-th subset using logit: 
 logit c-v train_score mean is 0.894527912928783 
 logit c-v test_score mean is 0.8417889587700909
Cross-validation for 109-th subset using NB: 
 NB c-v train_score mean is 0.7274377407604755 
 NB c-v test_score mean is 0.6872466806429071




Cross-validation for 110-th subset using logit: 
 logit c-v train_score mean is 0.8895122334710563 
 logit c-v test_score mean is 0.8305380852550664
Cross-validation for 110-th subset using NB: 
 NB c-v train_score mean is 0.7435155178588586 
 NB c-v test_score mean is 0.7040880503144654
Cross-validation for 111-th subset using logit: 
 logit c-v train_score mean is 0.8943200181689537 
 logit c-v test_score mean is 0.8060796645702306
Cross-validation for 111-th subset using NB: 
 NB c-v train_score mean is 0.7397611830784148 
 NB c-v test_score mean is 0.7097833682739343




Cross-validation for 112-th subset using logit: 
 logit c-v train_score mean is 0.8962011163424499 
 logit c-v test_score mean is 0.826869322152341
Cross-validation for 112-th subset using NB: 
 NB c-v train_score mean is 0.7266052882137648 
 NB c-v test_score mean is 0.6911949685534591
Cross-validation for 113-th subset using logit: 
 logit c-v train_score mean is 0.8951524707156645 
 logit c-v test_score mean is 0.8343116701607268
Cross-validation for 113-th subset using NB: 
 NB c-v train_score mean is 0.7566757802604799 
 NB c-v test_score mean is 0.7211390635918937
Cross-validation for 114-th subset using logit: 
 logit c-v train_score mean is 0.896616032354714 
 logit c-v test_score mean is 0.8270440251572326
Cross-validation for 114-th subset using NB: 
 NB c-v train_score mean is 0.7658738131218281 
 NB c-v test_score mean is 0.7004192872117401




Cross-validation for 115-th subset using logit: 
 logit c-v train_score mean is 0.8982848682314095 
 logit c-v test_score mean is 0.8323549965059399
Cross-validation for 115-th subset using NB: 
 NB c-v train_score mean is 0.7445602327023699 
 NB c-v test_score mean is 0.7060097833682739
Cross-validation for 116-th subset using logit: 
 logit c-v train_score mean is 0.8732213205684785 
 logit c-v test_score mean is 0.8118099231306779
Cross-validation for 116-th subset using NB: 
 NB c-v train_score mean is 0.7123941964168727 
 NB c-v test_score mean is 0.6719776380153738
Cross-validation for 117-th subset using logit: 
 logit c-v train_score mean is 0.8730169198382265 
 logit c-v test_score mean is 0.8043326345213139
Cross-validation for 117-th subset using NB: 
 NB c-v train_score mean is 0.692349385487548 
 NB c-v test_score mean is 0.6458071278825996




Cross-validation for 118-th subset using logit: 
 logit c-v train_score mean is 0.8893056489723186 
 logit c-v test_score mean is 0.834346610761705
Cross-validation for 118-th subset using NB: 
 NB c-v train_score mean is 0.7405918886103371 
 NB c-v test_score mean is 0.7079315164220825
Cross-validation for 119-th subset using logit: 
 logit c-v train_score mean is 0.907683371039736 
 logit c-v test_score mean is 0.8438155136268344
Cross-validation for 119-th subset using NB: 
 NB c-v train_score mean is 0.7178287226701373 
 NB c-v test_score mean is 0.6646750524109015
Cross-validation for 120-th subset using logit: 
 logit c-v train_score mean is 0.893480577563089 
 logit c-v test_score mean is 0.8362683438155136
Cross-validation for 120-th subset using NB: 
 NB c-v train_score mean is 0.726602667691582 
 NB c-v test_score mean is 0.7061844863731657
Cross-validation for 121-th subset using logit: 
 logit c-v train_score mean is 0.898096541319088 
 logit c-v test_score mean is 0.836093



In [24]:
All_ZipCode_prediction['y_pred_prob'] =monte_All_ZipCode_prediction['y_pred_prob']/ Monte_Carlo  

All_ZipCode_prediction.sort_values('y_pred_prob',inplace=True,ascending=False)

print(All_ZipCode_prediction.head(100).to_string(index=False))

ZipCode  y_pred_prob
  10036     0.992966
  60601     0.992626
  10010     0.992467
  80111     0.992412
  94103     0.992411
  10011     0.992403
  60654     0.992371
  10003     0.992351
  94111     0.992310
  43215     0.992302
  22102     0.992292
  60611     0.992227
  30309     0.992222
  20036     0.992214
  10021     0.992209
  20004     0.992194
  28202     0.992192
  98109     0.992180
   2210     0.992154
  90048     0.992139
  90245     0.992129
  70809     0.992127
  60603     0.992125
  94301     0.992117
  30303     0.992115
   2110     0.992088
  90210     0.992082
  33166     0.991864
  37027     0.991809
  77024     0.991800
  10004     0.991788
  78759     0.991770
  92121     0.991747
  98052     0.991734
  46204     0.991694
  75039     0.991666
  98101     0.991660
  10001     0.991620
  75093     0.991595
  15222     0.991585
  10016     0.991565
  89109     0.991561
  95054     0.991551
  77030     0.991519
  10014     0.991507
  30339     0.991477
  92108     0