In [23]:
import sklearn
import numpy as np
import pandas as pd


from sklearn.metrics import classification_report, accuracy_score
from numpy import set_printoptions
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold, StratifiedKFold, RepeatedKFold
from statistics import mean, stdev

from sklearn.neighbors import KNeighborsClassifier

In [24]:
def Train_Models_CV10(df, targetFeature, models):
    """Evaluate every model in ``models`` with ``Training_basic``.

    Each model is passed to ``Training_basic`` together with ``df`` and
    ``targetFeature``; whatever that call returns is stored in the result
    dictionary.

    Args:
        df: DataFrame forwarded unchanged to ``Training_basic``.
        targetFeature: Name of the target column, forwarded unchanged.
        models: Iterable of model instances to evaluate.

    Returns:
        dict mapping a model label to the value returned by
        ``Training_basic`` for that model. The label is the model's class
        name. When several models share a class, the second and later ones
        get a numeric suffix (``"<ClassName>_2"``, ``"<ClassName>_3"``, ...)
        so that no result silently overwrites an earlier one.
    """
    accuracies = {}

    for mod in models:
        base_name = type(mod).__name__

        # Keep the plain class name for the first model of a class; only
        # later duplicates are suffixed, so existing keys stay unchanged.
        key = base_name
        suffix = 1
        while key in accuracies:
            suffix += 1
            key = base_name + "_" + str(suffix)

        accuracies[key] = Training_basic(df, targetFeature, mod)

    return accuracies
        

In [25]:
# Evaluate one model with shuffled, stratified 10-fold cross-validation
def Training_basic(df, targetFeature, model, random_state=None):
    """Cross-validate ``model`` on ``df`` and return the per-fold accuracies.

    The frame is split into features (every column except ``targetFeature``)
    and target (the ``targetFeature`` column as an array). The model is then
    scored with ``cross_val_score`` using a shuffled ``StratifiedKFold`` with
    10 splits and ``scoring='accuracy'``. The mean and standard deviation of
    the fold scores are printed, prefixed with the model's class name.

    Args:
        df: DataFrame containing the feature columns and the target column.
        targetFeature: Name of the target column in ``df``.
        model: Estimator instance handed to ``cross_val_score``.
        random_state: Optional seed forwarded to ``StratifiedKFold`` so the
            shuffled folds can be reproduced. ``None`` (the default) keeps
            the previous unseeded behaviour.

    Returns:
        The scores returned by ``cross_val_score`` (one accuracy per fold).
    """
    model_name = type(model).__name__

    # Split dataframe into X and y
    X = df.drop(columns=[targetFeature])
    y = df[targetFeature].values

    # prepare the cross-validation procedure
    cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_state)

    # evaluate model (n_jobs=-1 is passed through to cross_val_score)
    scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

    # report performance
    print(model_name+' Accuracy: %.3f (%.3f)' % (mean(scores), stdev(scores)))

    # Return all of the stats
    return scores

In [26]:
def SavePredictionsToFile(fileName, stats):
    """Write ``stats`` to ``fileName`` as CSV.

    ``stats`` is wrapped in a ``pandas.DataFrame`` and written with
    ``DataFrame.to_csv`` using its default options.

    Args:
        fileName: Destination handed to ``DataFrame.to_csv``.
        stats: Data handed to the ``pandas.DataFrame`` constructor.

    Returns:
        None.
    """
    # The former trailing `df_y.shape` expression was a no-op inside a
    # function (nothing is displayed or returned), so it has been dropped.
    pd.DataFrame(stats).to_csv(fileName)

# Morgue

In [27]:
#Repeat shuffled, stratified 10-fold cross-validation 10 times for one model
def Training_Repeat(df, targetFeature, model, random_state=None):
    """Run 10 rounds of 10-fold cross-validation and return the round means.

    In each of the 10 iterations a fresh shuffled ``StratifiedKFold`` with
    10 splits is built and the model is scored with ``cross_val_score``
    (``scoring='accuracy'``). The mean and standard deviation of the fold
    scores are printed and the mean is collected.

    Args:
        df: DataFrame containing the feature columns and the target column.
        targetFeature: Name of the target column in ``df``.
        model: Estimator instance handed to ``cross_val_score``.
        random_state: Optional integer base seed. Iteration ``i`` uses
            ``random_state + i`` so every round gets a different but
            reproducible shuffle. ``None`` (the default) keeps the previous
            unseeded behaviour.

    Returns:
        list of 10 values, the mean fold accuracy of each iteration.
    """
    # Split dataframe into X and y once; neither depends on the iteration
    X = df.drop(columns=[targetFeature])
    y = df[targetFeature].values

    accuracies = []

    for i in range(10):
        print("__________________________________________________________________________________Iteration:"+str(i))

        # prepare the cross-validation procedure
        seed = None if random_state is None else random_state + i
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)

        # evaluate model
        scores = cross_val_score(model, X, y, scoring='accuracy', cv=cv, n_jobs=-1)

        # report performance
        print('Accuracy: %.3f (%.3f)' % (mean(scores), stdev(scores)))

        accuracies.append(mean(scores))

    # Return all of the stats
    return accuracies

In [28]:
#Evaluate several models on the same shuffled, stratified 10 folds
def Training_All(df, targetFeature, models, random_state=None):
    """Fit and score every model on each of 10 stratified folds.

    The frame is split into features (all columns except ``targetFeature``)
    and target. For every fold produced by a shuffled ``StratifiedKFold``
    with 10 splits, each model is fitted on the training part and scored
    with ``accuracy_score`` on the test part; this is repeated 10 times on
    the same split and the 10 accuracies are averaged per model.

    Args:
        df: DataFrame containing the feature columns and the target column.
        targetFeature: Name of the target column in ``df``.
        models: Sequence of estimator instances exposing ``fit`` and
            ``predict``. The instances are refitted in place.
        random_state: Optional seed forwarded to ``StratifiedKFold`` so the
            shuffled folds can be reproduced. ``None`` (the default) keeps
            the previous unseeded behaviour.

    Returns:
        list with one entry per fold; each entry is a list holding the mean
        accuracy of every model, in the order of ``models``.
    """
    accuracies = []

    cv_10 = StratifiedKFold(n_splits=10, shuffle=True, random_state=random_state)

    features = list(df.columns)
    features.remove(targetFeature)

    X = df[features]
    y = df[targetFeature]

    # Repeat for 10 folds
    for j, (train_index, test_index) in enumerate(cv_10.split(X, y)):
        print("___________________________________________________________________________ Fold "+str(j+1))

        fold_temp = []
        acc_temp = []

        # Split the data. split() yields integer positions, so every
        # selection must be positional (.iloc). The previous y.loc[...] for
        # the test target looked positions up as labels, which fails or
        # picks the wrong rows whenever df does not have a default 0..n-1
        # index.
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        # Repeat 10 times for each fold
        for i in range(10):

            repeat_temp = []

            print("_________________ Repeat "+str(i+1))

            #Train the models
            for mod in models:
                model_name = type(mod).__name__
                print("______________________________________________________________ "+model_name)
                mod.fit(X_train, y_train) #Training the model
                acc = accuracy_score(y_test, mod.predict(X_test))
                print(str(i+1)+": "+model_name+": "+str(acc))
                repeat_temp.append(acc)

            fold_temp.append(repeat_temp)

        # rows become models, columns become repeats
        fold_temp = np.transpose(fold_temp)

        for k in range(len(models)):
            acc_temp.append(mean(fold_temp[k]))

        accuracies.append(acc_temp)

    return accuracies
        

In [29]:
#Evaluate several models on the same shuffled, stratified folds
def Training_All_2(df, targetFeature, models, folds, repeats, random_state=None):
    """Fit and score every model ``repeats`` times on each stratified fold.

    The frame is split into features (all columns except ``targetFeature``)
    and target. For every fold produced by a shuffled ``StratifiedKFold``
    with ``folds`` splits, each model is fitted on the training part and
    scored with ``accuracy_score`` on the test part ``repeats`` times; the
    repeat accuracies are averaged per model and printed.

    Args:
        df: DataFrame containing the feature columns and the target column.
        targetFeature: Name of the target column in ``df``.
        models: Sequence of estimator instances exposing ``fit`` and
            ``predict``. The instances are refitted in place.
        folds: Number of splits passed to ``StratifiedKFold``.
        repeats: Number of fit/score rounds per model and fold.
        random_state: Optional seed forwarded to ``StratifiedKFold`` so the
            shuffled folds can be reproduced. ``None`` (the default) keeps
            the previous unseeded behaviour.

    Returns:
        The per-fold lists of mean accuracies passed through
        ``np.transpose``: one row per model (in the order of ``models``),
        one column per fold.
    """
    accuracies = []

    cv_10 = StratifiedKFold(n_splits=folds, shuffle=True, random_state=random_state)

    features = list(df.columns)
    features.remove(targetFeature)

    X = df[features]
    y = df[targetFeature]

    # Repeat for every fold
    for fold_no, (train_index, test_index) in enumerate(cv_10.split(X, y), start=1):
        print("___________________________________________________________________________ Fold "+str(fold_no))

        mod_temp = []

        # Split the data. split() yields integer positions, so the rows are
        # selected positionally. The previous reindex(index=...) treated the
        # positions as labels, which produces NaN rows whenever df does not
        # have a default 0..n-1 index.
        X_train = X.iloc[train_index]
        X_test = X.iloc[test_index]
        y_train = y.iloc[train_index]
        y_test = y.iloc[test_index]

        for mod in models:

            mod_repeats_temp = []

            model_name = type(mod).__name__

            print("____________________________ Model "+model_name)

            # separate loop variable: the repeat counter no longer shadows
            # an outer index
            for rep in range(repeats):

                mod.fit(X_train, y_train) #Training the model
                acc = accuracy_score(y_test, mod.predict(X_test))
                print(str(rep+1)+": "+str(acc))
                mod_repeats_temp.append(acc)

            avg_acc = mean(mod_repeats_temp)
            mod_temp.append(avg_acc)
            print("\n Average Accuracy: "+str(avg_acc)+"\n")

        accuracies.append(mod_temp)

    return np.transpose(accuracies)