In [None]:
%matplotlib inline

In [None]:
### Faster SVM run
# NOTE(review): patch_sklearn() is invoked here, ahead of every `sklearn` import in the
# following cells; presumably that ordering is required for the patch to take effect,
# so keep this cell before the import cell.
from sklearnex import patch_sklearn
patch_sklearn()

In [None]:
# Data Analysis Packages
import os
import random
import pandas as pd
import numpy as np
import pathlib
import pickle
import itertools
import collections
from scipy.stats.stats import spearmanr
from scipy.stats.stats import pearsonr
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

# Plot Packages
import seaborn as sns
import matplotlib.pyplot as plt
# from matplotlib.ticker import FixedFormatter
from matplotlib import gridspec 
from pylab import *
#import pylab as pl
from matplotlib.colors import LogNorm
from statannotations.Annotator import Annotator

# ML Packages
from sklearn.svm import SVC
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from scipy.optimize import curve_fit # Fit data


In [None]:
# Notebook-wide seaborn configuration (applies to every figure drawn afterwards).
# Font size adjustment: scale plot fonts by 1.5
sns.set(font_scale = 1.5)
# Set background style to "white"
sns.set_style("white")

In [None]:
### Function to process F2a and b
def processExcelFormat01(name, percent=0.5):
    """Load a header-less 4-column CSV grid and label each point by a density quantile.

    The CSV columns are interpreted, in order, as 'Nutrient',
    'Conjugation Inhibitor', 'N0' and 'N1'. Only 'N1' is kept as the response
    and is renamed to 'Density' ('N0' is currently discarded).

    Parameters
    ----------
    name : str or path-like
        CSV file to read (no header row).
    percent : float, default 0.5
        Quantile of the retained densities used as the class boundary.

    Returns
    -------
    pandas.DataFrame
        Columns 'Nutrient', 'Conjugation Inhibitor', 'Density', 'BinaryClass',
        where BinaryClass is 1 for Density >= quantile and -1 otherwise.
    """
    DF = pd.read_csv(name, header=None)
    DF.columns = ['Nutrient', 'Conjugation Inhibitor', 'N0', 'N1']
    DF01 = DF[['Nutrient', 'Conjugation Inhibitor', 'N1']].rename(columns={'N1': 'Density'})

    # Thin the grid: keep every 5th distinct level along each axis
    # (presumably reducing a 50x50 scan to 10x10 -- depends on the input file).
    nutrientLevels = sorted(set(DF01['Nutrient']))
    inhibitorLevels = sorted(set(DF01['Conjugation Inhibitor']))
    onCoarseGrid = DF01['Nutrient'].isin(nutrientLevels[::5]) & \
        DF01['Conjugation Inhibitor'].isin(inhibitorLevels[::5])
    # Explicit copy so the column added below never writes into a view of DF.
    DF01 = DF01.loc[onCoarseGrid].copy()

    # Compute the boundary once instead of once per row.
    threshold = DF01['Density'].quantile(percent)
    DF01['BinaryClass'] = np.where(DF01['Density'] >= threshold, 1, -1)

    return DF01

In [None]:
### Function to import sensor data
def processExcelFormat02(name, sheet):
    """Read one Excel sheet and add a median-split 'BinaryClass' column.

    Parameters
    ----------
    name : str or path-like
        Excel workbook to read.
    sheet : str or int
        Sheet passed to ``pd.read_excel(sheet_name=...)``.

    Returns
    -------
    pandas.DataFrame
        The sheet contents plus 'BinaryClass': 1 where 'Density' is at or
        above the threshold, -1 otherwise.
    """
    DF = pd.read_excel(name, sheet_name=sheet)

    # The threshold is the median of the sheet's LAST column as loaded.
    # NOTE(review): this equals the Density median only when 'Density' is the
    # last column of the sheet -- confirm against the input files.
    # Computed once here rather than once per row.
    threshold = np.median(DF.iloc[:, -1])
    DF['BinaryClass'] = np.where(DF['Density'] >= threshold, 1, -1)

    return DF

In [None]:
### Function to import Fig 1 modeling data
def processExcelFormat05(name, sheet, percentage = 0):
    """Read a 4-column modeling sheet and optionally re-derive 'BinaryClass'.

    The four sheet columns are renamed, in order, to 'Environmental factor 1',
    'Environmental factor 2', 'Density' and 'BinaryClass'.

    Parameters
    ----------
    name : str or path-like
        Excel workbook to read.
    sheet : str or int
        Sheet to load.
    percentage : number, default 0
        0 keeps the BinaryClass column stored in the sheet. Any other value
        selects the boundary as the sorted density at position
        ``int(percentage * N / 10)``, i.e. the value is expressed in TENTHS
        of the data (5 -> the median position), not as a 0-1 fraction.

    Returns
    -------
    pandas.DataFrame
        The renamed sheet; when ``percentage`` is non-zero, BinaryClass is 1
        for densities strictly above the boundary and -1 otherwise.

    Raises
    ------
    ValueError
        If ``percentage`` maps to a position outside the sorted densities.
    """
    DF = pd.read_excel(name, sheet_name=sheet)
    DF.columns = ['Environmental factor 1', 'Environmental factor 2', 'Density', 'BinaryClass']

    # Assign binary class, if the percentage is known
    if percentage != 0:
        # np.sort is used explicitly; the bare `sort` only existed via a star import.
        sortedDensity = np.sort(DF['Density'].to_numpy())
        position = int(percentage * len(sortedDensity) / 10)
        if not 0 <= position < len(sortedDensity):
            raise ValueError(
                "percentage=%r selects position %d, outside the %d sorted densities; "
                "expected 0 <= percentage < 10" % (percentage, position, len(sortedDensity)))
        BoundaryVal = sortedDensity[position]
        DF['BinaryClass'] = np.where(DF['Density'] > BoundaryVal, 1, -1)

    return DF

In [None]:
### Function to import experimental data of 3 columns
### No preassigned column/drug names here
### Binary boundary is assigned here from a density quantile
def processExcelFormat03(name, sheet, quant=0.25, zero_floor=0.2):
    """Read a header-less 3-column sheet, log10-transform both factors, label by quantile.

    Parameters
    ----------
    name : str or path-like
        Excel workbook to read.
    sheet : str or int
        Sheet to load (read with ``header=None``).
    quant : float, default 0.25
        Density quantile used as the class boundary.
    zero_floor : float, default 0.2
        Positive value substituted for exact zeros in the two factor columns
        so that log10 stays finite.

    Returns
    -------
    pandas.DataFrame
        Columns 'Environmental factor 1', 'Environmental factor 2' (both
        log10-transformed), 'Density', and 'BinaryClass' (1 where
        Density >= quantile, -1 otherwise).
    """
    DF = pd.read_excel(name, sheet_name=sheet, header=None)
    DF.columns = ['Environmental factor 1', 'Environmental factor 2', 'Density']

    for factor in ('Environmental factor 1', 'Environmental factor 2'):
        # Cast to float first: writing the floor into an integer column in place
        # is a dtype-incompatible assignment in recent pandas.
        levels = DF[factor].astype(float)
        DF[factor] = np.log10(levels.mask(levels == 0, zero_floor))

    # Compute the boundary once instead of once per row.
    threshold = DF['Density'].quantile(quant)
    DF['BinaryClass'] = np.where(DF['Density'] >= threshold, 1, -1)

    return DF

In [None]:
### Visualize inputs before or after normalization
### The only mandatory input is the dataframe
def heatmapGenerator(dataframe, values = 0, centered=1, labels=1, colorP="coolwarm"): # 'vlag', 'Spectral'
    """Draw a heatmap of one column of ``dataframe`` over its first two columns.

    Column 0 of ``dataframe`` becomes the heatmap's x axis (pivot columns) and
    column 1 its y axis (pivot index). The y axis is inverted so that small
    values sit at the bottom.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'BinaryClass' column when ``centered == 1``.
    values : column label or 0, default 0
        Column to colour by; 0 selects the third column of ``dataframe``.
    centered : int, default 1
        1 centres the colormap on the class boundary, taken as the midpoint
        between the smallest value of the positive class and the largest value
        of the negative class. Any other value leaves the colormap uncentred.
    labels : int, default 1
        0 hides tick labels, axis labels and title; any other value shows
        them and uses ``values`` as the title.
    colorP : str, default "coolwarm"
        Palette name handed to ``sns.color_palette(..., as_cmap=True)``.
    """
    # Retrieve rows, columns and values columns for heatmap dataframe
    rows = dataframe.columns[1]
    columns = dataframe.columns[0]

    # Default is density plot
    if values == 0:
        values = dataframe.columns[2]

    heatmapArgs = {'cmap': sns.color_palette(colorP, as_cmap=True)}

    if centered == 1:
        # The boundary is only needed (and only computed) when centring.
        pos_values = dataframe.loc[dataframe['BinaryClass'] > 0, values]
        neg_values = dataframe.loc[dataframe['BinaryClass'] < 0, values]
        if len(pos_values) and len(neg_values):
            heatmapArgs['center'] = (pos_values.min() + neg_values.max()) / 2
        else:
            # With a single class there is no boundary; draw uncentred rather than crash.
            import warnings
            warnings.warn("heatmapGenerator: only one class present, colormap is not centred")

    if labels == 0:
        heatmapArgs['yticklabels'] = False
        heatmapArgs['xticklabels'] = False

    heatmap_DF = dataframe.pivot(index=rows, columns=columns, values=values)

    plt.figure(figsize=(6,4))
    g = sns.heatmap(heatmap_DF, **heatmapArgs)

    if labels == 0:
        # Bare panel: no axis labels or title (used for Fig 1)
        g.set_ylabel('')
        g.set_xlabel('')
        g.set_title('')
    else:
        g.set_title(values, fontsize = 15)

    g.invert_yaxis() # Small value at bottom, same in Matlab

    # Aesthetics for x,y ticklabels: two decimals, rotated
    xticklabels = ['{:,.2f}'.format(float(x.get_text())) for x in g.get_xticklabels()]
    yticklabels = ['{:,.2f}'.format(float(y.get_text())) for y in g.get_yticklabels()]

    g.set_xticklabels(xticklabels, rotation=30, horizontalalignment='right')
    g.set_yticklabels(yticklabels, rotation=30, horizontalalignment='right')

In [None]:
### Normalize and Split for SVM data
### n is the NUMBER of training datapoints, not a percentage
def trainingTestSplit(full_df_Ori, n, random_state=None):
    """Draw ``n`` training rows containing two classes and standardize everything on them.

    Every column except the last two is treated as an input feature; the frame
    must also provide 'Density' and 'BinaryClass' columns. Both scalers are
    fitted on the training rows only and then applied to the test rows and to
    a copy of the full frame.

    Parameters
    ----------
    full_df_Ori : pandas.DataFrame
        Un-normalized data; it is not modified.
    n : int
        Number of training rows to sample.
    random_state : int, numpy RandomState/Generator or None, default None
        Seed for the sampling. None keeps the previous behaviour (global
        NumPy random state).

    Returns
    -------
    tuple
        (full_df_Ori, fullDF_copy, TrainDF, TestDF, fullDF_X, fullDF_Y,
        TrainDF_X, TrainDF_Y, TestDF_X, TestDF_Y, sc_X, sc_density).
        Everything except ``full_df_Ori`` is normalized.

    Raises
    ------
    ValueError
        If ``n < 2`` or the data holds fewer than two classes, i.e. when a
        two-class training set can never be drawn.
    RuntimeError
        If no two-class sample is found within the retry budget.
    """
    if n < 2:
        raise ValueError("n must be at least 2 to draw a two-class training set, got %r" % (n,))
    if len(set(full_df_Ori['BinaryClass'])) < 2:
        raise ValueError("full_df_Ori['BinaryClass'] must contain at least two classes")

    # An integer seed is turned into a generator so that retries draw NEW samples.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    # Re-sample (iteratively, not recursively) until the training rows are not single-class.
    maxAttempts = 1000
    for _ in range(maxAttempts):
        TrainDF = full_df_Ori.sample(n = n, random_state = random_state).copy()
        if len(set(TrainDF['BinaryClass'])) != 1:
            break
    else:
        raise RuntimeError("no two-class training sample found in %d attempts" % maxAttempts)

    # Test rows are selected by index; this keeps dtypes and rows containing NaN.
    TestDF = full_df_Ori.drop(TrainDF.index).copy()

    # Fit both scalers on the (still raw) training rows.
    inputName = list(full_df_Ori.columns)[:-2]
    sc_X = StandardScaler().fit(TrainDF[inputName].values)
    sc_density = StandardScaler().fit(TrainDF[['Density']].values)

    def _standardized(frame):
        # Apply the training scalers in place; an empty frame is returned untouched.
        if len(frame) == 0:
            return frame
        frame[inputName] = sc_X.transform(frame[inputName].values)
        frame['Density'] = sc_density.transform(frame[['Density']].values).ravel()
        return frame

    TrainDF = _standardized(TrainDF)
    TestDF = _standardized(TestDF)
    # Operate on a copy so the original data stays intact
    fullDF_copy = _standardized(full_df_Ori.copy())

    TrainDF_X, TrainDF_Y = TrainDF[inputName], TrainDF['BinaryClass']
    TestDF_X, TestDF_Y = TestDF[inputName], TestDF['BinaryClass']
    fullDF_X, fullDF_Y = fullDF_copy[inputName], fullDF_copy['BinaryClass']

    ### Use test data in the last step
    # full_df_Ori: non-normalized; all others are normalized
    return full_df_Ori, fullDF_copy, TrainDF, TestDF, fullDF_X, fullDF_Y,\
TrainDF_X, TrainDF_Y, TestDF_X, TestDF_Y, sc_X, sc_density

In [None]:
def svc_rand_param_selection(TrainDF_X, TrainDF_Y, kernelList):
    """Randomized hyper-parameter search for an SVC over the given kernels.

    Parameters
    ----------
    TrainDF_X : pandas.DataFrame
        Training inputs (fitted on ``TrainDF_X.values``).
    TrainDF_Y : array-like
        Training class labels.
    kernelList : list of str
        Kernels to try.

    Returns
    -------
    tuple
        (best_estimator, C, gamma, degree, kernel, coef0) of the refitted
        best model.
    """
    # 3 universal parameters
    Cs = list(10. ** np.arange(-2, 5))
    gammas = list(10. ** np.arange(-4, 2))
    coef0s = list(10. ** np.arange(-4, 4))
    # Search space: universal parameters plus degree and kernel.
    # coef0 was built but never handed to the search; it is now included so that
    # the coef0 returned below is actually tuned.
    param_grid = {'C': Cs, 'gamma': gammas+['scale', 'auto'], 'coef0': coef0s, \
                  'degree':[2,3,4,5], 'kernel': kernelList}

    # With 10+ training rows keep the library default number of folds.
    # With fewer rows use 2 folds, the smallest valid value: k-fold
    # cross-validation rejects a single fold, so the former `cv = 1`
    # failed for training sets with fewer than 6 rows.
    folds = None if len(TrainDF_X) >= 10 else 2
    grid = RandomizedSearchCV(SVC(), param_grid, refit = True, cv = folds, n_jobs = -2)

    grid.fit(TrainDF_X.values, TrainDF_Y)
    # print the best estimator & best parameters & best_score_
    print(grid.best_estimator_, grid.best_params_, grid.best_score_)

    # Return all parameters in best estimator
    best = grid.best_estimator_
    return best, best.C, best.gamma, best.degree, best.kernel, best.coef0

In [None]:
### For SVM quantitative prediction mainly
### To be used in autoProcess
def Quantitative_prediction(fullDF_X, trainedModel):
    """Return the model's decision-function score for every row of ``fullDF_X``.

    Parameters
    ----------
    fullDF_X : pandas.DataFrame
        Input rows to score.
    trainedModel : object
        Fitted model exposing ``decision_function`` that yields one score
        per row (e.g. a binary SVC).

    Returns
    -------
    list of float
        One score per input row, in row order; empty for empty input.
    """
    features = np.asarray(fullDF_X)
    if len(features) == 0:
        return []
    # Score all rows in one call instead of one call per row. This also avoids
    # converting 1-element arrays with float(), and the bare `array` name that
    # only existed through a star import.
    scores = np.asarray(trainedModel.decision_function(features), dtype=float)
    # reshape fails loudly if the model returns more than one score per row.
    return scores.reshape(len(features)).tolist()

In [None]:
### SVM autoprocess
### Call after splitting & normalization
### Train SVM 1 time
### Stats on testing data
def autoProcess(full_df_Ori, fullDF_copy, TrainDF, fullDF_X, TrainDF_X, TrainDF_Y, \
                TestDF_X, TestDF_Y, kernelList, colName):
    """Train one SVC, score every row, and compare the scores to density on the test rows.

    Parameters
    ----------
    full_df_Ori, TestDF_X, TestDF_Y :
        Accepted for interface compatibility; not used here.
    fullDF_copy : pandas.DataFrame
        Normalized full data; receives the new column ``colName`` (modified
        in place) and must contain 'Density'.
    TrainDF : pandas.DataFrame
        Training rows; only its index is used, to tell training from test rows.
    fullDF_X : pandas.DataFrame
        Normalized inputs for all rows.
    TrainDF_X, TrainDF_Y :
        Training inputs and class labels for model selection.
    kernelList : list of str
        Kernels to try.
    colName : str
        Name of the column that stores the decision-function scores.

    Returns
    -------
    tuple
        (model, spearman result, pearson result, squared Pearson r, MSE),
        all computed on the non-training rows between ``colName`` and 'Density'.
    """
    # Select model using training data (only the fitted estimator is needed here)
    fModel = svc_rand_param_selection(TrainDF_X, TrainDF_Y, kernelList)[0]

    # Quantitative prediction on all points, stored on the copy only
    fullDF_copy[colName] = Quantitative_prediction(fullDF_X, fModel)

    # Test rows = everything whose index is not a training index.
    NewTestDF = fullDF_copy.drop(TrainDF.index)
    svmScore = NewTestDF[colName]
    density = NewTestDF['Density']

    # Compare with the testing DF
    spearmanCorrelation = spearmanr(svmScore, density)
    pearsonCorrelation = pearsonr(svmScore, density)
    # Plain MSE is the default; the `squared` keyword is no longer accepted by
    # recent scikit-learn releases, so it is not passed.
    mse = mean_squared_error(svmScore, density)
    r2Correlation = np.corrcoef(svmScore, density)[0, 1] ** 2

    return fModel, spearmanCorrelation, pearsonCorrelation, r2Correlation, mse

# Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge

In [None]:
### Select the best polynomial degree for a PolynomialFeatures + Ridge pipeline
### TrainDF_X: X training dataset
### TrainDF_Y: Y training dataset (regression target)
### kernelList: unused; kept so the signature matches the SVC selector
def poly_rand_param_selection(TrainDF_X, TrainDF_Y, kernelList):
    """Cross-validate polynomial degrees 1-5 and return the refitted best pipeline.

    Parameters
    ----------
    TrainDF_X : array-like or pandas.DataFrame
        Training inputs.
    TrainDF_Y : array-like
        Regression targets.
    kernelList : any
        Ignored.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Best 'poly' + 'ridge' pipeline by cross-validated R2, refitted on all
        of ``TrainDF_X``.
    """
    # possible degrees
    param_grid = {'poly__degree': [1, 2, 3, 4, 5]}

    pipeline = Pipeline(steps=[('poly', PolynomialFeatures()), ('ridge', Ridge())])

    # Only five candidates exist, so evaluate all of them. A randomized search
    # with its default of 10 draws covered the same five points and only added
    # a warning about the search space being smaller than n_iter.
    # More than 10 rows: library default folds; otherwise 2 folds.
    folds = None if len(TrainDF_X) > 10 else 2
    search = GridSearchCV(pipeline, param_grid, refit = True, scoring='r2', cv = folds, n_jobs = -1)

    search.fit(TrainDF_X, TrainDF_Y)

    # print the best estimator, best parameters and best cross-validated score
    print(search.best_estimator_, search.best_params_, search.best_score_)

    # Return best estimator
    return search.best_estimator_

In [None]:
# Helper: undo the density standardization and return a flat 1-D array.
def _density_in_original_units(sc_density, scaledValues):
    column = np.asarray(scaledValues).reshape(-1, 1)
    return sc_density.inverse_transform(column).reshape(len(column))


# Helper: score predictions with the four selection metrics, keyed by metric name.
def _holdout_scores(actual, predicted):
    return {
        'R2': np.corrcoef(actual, predicted)[0, 1] ** 2,
        # `squared=True` was the default and is no longer accepted by recent scikit-learn.
        'MSE': mean_squared_error(actual, predicted),
        'Pearson': pearsonr(actual, predicted)[0],
        'Spearman': spearmanr(actual, predicted)[0],
    }


# Helper: True when `score` is a non-NaN improvement over `incumbent` (lower is better for MSE).
def _beats(metric, score, incumbent):
    if np.isnan(score):
        return False
    return score < incumbent if metric == 'MSE' else score > incumbent


# Run the regression comparison once, given training and testing sets
### "SVCSVR": regression on the inputs plus the weighted 'SVM Quant Estimation' column
### "SVR": regression on the plain inputs only
def regressionSVR(trainX, trainSVRX, trainY, \
                  testX, testSVRX, testY, \
                  kernelList, Newfull_df_X, sc_density):
    """Pick a weight for the SVM-distance feature per metric, then score both regressions.

    A random 70% (10 or fewer training rows) or 80% of the training rows is
    used to fit candidate models and the rest to score them. For each weight
    in [0.01, 0.1, 1] the 'SVM Quant Estimation' column is multiplied by the
    weight, a model is selected with ``poly_rand_param_selection`` and scored
    on the held-out rows in original density units. The best weight per metric
    is then used to refit on all training rows and predict the test set. The
    same is done for the inputs-only baseline.

    Parameters
    ----------
    trainX, testX : pandas.DataFrame
        Inputs including the 'SVM Quant Estimation' column.
    trainSVRX, testSVRX : pandas.DataFrame
        Inputs without that column, sharing the index of trainX / testX.
    trainY, testY : pandas.Series
        Standardized densities.
    kernelList :
        Forwarded to ``poly_rand_param_selection``.
    Newfull_df_X :
        Unused; kept for interface compatibility.
    sc_density :
        Fitted scaler used to map densities back to original units.

    Returns
    -------
    tuple of 28 items
        For each metric in (R2, MSE, Pearson, Spearman): held-out-best model,
        best weight, test score, test predictions; followed, for each metric,
        by baseline model, test score, test predictions. A model slot is 0 when
        no candidate produced a usable score. The returned weighted models are
        the ones fitted during weight selection, not the final refits that
        produced the test scores.
    """
    # Weights to be used
    #### Make this tunable too
    weightList = [0.01, 0.1, 1]
    metricList = ['R2', 'MSE', 'Pearson', 'Spearman']
    distanceCol = 'SVM Quant Estimation'

    # ---- Hold out part of the training rows for weight selection ----
    holdoutFrac = 0.7 if len(trainX) <= 10 else 0.8
    weightTrainXDF = trainX.sample(frac = holdoutFrac)
    weightTestXDF = trainX.drop(weightTrainXDF.index)
    train_index = weightTrainXDF.index
    test_index = weightTestXDF.index
    TrainYArr = np.array(trainY.loc[train_index])
    TestYArr = np.array(trainY.loc[test_index])
    SVRTrainXDF, SVRTestXDF = trainSVRX.loc[train_index], trainSVRX.loc[test_index]
    holdoutDensity = _density_in_original_units(sc_density, TestYArr)

    # Starting scores are infinite so that ANY finite score is accepted. The old
    # finite MSE sentinel (200) silently rejected every model when densities
    # live on a large scale.
    worstScore = {m: (np.inf if m == 'MSE' else -np.inf) for m in metricList}
    bestScore = dict(worstScore)
    bestModel = dict.fromkeys(metricList)      # None = nothing selected yet
    bestWeight = dict.fromkeys(metricList, 1)

    # ---- Try every weight on the SVM-distance feature ----
    for w in weightList:
        trainXCopy = weightTrainXDF.copy()
        testXCopy = weightTestXDF.copy()
        trainXCopy[distanceCol] = trainXCopy[distanceCol] * w
        testXCopy[distanceCol] = testXCopy[distanceCol] * w
        # Select best parameters for the current weight
        model = poly_rand_param_selection(trainXCopy, TrainYArr, kernelList)
        # Predict on the held-out slice of the training data
        predicted = _density_in_original_units(sc_density, model.predict(testXCopy.values))
        scores = _holdout_scores(holdoutDensity, predicted)
        for m in metricList:
            print(m + ': ', scores[m])
            if _beats(m, scores[m], bestScore[m]):
                bestScore[m] = scores[m]
                bestModel[m] = model
                bestWeight[m] = w

    # ---- Inputs-only baseline on the same hold-out split ----
    SVRmodel = poly_rand_param_selection(SVRTrainXDF, TrainYArr, kernelList)
    SVRpredicted = _density_in_original_units(sc_density, SVRmodel.predict(SVRTestXDF.values))
    SVRscores = _holdout_scores(holdoutDensity, SVRpredicted)
    # A metric whose score is NaN gets no model and is re-selected from scratch below.
    SVRbestModel = {m: (SVRmodel if _beats(m, SVRscores[m], worstScore[m]) else None)
                    for m in metricList}

    # ---- Refit on ALL training rows and evaluate on the real test set ----
    corrByMetric, predictedByMetric = {}, {}
    SVRcorrByMetric, SVRpredictedByMetric = {}, {}
    for m in metricList:
        weight = bestWeight[m]
        finalTrainXCopy = trainX.copy()
        finalTrainXCopy[distanceCol] = finalTrainXCopy[distanceCol] * weight
        finalModel = poly_rand_param_selection(finalTrainXCopy, trainY, kernelList)
        # finalCorrPrediction applies the weight to the test inputs itself
        corrByMetric[m], predictedByMetric[m] = \
            finalCorrPrediction(testX.copy(), testY, weight, finalModel, sc_density, metric = m)

        #### For the baseline -- retrain the selected model and predict
        SVRCurrModel = SVRbestModel[m]
        if SVRCurrModel is not None:
            SVRCurrModel.fit(trainSVRX.values, trainY)
        else:
            SVRCurrModel = poly_rand_param_selection(trainSVRX.copy(), trainY, kernelList)
        SVRcorrByMetric[m], SVRpredictedByMetric[m] = \
            SVROnlyfinalCorrPrediction(testSVRX.copy(), testY, 1, SVRCurrModel, sc_density, metric = m)

    print('SVM: ', [corrByMetric[m] for m in metricList])
    print('SVR: ', [SVRcorrByMetric[m] for m in metricList])

    # Flatten into the historical 28-item layout; "no model" is reported as 0.
    results = []
    for m in metricList:
        results += [0 if bestModel[m] is None else bestModel[m],
                    bestWeight[m], corrByMetric[m], predictedByMetric[m]]
    for m in metricList:
        results += [0 if SVRbestModel[m] is None else SVRbestModel[m],
                    SVRcorrByMetric[m], SVRpredictedByMetric[m]]
    return tuple(results)


In [None]:
def finalCorrPrediction(testX, testY, weight, bestModel, sc_density, metric = 'R2'):
    """Predict the test set with a weighted SVM-distance feature and score it.

    Parameters
    ----------
    testX : pandas.DataFrame
        Test inputs containing 'SVM Quant Estimation'; not modified.
    testY : array-like
        Standardized true densities.
    weight : number
        Factor applied to the 'SVM Quant Estimation' column before predicting.
    bestModel :
        Fitted model exposing ``predict``.
    sc_density :
        Fitted scaler exposing ``inverse_transform``; scoring is done in
        original density units.
    metric : {'R2', 'MSE', 'Pearson', 'Spearman'}, default 'R2'
        'R2' is the squared Pearson correlation.

    Returns
    -------
    tuple
        (score, predictions in original units with shape (n, 1)).

    Raises
    ------
    ValueError
        If ``metric`` is not one of the four supported names.
    """
    if metric not in ('R2', 'MSE', 'Pearson', 'Spearman'):
        # Previously an unknown metric surfaced as an unbound-variable error at return.
        raise ValueError("unknown metric %r; expected 'R2', 'MSE', 'Pearson' or 'Spearman'" % (metric,))

    finalTestX = testX.copy()
    finalTestX['SVM Quant Estimation'] = finalTestX['SVM Quant Estimation'] * weight
    predictedDensity = np.asarray(bestModel.predict(finalTestX))
    predictedDensityInversedBack = sc_density.inverse_transform(predictedDensity.reshape(-1, 1))
    # Invert back testY
    testDensityInversedBack = sc_density.inverse_transform(np.array(testY).reshape(-1, 1))
    corrTestDensity = testDensityInversedBack.reshape(len(testY))
    corrPredictedDensity = predictedDensityInversedBack.reshape(len(testY))

    if metric == 'R2':
        Correlation = np.corrcoef(corrTestDensity, corrPredictedDensity)[0, 1] ** 2
    elif metric == 'MSE':
        # Plain MSE is the default; `squared=` is no longer accepted by recent scikit-learn.
        Correlation = mean_squared_error(corrTestDensity, corrPredictedDensity)
    elif metric == 'Pearson':
        Correlation = pearsonr(corrTestDensity, corrPredictedDensity)[0]
    else:  # 'Spearman'
        Correlation = spearmanr(corrTestDensity, corrPredictedDensity)[0]

    return Correlation, predictedDensityInversedBack


In [None]:
def SVROnlyfinalCorrPrediction(testX, testY, weight, bestModel, sc_density, metric = 'R2'):
    """Predict the test set with the inputs-only model and score it.

    Parameters
    ----------
    testX : pandas.DataFrame or array-like
        Test inputs, passed to ``bestModel.predict`` unchanged.
    testY : array-like
        Standardized true densities.
    weight :
        Unused; kept so the signature mirrors ``finalCorrPrediction``.
    bestModel :
        Fitted model exposing ``predict``.
    sc_density :
        Fitted scaler exposing ``inverse_transform``; scoring is done in
        original density units.
    metric : {'R2', 'MSE', 'Pearson', 'Spearman'}, default 'R2'
        'R2' is the squared Pearson correlation.

    Returns
    -------
    tuple
        (score, predictions in original units with shape (n, 1)).

    Raises
    ------
    ValueError
        If ``metric`` is not one of the four supported names.
    """
    if metric not in ('R2', 'MSE', 'Pearson', 'Spearman'):
        # Previously an unknown metric surfaced as an unbound-variable error at return.
        raise ValueError("unknown metric %r; expected 'R2', 'MSE', 'Pearson' or 'Spearman'" % (metric,))

    # Predict
    predictedDensity = np.asarray(bestModel.predict(testX))
    predictedDensityInversedBack = sc_density.inverse_transform(predictedDensity.reshape(-1, 1))
    # Invert back testY
    testDensityInversedBack = sc_density.inverse_transform(np.array(testY).reshape(-1, 1))

    corrTestDensity = testDensityInversedBack.reshape(len(testY))
    corrPredictedDensity = predictedDensityInversedBack.reshape(len(testY))

    if metric == 'R2':
        Correlation = np.corrcoef(corrTestDensity, corrPredictedDensity)[0, 1] ** 2
    elif metric == 'MSE':
        # Plain MSE is the default; `squared=` is no longer accepted by recent scikit-learn.
        Correlation = mean_squared_error(corrTestDensity, corrPredictedDensity)
    elif metric == 'Pearson':
        Correlation = pearsonr(corrTestDensity, corrPredictedDensity)[0]
    else:  # 'Spearman'
        Correlation = spearmanr(corrTestDensity, corrPredictedDensity)[0]

    return Correlation, predictedDensityInversedBack


In [None]:
### Normalize and Split for SVM & SVR
def ComparisonTrainingTestSplit(full_df_Ori, n, random_state=None):
    """Draw ``n`` two-class training rows, standardize on them, and expose densities too.

    Every column except the last two is treated as an input feature; the frame
    must also provide 'Density' and 'BinaryClass' columns. Both scalers are
    fitted on the training rows only and then applied to the test rows and to
    a copy of the full frame.

    Parameters
    ----------
    full_df_Ori : pandas.DataFrame
        Un-normalized data; it is not modified.
    n : int
        Number of training rows to sample.
    random_state : int, numpy RandomState/Generator or None, default None
        Seed for the sampling. None keeps the previous behaviour (global
        NumPy random state).

    Returns
    -------
    tuple
        (full_df_Ori, fullDF_copy, TrainDF, TestDF, fullDF_X, fullDF_Y,
        fullDF_Density, TrainDF_X, TrainDF_Y, TrainDF_Density, TestDF_X,
        TestDF_Y, TestDF_Density, sc_X, sc_density). Everything except
        ``full_df_Ori`` is normalized.

    Raises
    ------
    ValueError
        If ``n < 2`` or the data holds fewer than two classes, i.e. when a
        two-class training set can never be drawn.
    RuntimeError
        If no two-class sample is found within the retry budget.
    """
    if n < 2:
        raise ValueError("n must be at least 2 to draw a two-class training set, got %r" % (n,))
    if len(set(full_df_Ori['BinaryClass'])) < 2:
        raise ValueError("full_df_Ori['BinaryClass'] must contain at least two classes")

    # An integer seed is turned into a generator so that retries draw NEW samples.
    if isinstance(random_state, (int, np.integer)):
        random_state = np.random.RandomState(random_state)

    # Re-sample (iteratively, not recursively) until the training rows are not single-class.
    maxAttempts = 1000
    for _ in range(maxAttempts):
        TrainDF = full_df_Ori.sample(n = n, random_state = random_state).copy()
        if len(set(TrainDF['BinaryClass'])) != 1:
            break
    else:
        raise RuntimeError("no two-class training sample found in %d attempts" % maxAttempts)

    # Test rows are selected by index; this keeps dtypes and rows containing NaN.
    TestDF = full_df_Ori.drop(TrainDF.index).copy()

    # Fit both scalers on the (still raw) training rows.
    inputName = list(full_df_Ori.columns)[:-2]
    sc_X = StandardScaler().fit(TrainDF[inputName].values)
    sc_density = StandardScaler().fit(TrainDF[['Density']].values)

    def _standardized(frame):
        # Apply the training scalers in place; an empty frame is returned untouched.
        if len(frame) == 0:
            return frame
        frame[inputName] = sc_X.transform(frame[inputName].values)
        frame['Density'] = sc_density.transform(frame[['Density']].values).ravel()
        return frame

    TrainDF = _standardized(TrainDF)
    TestDF = _standardized(TestDF)
    # Operate on a copy so the original data stays intact
    fullDF_copy = _standardized(full_df_Ori.copy())

    # X, class label and density for each normalized view
    TrainDF_X, TrainDF_Y, TrainDF_Density = \
        TrainDF[inputName], TrainDF['BinaryClass'], TrainDF['Density']
    TestDF_X, TestDF_Y, TestDF_Density = \
        TestDF[inputName], TestDF['BinaryClass'], TestDF['Density']
    fullDF_X, fullDF_Y, fullDF_Density = \
        fullDF_copy[inputName], fullDF_copy['BinaryClass'], fullDF_copy['Density']

    ### Use test data in the last step
    # full_df_Ori: non-normalized; all others are normalized
    return full_df_Ori, fullDF_copy, TrainDF, TestDF, fullDF_X, fullDF_Y, fullDF_Density, \
TrainDF_X, TrainDF_Y, TrainDF_Density, TestDF_X, TestDF_Y, TestDF_Density, sc_X, sc_density

In [None]:
# Run SVM + regression and the inputs-only regression once, together
def ComparisonRunOnce(excelFile, processExcelFormat, sheet, n, indices, kernelList, \
                      percent = 0, plasmidNum = 1):
    """Load one dataset, split it, train the SVC, and run both regressions.

    Parameters
    ----------
    excelFile : str or path-like
        Input file handed to the selected loader.
    processExcelFormat : int
        Loader code 1-7 (see the dispatch table in the body).
    sheet : str or int
        Sheet name for the Excel-based loaders.
    n : int
        Number of training rows.
    indices, plasmidNum :
        Accepted for interface compatibility; not used here.
    kernelList : list of str
        SVC kernels to try.
    percent : number, default 0
        Class-boundary setting forwarded to loaders 1, 3, 4 and 5.

    Returns
    -------
    tuple
        (SVMmodel, <the 28 outputs of regressionSVR, in order>,
        NewTrainDF_X, NewTrainDF_Y, Newfull_df_X, Newfull_df_Y,
        sc_X, sc_density, sc_distance, trainingIndex).

    Raises
    ------
    ValueError
        If ``processExcelFormat`` is not a known loader code.
    """
    # Loaders are wrapped in lambdas so a name is only looked up when selected.
    # NOTE(review): loaders 4, 6 and 7 are not defined in the visible part of
    # this notebook -- confirm they exist before using those codes.
    loaders = {
        1: lambda: processExcelFormat01(excelFile, percent),
        2: lambda: processExcelFormat02(excelFile, sheet),
        3: lambda: processExcelFormat03(excelFile, sheet, percent),
        4: lambda: processExcelFormat04(excelFile, sheet, percent),
        5: lambda: processExcelFormat05(excelFile, sheet, percent),
        6: lambda: processExcelFormatMedia(excelFile, sheet),
        7: lambda: AndreaDrugScreening(excelFile, sheet),
    }
    if processExcelFormat not in loaders:
        # Previously an unknown code fell through and failed later on an unbound `df`.
        raise ValueError("unknown processExcelFormat %r; expected one of %s"
                         % (processExcelFormat, sorted(loaders)))
    df = loaders[processExcelFormat]()

    # Split the data
    df_Ori, df, dfTrain, dfTest, df_X, df_Y, df_Density, \
    dfTrain_X, dfTrain_Y, dfTrain_Density, dfTest_X, dfTest_Y, dfTest_Density, sc_X, sc_density \
    = ComparisonTrainingTestSplit(df, n)

    trainingIndex = dfTrain.index
    distanceCol = 'SVM Quant Estimation'

    ### Train SVM+regression one time
    ## Run SVM first; this adds the distance column to `df`
    SVMmodel, spearmanCorrelation, pearsonCorrelation, r2Correlation, mse = \
                autoProcess(df_Ori, df, dfTrain, df_X, dfTrain_X, dfTrain_Y, dfTest_X, dfTest_Y, kernelList,\
                           distanceCol)

    #### Normalize distances: fit on training rows only, then scale every row.
    sc_distance = StandardScaler()
    sc_distance.fit(df.loc[trainingIndex, [distanceCol]].values)
    df[distanceCol] = sc_distance.transform(df[[distanceCol]].values).ravel()

    # Slice training/test rows AFTER scaling so no slice has to be written to.
    NewTrainDF = df.loc[trainingIndex]
    NewTestDF = df.loc[~df.index.isin(trainingIndex)]

    inputName = list(df_Ori.columns)[:-2]
    newInputName = inputName + [distanceCol]
    # X & Y for all data
    Newfull_df_X = df[newInputName]
    Newfull_df_Y = df['Density']

    # X & Y for training set
    NewTrainDF_X = NewTrainDF[newInputName]
    NewTrainDF_Y = NewTrainDF['Density']
    # X & Y for testing set
    NewTestDF_X = NewTestDF[newInputName]
    NewTestDF_Y = NewTestDF['Density']

    # 28 values; forwarded to the caller unchanged and in the same order.
    regressionOutputs = \
    regressionSVR(NewTrainDF_X, dfTrain_X, NewTrainDF_Y, \
                  NewTestDF_X, dfTest_X, NewTestDF_Y, \
                  kernelList, Newfull_df_X, sc_density)

    return (SVMmodel, *regressionOutputs,
            NewTrainDF_X, NewTrainDF_Y, Newfull_df_X, Newfull_df_Y,
            sc_X, sc_density, sc_distance, trainingIndex)


In [None]:
def ComparisonMultipleRun(runNum, excelFile, processExcelFormat, sheet, nRange, indices, kernelList,\
                           percent = 0, plasmidNum = 1): # Percent or abs value
    """Call ComparisonRunOnce `runNum` times for every entry of `nRange` and gather the results.

    `excelFile`, `processExcelFormat`, `sheet`, `indices`, `kernelList`, `percent`
    and `plasmidNum` are forwarded untouched to ComparisonRunOnce; each element
    `n` of `nRange` is passed as its fourth positional argument and printed once.

    Returns a 32-tuple laid out as:
      0-7   SVC+SVR score lists (Spearman, Pearson, R2, MSE) followed by the
            SVC+SVR raw-prediction lists in the same metric order.  Each is a
            list with one inner list per `n`, holding one `(n, value)` tuple per run.
      8-11  SVC+SVR model recorders (R2, MSE, Pearson, Spearman):
            `{n: {run: (SVMmodel, bestModel, bestWeight)}}`.
      12-19 the same score / raw-prediction lists for the plain SVR.
      20-23 plain-SVR model recorders (R2, MSE, Pearson, Spearman): `{n: {run: model}}`.
      24-31 `{n: {run: value}}` recorders for the training index, training X,
            training Y, full X, full Y, input scaler, density scaler and
            distance scaler returned by ComparisonRunOnce.
    """
    metricOrder = ('Spearman', 'Pearson', 'R2', 'MSE')
    recorderOrder = ('R2', 'MSE', 'Pearson', 'Spearman')
    extraKeys = ('index', 'trainX', 'trainY', 'fullX', 'fullY', 'scX', 'scDensity', 'scDistance')

    # Containers spanning every n
    hybridScoreLists = {m: [] for m in metricOrder}
    hybridPredictedLists = {m: [] for m in metricOrder}
    svrScoreLists = {m: [] for m in metricOrder}
    svrPredictedLists = {m: [] for m in metricOrder}
    hybridModelRecorder = {m: {} for m in metricOrder}
    svrModelRecorder = {m: {} for m in metricOrder}
    extraRecorder = {key: {} for key in extraKeys}

    for n in nRange:
        print(n)

        # Containers for this n only, filled once per repetition
        hybridScores = {m: [] for m in metricOrder}
        hybridPredicted = {m: [] for m in metricOrder}
        svrScores = {m: [] for m in metricOrder}
        svrPredicted = {m: [] for m in metricOrder}
        hybridModels = {m: {} for m in metricOrder}
        svrModels = {m: {} for m in metricOrder}
        extras = {key: {} for key in extraKeys}

        for it in range(runNum):
            # One full train/evaluate cycle; the unpacking must match the 37 returned values
            (SVMmodel,
             bestR2Model, bestR2Weight, R2Corr, R2Predicted,
             bestMSEModel, bestMSEWeight, MSECorr, MSEPredicted,
             bestPearsonModel, bestPearsonWeight, PearsonCorr, PearsonPredicted,
             bestSpearmanModel, bestSpearmanWeight, SpearmanCorr, SpearmanPredicted,
             SVRbestR2Model, SVRR2Corr, SVRR2Predicted,
             SVRbestMSEModel, SVRMSECorr, SVRMSEPredicted,
             SVRbestPearsonModel, SVRPearsonCorr, SVRPearsonPredicted,
             SVRbestSpearmanModel, SVRSpearmanCorr, SVRSpearmanPredicted,
             NewTrainDF_X, NewTrainDF_Y, Newfull_df_X, Newfull_df_Y,
             sc_X, sc_density, sc_distance, trainingIndex) = ComparisonRunOnce(
                excelFile, processExcelFormat, sheet, n, indices, kernelList,
                percent, plasmidNum)

            hybridOutcome = {
                'Spearman': (bestSpearmanModel, bestSpearmanWeight, SpearmanCorr, SpearmanPredicted),
                'Pearson': (bestPearsonModel, bestPearsonWeight, PearsonCorr, PearsonPredicted),
                'R2': (bestR2Model, bestR2Weight, R2Corr, R2Predicted),
                'MSE': (bestMSEModel, bestMSEWeight, MSECorr, MSEPredicted),
            }
            svrOutcome = {
                'Spearman': (SVRbestSpearmanModel, SVRSpearmanCorr, SVRSpearmanPredicted),
                'Pearson': (SVRbestPearsonModel, SVRPearsonCorr, SVRPearsonPredicted),
                'R2': (SVRbestR2Model, SVRR2Corr, SVRR2Predicted),
                'MSE': (SVRbestMSEModel, SVRMSECorr, SVRMSEPredicted),
            }

            for m in metricOrder:
                # SVC+SVR: keep the classifier together with the chosen regressor and weight
                model, weight, corr, predicted = hybridOutcome[m]
                hybridModels[m][it] = (SVMmodel, model, weight)
                hybridScores[m].append((n, corr))
                hybridPredicted[m].append((n, predicted))

                # Plain SVR
                svrModel, svrCorr, svrPred = svrOutcome[m]
                svrModels[m][it] = svrModel
                svrScores[m].append((n, svrCorr))
                svrPredicted[m].append((n, svrPred))

            # Data frames, training indices and scalers of this repetition
            runExtras = (trainingIndex, NewTrainDF_X, NewTrainDF_Y, Newfull_df_X, Newfull_df_Y,
                         sc_X, sc_density, sc_distance)
            for key, value in zip(extraKeys, runExtras):
                extras[key][it] = value

        # File everything gathered for this n under the master containers
        for m in metricOrder:
            hybridModelRecorder[m][n] = hybridModels[m]
            svrModelRecorder[m][n] = svrModels[m]
            hybridScoreLists[m].append(hybridScores[m])
            hybridPredictedLists[m].append(hybridPredicted[m])
            svrScoreLists[m].append(svrScores[m])
            svrPredictedLists[m].append(svrPredicted[m])
        for key in extraKeys:
            extraRecorder[key][n] = extras[key]

    return (
        *[hybridScoreLists[m] for m in metricOrder],
        *[hybridPredictedLists[m] for m in metricOrder],
        *[hybridModelRecorder[m] for m in recorderOrder],
        *[svrScoreLists[m] for m in metricOrder],
        *[svrPredictedLists[m] for m in metricOrder],
        *[svrModelRecorder[m] for m in recorderOrder],
        *[extraRecorder[key] for key in extraKeys],
    )


In [None]:
def ComparisonResultsPlot(ResultLists, regression = 'Poly', position = 'lower right', \
                          xname = '# training data', yname = '$R^2$',\
                          show = 1, ylimit = [0, 1], yline = 0.75):
    """Draw a grouped box plot of SVC+regression scores against plain-regression scores.

    ResultLists: pair `(hybridRuns, baselineRuns)`; each is a list of lists of
        2-tuples (first element goes to column `xname`, second to 'Results').
    regression:  label of the baseline; the hybrid method is labelled 'SVC+'+regression.
    position:    legend location (replaced by 'upper right' when `yname` is "MSE").
    xname/yname: x column name and axis labels; `yname` "MSE" also disables the
                 y-limits and the reference line.
    show:        1 draws the axis labels, any other value blanks them.
    ylimit:      y-axis limits (ignored for "MSE").
    yline:       height of the dashed grey reference line; 0 disables it.

    Returns the long-format DataFrame (columns `xname`, 'Results', 'Method') that was plotted.
    """
    hybridRuns, baselineRuns = ResultLists
    header = [xname, 'Results']

    def _labelledFrame(runGroups, methodLabel):
        # Flatten the per-split groups into one row per run, then tag the method
        rows = [row for group in runGroups for row in group]
        frame = pd.DataFrame(rows)
        frame.columns = header
        frame['Method'] = methodLabel
        return frame

    FinalDF = pd.concat([
        _labelledFrame(hybridRuns, 'SVC+'+regression),
        _labelledFrame(baselineRuns, regression),
    ])

    sns.boxplot(
        y='Results', x=xname,
        data=FinalDF, hue='Method',
        palette=sns.color_palette("husl", 2), saturation=1.0, width=0.4,
    )
    plt.legend(loc=position)

    if yname == "MSE":
        # MSE is unbounded: no y-limits or reference line, and the legend moves up
        plt.legend(loc='upper right')
    else:
        plt.ylim(ylimit)
        if yline != 0:
            plt.axhline(y = yline, color = 'grey', ls = '--', lw = 1.1)

    xText, yText = (xname, yname) if show == 1 else ('', '')
    plt.xlabel(xText)
    plt.ylabel(yText)

    return FinalDF

In [None]:
def statsPlot(toPlotDF, regression = 'Poly', statsTest = "Mann-Whitney", ylabel = 'Results', show = 1, limit = 0):
    """Box-plot `ylabel` per training-set size and annotate baseline vs. SVC+baseline significance.

    toPlotDF:   DataFrame with the columns '# Training data', 'Method' and `ylabel`.
    regression: label of the baseline method; the hue order is
                [regression, 'SVC+'+regression].
    statsTest:  test name handed to `Annotator.configure`.
    ylabel:     column plotted on the y axis (also used as the axis label).
    show:       1 draws the axis labels at font size 40, any other value blanks them.
    limit:      currently unused - the y axis is always set to [0, 1].

    One comparison pair is built for every distinct training-set size, so any
    number of sizes is supported.  The figure is shown with `plt.show()`;
    nothing is returned.
    """
    statesName = '# Training data'
    states_order = sorted(set(toPlotDF[statesName]))
    subcat_palette = sns.dark_palette("#8BF", reverse=True, n_colors=3)
    subcat_order = [regression, 'SVC+'+regression]
    # Switching hue and x
    hue_plot_params = {
        'data':      toPlotDF,
        'x':         statesName,
        'y':         ylabel,
        "order":     states_order,
        "hue":       "Method",
        "hue_order": subcat_order,
        "palette":   subcat_palette
    }
    # Within each training-set size, compare the baseline with the SVC+baseline method
    pairs = tuple(
        [(state, subcat_order[0]), (state, subcat_order[1])]
        for state in states_order
    )
    with sns.plotting_context("notebook", font_scale = 1.4):
        # NOTE(review): sns.set / sns.set_style change seaborn's global theme, so
        # plots drawn after this call may be affected - confirm this is intended.
        sns.set(font_scale = 4)
        sns.set_style("white")
        # Create new plot
        figsize = (8, 5)
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        # Plot with seaborn
        ax = sns.boxplot(ax=ax, **hue_plot_params)
        # Add stats annotations
        annotator = Annotator(ax, pairs, **hue_plot_params)
        annotator.configure(test = statsTest, verbose=False)
        _, results = annotator.apply_and_annotate()

        if show == 1:
            ax.set_xlabel(statesName, fontsize=40)
            ax.set_ylabel(ylabel, fontsize=40)
        else:
            ax.set_xlabel(" ")
            ax.set_ylabel(" ")

        plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left', borderaxespad=0)
        plt.ylim([0, 1])
        plt.show()

In [None]:
def scatterComparisonPlot(DF, regression = 'SVR'):
    """Scatter the baseline scores (x) against the SVC+baseline scores (y), one colour per training size.

    DF:         DataFrame with the columns '# Training data' and 'Method' plus the
                score column(s); a leftover 'Unnamed: 0' column is ignored.
    regression: 'Method' label of the baseline rows; the hybrid rows are expected
                to be labelled 'SVC+'+regression.  The default keeps the original
                'SVR' / 'SVC+SVR' labels.

    At most the five smallest training-set sizes are drawn.  Rows of the two
    methods are paired by position, so both methods need the same number of rows
    per training size.  If no row carries the hybrid label, only the diagonal is
    drawn.
    """
    FixedName = ['# Training data', 'Method', 'Unnamed: 0']
    resultsName = [col for col in DF.columns if col not in FixedName]
    print(resultsName)

    hybridLabel = 'SVC+' + regression
    DF_SVC = DF[DF['Method'] == hybridLabel]
    DF_SVR = DF[DF['Method'] == regression]

    # Get all different numbers of training data
    trainingDataType = sorted(set(DF_SVC['# Training data']))

    # Create new plot
    fig, ax = plt.subplots(1, 1, figsize=(5, 5))

    # (colour, alpha) for the 1st..5th training size; zip() below enforces the cap of five
    groupStyles = [('cyan', 0.7), ('olive', 0.9), ('brown', 0.9), ('tomato', 0.7), ('red', 0.7)]
    handles = []
    for size, (color, alpha) in zip(trainingDataType, groupStyles):
        baselineScores = DF_SVR[DF_SVR['# Training data'] == size][resultsName]
        hybridScores = DF_SVC[DF_SVC['# Training data'] == size][resultsName]
        handles.append(ax.scatter(baselineScores, hybridScores, alpha = alpha, color = color,
                                  label = str(size)+' training data'))
    # One legend listing every series that was actually drawn
    ax.legend(handles=handles, bbox_to_anchor = (1.7, 1.0), loc='upper right')

    ax.set_ylim([0, 1])
    ax.set_xlim([0, 1])

    ax.tick_params(axis='x', which='major', direction='out', length=5,
                   pad=10, labelsize=25)
    ax.tick_params(axis='y', which='major', direction='out', length=5,
                   pad=10, labelsize=25)

    # Diagonal: points above it are cases where the hybrid method scored higher
    x = np.linspace(0, 1)
    ax.plot(x, x, 'grey', linewidth=2)

    ax.set_xlabel(regression + ' Prediction', fontsize=25)
    ax.set_ylabel(hybridLabel + ' Prediction', fontsize=25)

In [None]:
# Process 'Sheet1' of the TFTFB workbook (third argument 0.4 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
TFTFB = processExcelFormat03('HelenaTFTFBdensity.xlsx', 'Sheet1', 0.4)
heatmapGenerator(TFTFB, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(TFTFB)

In [None]:
# 30 repetitions for each nRange value (10-50) on the TFTFB workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_TFTFB_Poly, complexSVRPearsonLists_TFTFB_Poly,
    complexSVRR2Lists_TFTFB_Poly, complexSVRMSELists_TFTFB_Poly,
    complexPredictedSVRSpearmanLists_TFTFB_Poly, complexPredictedSVRPearsonLists_TFTFB_Poly,
    complexPredictedSVRR2Lists_TFTFB_Poly, complexPredictedSVRMSELists_TFTFB_Poly,
    # SVC+SVR model recorders
    r2Recorder_TFTFB_Poly, mseRecorder_TFTFB_Poly,
    pearsonRecorder_TFTFB_Poly, spearmanRecorder_TFTFB_Poly,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_TFTFB_Poly, SVRPearsonLists_TFTFB_Poly,
    SVRR2Lists_TFTFB_Poly, SVRMSELists_TFTFB_Poly,
    PredictedSVRSpearmanLists_TFTFB_Poly, PredictedSVRPearsonLists_TFTFB_Poly,
    PredictedSVRR2Lists_TFTFB_Poly, PredictedSVRMSELists_TFTFB_Poly,
    SVRr2Recorder_TFTFB_Poly, SVRmseRecorder_TFTFB_Poly,
    SVRpearsonRecorder_TFTFB_Poly, SVRspearmanRecorder_TFTFB_Poly,
    # training indices, data frames and scalers
    indexRecorder_TFTFB_Poly, TrainXDFRecorder_TFTFB_Poly, TrainYDFRecorder_TFTFB_Poly,
    SVMSVRFullXDFRecorder_TFTFB_Poly, SVMSVRFullYDFRecorder_TFTFB_Poly,
    scXRecorder_TFTFB_Poly, scDensityRecorder_TFTFB_Poly, scDistanceRecorder_TFTFB_Poly,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaTFTFBdensity.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.4,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to TFTFB, replacing the processed data that name held before.
TFTFB = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFB_Poly, SVRR2Lists_TFTFB_Poly],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=TFTFB, regression='Poly')

In [None]:
# Process 'Sheet1' of the DAHBCLA workbook (third argument 0.5 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DAHBCLA = processExcelFormat03('HelenaExpDAHBCLA.xlsx', 'Sheet1', 0.5)
heatmapGenerator(DAHBCLA, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DAHBCLA)

In [None]:
# 40 repetitions for each nRange value (10-50) on the DAHBCLA workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DAHBCLA, complexSVRPearsonLists_DAHBCLA,
    complexSVRR2Lists_DAHBCLA, complexSVRMSELists_DAHBCLA,
    complexPredictedSVRSpearmanLists_DAHBCLA, complexPredictedSVRPearsonLists_DAHBCLA,
    complexPredictedSVRR2Lists_DAHBCLA, complexPredictedSVRMSELists_DAHBCLA,
    # SVC+SVR model recorders
    r2Recorder_DAHBCLA, mseRecorder_DAHBCLA,
    pearsonRecorder_DAHBCLA, spearmanRecorder_DAHBCLA,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DAHBCLA, SVRPearsonLists_DAHBCLA,
    SVRR2Lists_DAHBCLA, SVRMSELists_DAHBCLA,
    PredictedSVRSpearmanLists_DAHBCLA, PredictedSVRPearsonLists_DAHBCLA,
    PredictedSVRR2Lists_DAHBCLA, PredictedSVRMSELists_DAHBCLA,
    SVRr2Recorder_DAHBCLA, SVRmseRecorder_DAHBCLA,
    SVRpearsonRecorder_DAHBCLA, SVRspearmanRecorder_DAHBCLA,
    # training indices, data frames and scalers
    indexRecorder_DAHBCLA, TrainXDFRecorder_DAHBCLA, TrainYDFRecorder_DAHBCLA,
    SVMSVRFullXDFRecorder_DAHBCLA, SVMSVRFullYDFRecorder_DAHBCLA,
    scXRecorder_DAHBCLA, scDensityRecorder_DAHBCLA, scDistanceRecorder_DAHBCLA,
) = ComparisonMultipleRun(
    runNum=40,
    excelFile='HelenaExpDAHBCLA.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.5,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DAHBCLA, replacing the processed data that name held before.
DAHBCLA = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBCLA, SVRR2Lists_DAHBCLA],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DAHBCLA, regression='Poly')

In [None]:
# Process 'Sheet1' of the DAHMCLA workbook (third argument 0.6 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DAHMCLA = processExcelFormat03('HelenaExpDAHMCLA.xlsx', 'Sheet1', 0.6)
heatmapGenerator(DAHMCLA, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DAHMCLA)

In [None]:
# 40 repetitions for each nRange value (10-50) on the DAHMCLA workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DAHMCLA, complexSVRPearsonLists_DAHMCLA,
    complexSVRR2Lists_DAHMCLA, complexSVRMSELists_DAHMCLA,
    complexPredictedSVRSpearmanLists_DAHMCLA, complexPredictedSVRPearsonLists_DAHMCLA,
    complexPredictedSVRR2Lists_DAHMCLA, complexPredictedSVRMSELists_DAHMCLA,
    # SVC+SVR model recorders
    r2Recorder_DAHMCLA, mseRecorder_DAHMCLA,
    pearsonRecorder_DAHMCLA, spearmanRecorder_DAHMCLA,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DAHMCLA, SVRPearsonLists_DAHMCLA,
    SVRR2Lists_DAHMCLA, SVRMSELists_DAHMCLA,
    PredictedSVRSpearmanLists_DAHMCLA, PredictedSVRPearsonLists_DAHMCLA,
    PredictedSVRR2Lists_DAHMCLA, PredictedSVRMSELists_DAHMCLA,
    SVRr2Recorder_DAHMCLA, SVRmseRecorder_DAHMCLA,
    SVRpearsonRecorder_DAHMCLA, SVRspearmanRecorder_DAHMCLA,
    # training indices, data frames and scalers
    indexRecorder_DAHMCLA, TrainXDFRecorder_DAHMCLA, TrainYDFRecorder_DAHMCLA,
    SVMSVRFullXDFRecorder_DAHMCLA, SVMSVRFullYDFRecorder_DAHMCLA,
    scXRecorder_DAHMCLA, scDensityRecorder_DAHMCLA, scDistanceRecorder_DAHMCLA,
) = ComparisonMultipleRun(
    runNum=40,
    excelFile='HelenaExpDAHMCLA.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.6,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DAHMCLA, replacing the processed data that name held before.
DAHMCLA = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHMCLA, SVRR2Lists_DAHMCLA],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DAHMCLA, regression='Poly')

In [None]:
# Process 'Sheet1' of the DACLA workbook (third argument 0.8 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DACLA = processExcelFormat03('HelenaExpDACLA.xlsx', 'Sheet1', 0.8)
heatmapGenerator(DACLA, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DACLA)

In [None]:
# 30 repetitions for each nRange value (10-50), rbf kernel only.
# NOTE(review): this run reads 'HelenaExpDAMCLA.xlsx', whereas the DACLA heatmap
# cells process 'HelenaExpDACLA.xlsx' - confirm which workbook is intended.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DACLA, complexSVRPearsonLists_DACLA,
    complexSVRR2Lists_DACLA, complexSVRMSELists_DACLA,
    complexPredictedSVRSpearmanLists_DACLA, complexPredictedSVRPearsonLists_DACLA,
    complexPredictedSVRR2Lists_DACLA, complexPredictedSVRMSELists_DACLA,
    # SVC+SVR model recorders
    r2Recorder_DACLA, mseRecorder_DACLA,
    pearsonRecorder_DACLA, spearmanRecorder_DACLA,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DACLA, SVRPearsonLists_DACLA,
    SVRR2Lists_DACLA, SVRMSELists_DACLA,
    PredictedSVRSpearmanLists_DACLA, PredictedSVRPearsonLists_DACLA,
    PredictedSVRR2Lists_DACLA, PredictedSVRMSELists_DACLA,
    SVRr2Recorder_DACLA, SVRmseRecorder_DACLA,
    SVRpearsonRecorder_DACLA, SVRspearmanRecorder_DACLA,
    # training indices, data frames and scalers
    indexRecorder_DACLA, TrainXDFRecorder_DACLA, TrainYDFRecorder_DACLA,
    SVMSVRFullXDFRecorder_DACLA, SVMSVRFullYDFRecorder_DACLA,
    scXRecorder_DACLA, scDensityRecorder_DACLA, scDistanceRecorder_DACLA,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpDAMCLA.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.8,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DACLA, replacing the processed data that name held before.
DACLA = ComparisonResultsPlot(
    [complexSVRR2Lists_DACLA, SVRR2Lists_DACLA],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DACLA, regression='Poly')

In [None]:
# Process 'Sheet1' of the DAHBSUL workbook (third argument 0.4 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DAHBSUL = processExcelFormat03('HelenaExpDAHBSUL.xlsx', 'Sheet1', 0.4)
heatmapGenerator(DAHBSUL, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DAHBSUL)

In [None]:
# 30 repetitions for each nRange value (10-50) on the DAHBSUL workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DAHBSUL, complexSVRPearsonLists_DAHBSUL,
    complexSVRR2Lists_DAHBSUL, complexSVRMSELists_DAHBSUL,
    complexPredictedSVRSpearmanLists_DAHBSUL, complexPredictedSVRPearsonLists_DAHBSUL,
    complexPredictedSVRR2Lists_DAHBSUL, complexPredictedSVRMSELists_DAHBSUL,
    # SVC+SVR model recorders
    r2Recorder_DAHBSUL, mseRecorder_DAHBSUL,
    pearsonRecorder_DAHBSUL, spearmanRecorder_DAHBSUL,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DAHBSUL, SVRPearsonLists_DAHBSUL,
    SVRR2Lists_DAHBSUL, SVRMSELists_DAHBSUL,
    PredictedSVRSpearmanLists_DAHBSUL, PredictedSVRPearsonLists_DAHBSUL,
    PredictedSVRR2Lists_DAHBSUL, PredictedSVRMSELists_DAHBSUL,
    SVRr2Recorder_DAHBSUL, SVRmseRecorder_DAHBSUL,
    SVRpearsonRecorder_DAHBSUL, SVRspearmanRecorder_DAHBSUL,
    # training indices, data frames and scalers
    indexRecorder_DAHBSUL, TrainXDFRecorder_DAHBSUL, TrainYDFRecorder_DAHBSUL,
    SVMSVRFullXDFRecorder_DAHBSUL, SVMSVRFullYDFRecorder_DAHBSUL,
    scXRecorder_DAHBSUL, scDensityRecorder_DAHBSUL, scDistanceRecorder_DAHBSUL,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpDAHBSUL.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.4,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DAHBSUL, replacing the processed data that name held before.
DAHBSUL = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBSUL, SVRR2Lists_DAHBSUL],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DAHBSUL, regression='Poly')

In [None]:
# Process 'Sheet1' of the DABSUL workbook (third argument 0.1 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DABSUL = processExcelFormat03('HelenaExpDABSUL.xlsx', 'Sheet1', 0.1)
heatmapGenerator(DABSUL, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DABSUL)

In [None]:
# 30 repetitions for each nRange value (10-50) on the DABSUL workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DABSUL, complexSVRPearsonLists_DABSUL,
    complexSVRR2Lists_DABSUL, complexSVRMSELists_DABSUL,
    complexPredictedSVRSpearmanLists_DABSUL, complexPredictedSVRPearsonLists_DABSUL,
    complexPredictedSVRR2Lists_DABSUL, complexPredictedSVRMSELists_DABSUL,
    # SVC+SVR model recorders
    r2Recorder_DABSUL, mseRecorder_DABSUL,
    pearsonRecorder_DABSUL, spearmanRecorder_DABSUL,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DABSUL, SVRPearsonLists_DABSUL,
    SVRR2Lists_DABSUL, SVRMSELists_DABSUL,
    PredictedSVRSpearmanLists_DABSUL, PredictedSVRPearsonLists_DABSUL,
    PredictedSVRR2Lists_DABSUL, PredictedSVRMSELists_DABSUL,
    SVRr2Recorder_DABSUL, SVRmseRecorder_DABSUL,
    SVRpearsonRecorder_DABSUL, SVRspearmanRecorder_DABSUL,
    # training indices, data frames and scalers
    indexRecorder_DABSUL, TrainXDFRecorder_DABSUL, TrainYDFRecorder_DABSUL,
    SVMSVRFullXDFRecorder_DABSUL, SVMSVRFullYDFRecorder_DABSUL,
    scXRecorder_DABSUL, scDensityRecorder_DABSUL, scDistanceRecorder_DABSUL,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpDABSUL.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.1,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DABSUL, replacing the processed data that name held before.
DABSUL = ComparisonResultsPlot(
    [complexSVRR2Lists_DABSUL, SVRR2Lists_DABSUL],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DABSUL, regression='Poly')

In [None]:
# Process 'Sheet1' of the DABCLA workbook (third argument 0.3 - presumably the
# class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
DABCLA = processExcelFormat03('HelenaExpDABCLA.xlsx', 'Sheet1', 0.3)
heatmapGenerator(DABCLA, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(DABCLA)

In [None]:
# 30 repetitions for each nRange value (10-50) on the DABCLA workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_DABCLA, complexSVRPearsonLists_DABCLA,
    complexSVRR2Lists_DABCLA, complexSVRMSELists_DABCLA,
    complexPredictedSVRSpearmanLists_DABCLA, complexPredictedSVRPearsonLists_DABCLA,
    complexPredictedSVRR2Lists_DABCLA, complexPredictedSVRMSELists_DABCLA,
    # SVC+SVR model recorders
    r2Recorder_DABCLA, mseRecorder_DABCLA,
    pearsonRecorder_DABCLA, spearmanRecorder_DABCLA,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_DABCLA, SVRPearsonLists_DABCLA,
    SVRR2Lists_DABCLA, SVRMSELists_DABCLA,
    PredictedSVRSpearmanLists_DABCLA, PredictedSVRPearsonLists_DABCLA,
    PredictedSVRR2Lists_DABCLA, PredictedSVRMSELists_DABCLA,
    SVRr2Recorder_DABCLA, SVRmseRecorder_DABCLA,
    SVRpearsonRecorder_DABCLA, SVRspearmanRecorder_DABCLA,
    # training indices, data frames and scalers
    indexRecorder_DABCLA, TrainXDFRecorder_DABCLA, TrainYDFRecorder_DABCLA,
    SVMSVRFullXDFRecorder_DABCLA, SVMSVRFullYDFRecorder_DABCLA,
    scXRecorder_DABCLA, scDensityRecorder_DABCLA, scDistanceRecorder_DABCLA,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpDABCLA.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.3,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to DABCLA, replacing the processed data that name held before.
DABCLA = ComparisonResultsPlot(
    [complexSVRR2Lists_DABCLA, SVRR2Lists_DABCLA],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=DABCLA, regression='Poly')

In [None]:
# Process 'Sheet1' of the TFHMdensity workbook (third argument 0.7 - presumably
# the class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
TFHMdensity = processExcelFormat03('HelenaExpTFHMdensity.xlsx', 'Sheet1', 0.7)
heatmapGenerator(TFHMdensity, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(TFHMdensity)

In [None]:
# 30 repetitions for each nRange value (10-50) on the TFHMdensity workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_TFHMdensity, complexSVRPearsonLists_TFHMdensity,
    complexSVRR2Lists_TFHMdensity, complexSVRMSELists_TFHMdensity,
    complexPredictedSVRSpearmanLists_TFHMdensity, complexPredictedSVRPearsonLists_TFHMdensity,
    complexPredictedSVRR2Lists_TFHMdensity, complexPredictedSVRMSELists_TFHMdensity,
    # SVC+SVR model recorders
    r2Recorder_TFHMdensity, mseRecorder_TFHMdensity,
    pearsonRecorder_TFHMdensity, spearmanRecorder_TFHMdensity,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_TFHMdensity, SVRPearsonLists_TFHMdensity,
    SVRR2Lists_TFHMdensity, SVRMSELists_TFHMdensity,
    PredictedSVRSpearmanLists_TFHMdensity, PredictedSVRPearsonLists_TFHMdensity,
    PredictedSVRR2Lists_TFHMdensity, PredictedSVRMSELists_TFHMdensity,
    SVRr2Recorder_TFHMdensity, SVRmseRecorder_TFHMdensity,
    SVRpearsonRecorder_TFHMdensity, SVRspearmanRecorder_TFHMdensity,
    # training indices, data frames and scalers
    indexRecorder_TFHMdensity, TrainXDFRecorder_TFHMdensity, TrainYDFRecorder_TFHMdensity,
    SVMSVRFullXDFRecorder_TFHMdensity, SVMSVRFullYDFRecorder_TFHMdensity,
    scXRecorder_TFHMdensity, scDensityRecorder_TFHMdensity, scDistanceRecorder_TFHMdensity,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpTFHMdensity.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.7,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to TFHMdensity, replacing the processed data that name held before.
TFHMdensity = ComparisonResultsPlot(
    [complexSVRR2Lists_TFHMdensity, SVRR2Lists_TFHMdensity],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=TFHMdensity, regression='Poly')

In [None]:
# Process 'Sheet1' of the TFBdensity workbook (third argument 0.4 - presumably
# the class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
TFBdensity = processExcelFormat03('HelenaExpTFBdensity.xlsx', 'Sheet1', 0.4)
heatmapGenerator(TFBdensity, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(TFBdensity)

In [None]:
# 30 repetitions for each nRange value (10-50) on the TFBdensity workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_TFBdensity, complexSVRPearsonLists_TFBdensity,
    complexSVRR2Lists_TFBdensity, complexSVRMSELists_TFBdensity,
    complexPredictedSVRSpearmanLists_TFBdensity, complexPredictedSVRPearsonLists_TFBdensity,
    complexPredictedSVRR2Lists_TFBdensity, complexPredictedSVRMSELists_TFBdensity,
    # SVC+SVR model recorders
    r2Recorder_TFBdensity, mseRecorder_TFBdensity,
    pearsonRecorder_TFBdensity, spearmanRecorder_TFBdensity,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_TFBdensity, SVRPearsonLists_TFBdensity,
    SVRR2Lists_TFBdensity, SVRMSELists_TFBdensity,
    PredictedSVRSpearmanLists_TFBdensity, PredictedSVRPearsonLists_TFBdensity,
    PredictedSVRR2Lists_TFBdensity, PredictedSVRMSELists_TFBdensity,
    SVRr2Recorder_TFBdensity, SVRmseRecorder_TFBdensity,
    SVRpearsonRecorder_TFBdensity, SVRspearmanRecorder_TFBdensity,
    # training indices, data frames and scalers
    indexRecorder_TFBdensity, TrainXDFRecorder_TFBdensity, TrainYDFRecorder_TFBdensity,
    SVMSVRFullXDFRecorder_TFBdensity, SVMSVRFullYDFRecorder_TFBdensity,
    scXRecorder_TFBdensity, scDensityRecorder_TFBdensity, scDistanceRecorder_TFBdensity,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpTFBdensity.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.4,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to TFBdensity, replacing the processed data that name held before.
TFBdensity = ComparisonResultsPlot(
    [complexSVRR2Lists_TFBdensity, SVRR2Lists_TFBdensity],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=TFBdensity, regression='Poly')

In [None]:
# Process 'Sheet1' of the TFHBdensity workbook (third argument 0.6 - presumably
# the class threshold; confirm against processExcelFormat03) and draw the
# 'BinaryClass' heatmap.
TFHBdensity = processExcelFormat03('HelenaExpTFHBdensity.xlsx', 'Sheet1', 0.6)
heatmapGenerator(TFHBdensity, 'BinaryClass')

In [None]:
# Same processed data, this time with heatmapGenerator's default second argument.
heatmapGenerator(TFHBdensity)

In [None]:
# 30 repetitions for each nRange value (10-50) on the TFHBdensity workbook, rbf kernel only.
(
    # SVC+SVR scores and raw predictions
    complexSVRSpearmanLists_TFHBdensity, complexSVRPearsonLists_TFHBdensity,
    complexSVRR2Lists_TFHBdensity, complexSVRMSELists_TFHBdensity,
    complexPredictedSVRSpearmanLists_TFHBdensity, complexPredictedSVRPearsonLists_TFHBdensity,
    complexPredictedSVRR2Lists_TFHBdensity, complexPredictedSVRMSELists_TFHBdensity,
    # SVC+SVR model recorders
    r2Recorder_TFHBdensity, mseRecorder_TFHBdensity,
    pearsonRecorder_TFHBdensity, spearmanRecorder_TFHBdensity,
    # plain SVR scores, raw predictions and model recorders
    SVRSpearmanLists_TFHBdensity, SVRPearsonLists_TFHBdensity,
    SVRR2Lists_TFHBdensity, SVRMSELists_TFHBdensity,
    PredictedSVRSpearmanLists_TFHBdensity, PredictedSVRPearsonLists_TFHBdensity,
    PredictedSVRR2Lists_TFHBdensity, PredictedSVRMSELists_TFHBdensity,
    SVRr2Recorder_TFHBdensity, SVRmseRecorder_TFHBdensity,
    SVRpearsonRecorder_TFHBdensity, SVRspearmanRecorder_TFHBdensity,
    # training indices, data frames and scalers
    indexRecorder_TFHBdensity, TrainXDFRecorder_TFHBdensity, TrainYDFRecorder_TFHBdensity,
    SVMSVRFullXDFRecorder_TFHBdensity, SVMSVRFullYDFRecorder_TFHBdensity,
    scXRecorder_TFHBdensity, scDensityRecorder_TFHBdensity, scDistanceRecorder_TFHBdensity,
) = ComparisonMultipleRun(
    runNum=30,
    excelFile='HelenaExpTFHBdensity.xlsx',
    processExcelFormat=3,
    sheet='Sheet1',
    nRange=[10, 20, 30, 40, 50],
    indices=0,
    kernelList=['rbf'],
    percent=0.6,
)

In [None]:
# Box-plot the per-run R^2 of both methods. The returned long-format frame is
# bound to TFHBdensity, replacing the processed data that name held before.
TFHBdensity = ComparisonResultsPlot(
    [complexSVRR2Lists_TFHBdensity, SVRR2Lists_TFHBdensity],
    regression='Poly',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Significance annotation (statsPlot's default test) of 'Poly' vs 'SVC+Poly' per training size.
statsPlot(toPlotDF=TFHBdensity, regression='Poly')

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
### Randomised hyper-parameter search for a random-forest regressor
### TrainDF_X: X training dataset
### TrainDF_Y: Y training dataset
### kernelList: not used; kept so the signature matches knn_rand_param_selection
def rf_rand_param_selection(TrainDF_X, TrainDF_Y, kernelList, random_state=None):
    """Return the best RandomForestRegressor found by RandomizedSearchCV.

    Parameters
    ----------
    TrainDF_X : array-like or DataFrame
        Training features.
    TrainDF_Y : array-like
        Training targets.
    kernelList : any
        Ignored. Random forests have no kernel; the argument only keeps the
        call signature interchangeable with the other *_param_selection helpers.
    random_state : int or None, optional
        Seed forwarded to both the forest and the search. The default (None)
        keeps the previous, unseeded behaviour.

    Returns
    -------
    The refitted best estimator (``rand.best_estimator_``).
    """
    # possible parameters to tune
    # 1.0 (= use every feature) replaces 'auto': for a regressor the two are
    # equivalent, but 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3.
    param_rand = [{'n_estimators': [5, 10, 15, 20],
                   'max_depth': [1, 2, 3, 4, 5],
                   'max_features': [1.0, 'sqrt']}]

    search_kwargs = dict(refit=True, scoring='r2', n_jobs=-1, random_state=random_state)
    if len(TrainDF_X) <= 10:
        # Too few rows for the default number of folds: fall back to 2-fold CV.
        search_kwargs['cv'] = 2

    rand = RandomizedSearchCV(RandomForestRegressor(random_state=random_state),
                              param_rand, **search_kwargs)
    rand.fit(TrainDF_X, TrainDF_Y)

    # print the best estimator, its parameters and its cross-validated R2
    print(rand.best_estimator_, rand.best_params_, rand.best_score_)

    # Return best estimator
    return rand.best_estimator_

In [None]:
# Run the regression comparison once, given training and testing sets (random-forest variant)
### For both SVCSVR & SVR
### TestY is density for both
### TrainX is 3-variable input for SVCSVR
### TrainY is density for both
### TrainSVRX is 2-variable input for SVR only
def regressionSVR(trainX, trainSVRX, trainY, \
                  testX, testSVRX, testY, \
                  kernelList, Newfull_df_X, sc_density):
    """Pick a feature weight per metric on a hold-out split, then refit and score on the test set.

    Every model comes from rf_rand_param_selection, so this is the random-forest
    flavour of regressionSVR; the "SVR" in the names is kept so callers keep
    working. The notebook defines regressionSVR again further down, so the
    definition that was executed last is the one in effect.

    Steps
    -----
    1. Split trainX once into an inner train/validation part
       (70 % train when len(trainX) <= 10, otherwise 80 %).
    2. For each weight in [0.01, 0.1, 1] multiply the 'SVM Quant Estimation'
       column by the weight, fit a model on the inner train part and score it on
       the validation part in original density units (R2 as squared correlation,
       MSE, Pearson, Spearman). Remember the best weight/model per metric.
    3. Fit one baseline model on the matching rows of trainSVRX and score it the
       same way.
    4. For each of the four metrics, refit on the full training set with the
       chosen weight and evaluate through finalCorrPrediction; refit the
       baseline and evaluate through SVROnlyfinalCorrPrediction.

    Parameters
    ----------
    trainX : DataFrame
        Training features; must contain the column 'SVM Quant Estimation'.
    trainSVRX : DataFrame
        Baseline training features, indexed with the same labels as trainX.
    trainY : pandas object
        Scaled training targets, indexed with the same labels as trainX.
    testX, testSVRX, testY
        Test features/targets, forwarded to the final-prediction helpers.
    kernelList
        Forwarded unchanged to rf_rand_param_selection.
    Newfull_df_X
        Not used by this function.
    sc_density
        Object with ``inverse_transform`` that maps scaled targets back to
        original units.

    Returns
    -------
    tuple of 28 values: for R2, MSE, Pearson, Spearman (in that order)
    ``(best model, best weight, test score, test prediction)`` of the weighted
    model, followed by ``(best model, test score, test prediction)`` of the
    baseline for the same four metrics. A model slot is 0 when no candidate was
    selected. The weighted "best model" is the one fitted on the inner split.
    """
    # Weights to be used
    #### Make this tunable too
    weightList = [0.01, 0.1, 1]

    # Best weight per metric, one entry per validation round (a single round at present)
    r2WeightList = []
    MSEWeightList = []
    PearsonWeightList = []
    SpearmanWeightList = []

    # Running best validation scores for the baseline model.
    # MSE starts at +inf (not a fixed number) so any finite error is accepted,
    # whatever the scale of the original density units.
    SVRR2scoreTemp = -1000
    SVRMSEscoreTemp = float('inf')
    SVRPearsonScoreTemp = -2
    SVRSpearmanScoreTemp = -2

    # 0 means "no model selected"; it is returned to the caller unchanged
    SVRbestR2Model = 0
    SVRbestMSEModel = 0
    SVRbestPearsonModel = 0
    SVRbestSpearmanModel = 0

    bestR2Weight = 1
    bestMSEWeight = 1
    bestPearsonWeight = 1
    bestSpearmanWeight = 1

    bestR2Model = 0
    bestMSEModel = 0
    bestPearsonModel = 0
    bestSpearmanModel = 0

    for i in range(1):
        if len(trainX) <= 10:
            weightTrainXDF = trainX.sample(frac = 0.7)
        else:
            weightTrainXDF = trainX.sample(frac = 0.8)

        weightTestXDF = trainX.drop(weightTrainXDF.index)
        train_index = weightTrainXDF.index
        test_index = weightTestXDF.index
        TrainY, TestY = trainY.loc[train_index], trainY.loc[test_index]
        TrainYArr = np.array(TrainY)
        TestYArr = np.array(TestY)
        SVRTrainXDF, SVRTestXDF = trainSVRX.loc[train_index], trainSVRX.loc[test_index]

        # Validation targets back in original units; identical for every weight
        # and for the baseline, so compute them once.
        corrTestDensity = \
            sc_density.inverse_transform(TestYArr.reshape(-1, 1)).reshape(len(TestYArr))

        # Running best validation scores for the weighted model
        R2scoreTemp = -1000
        MSEscoreTemp = float('inf')
        PearsonScoreTemp = -2
        SpearmanScoreTemp = -2

        # Loop through the different weights:
        for w in weightList:
            trainXCopy = weightTrainXDF.copy()
            testXCopy = weightTestXDF.copy()
            # Transform
            trainXCopy['SVM Quant Estimation'] = trainXCopy['SVM Quant Estimation'] * w
            testXCopy['SVM Quant Estimation'] = testXCopy['SVM Quant Estimation'] * w
            # Select best parameters
            model = rf_rand_param_selection(trainXCopy, TrainYArr, kernelList)
            #### Predict with the best model of the current weight
            weightY_pred = model.predict(testXCopy.values)
            corrPredictedDensity = \
                sc_density.inverse_transform(weightY_pred.reshape(-1, 1)).reshape(len(TestYArr))

            #### Select the best weight for all metrics
            # R2 (squared Pearson correlation)
            R2score = np.corrcoef(corrTestDensity, corrPredictedDensity)[0, 1] ** 2
            print('R2: ', R2score)
            if not np.isnan(R2score) and R2score > R2scoreTemp:
                R2scoreTemp = R2score
                bestR2Model = model
                bestR2Weight = w
            # MSE (the default of mean_squared_error; the `squared` keyword is
            # omitted because newer scikit-learn releases no longer accept it)
            MSEscore = mean_squared_error(corrTestDensity, corrPredictedDensity)
            print('MSE: ', MSEscore)
            if not np.isnan(MSEscore) and MSEscore < MSEscoreTemp:
                MSEscoreTemp = MSEscore
                bestMSEModel = model
                bestMSEWeight = w
            # Pearson
            PearsonScore = pearsonr(corrTestDensity, corrPredictedDensity)[0]
            print('Pearson: ', PearsonScore)
            if not np.isnan(PearsonScore) and PearsonScore > PearsonScoreTemp:
                PearsonScoreTemp = PearsonScore
                bestPearsonModel = model
                bestPearsonWeight = w
            # Spearman
            SpearmanScore = spearmanr(corrTestDensity, corrPredictedDensity)[0]
            print('Spearman: ', SpearmanScore)
            if not np.isnan(SpearmanScore) and SpearmanScore > SpearmanScoreTemp:
                SpearmanScoreTemp = SpearmanScore
                bestSpearmanModel = model
                bestSpearmanWeight = w

        # Record best weight now for each round
        r2WeightList.append(bestR2Weight)
        MSEWeightList.append(bestMSEWeight)
        PearsonWeightList.append(bestPearsonWeight)
        SpearmanWeightList.append(bestSpearmanWeight)

        #### Train the baseline model now
        SVRmodel = rf_rand_param_selection(SVRTrainXDF, TrainYArr, kernelList)
        SVRDensity_pred = SVRmodel.predict(SVRTestXDF.values)
        SVRcorrPredictedDensity = \
            sc_density.inverse_transform(SVRDensity_pred.reshape(-1, 1)).reshape(len(TestYArr))

        # A NaN score compares False below, so the model slot then stays 0 and
        # the final loop falls back to a fresh parameter search.
        SVRR2score = np.corrcoef(corrTestDensity, SVRcorrPredictedDensity)[0, 1] ** 2
        if SVRR2score > SVRR2scoreTemp:
            SVRR2scoreTemp = SVRR2score
            SVRbestR2Model = SVRmodel
        # MSE
        SVRMSEscore = mean_squared_error(corrTestDensity, SVRcorrPredictedDensity)
        if SVRMSEscore < SVRMSEscoreTemp:
            SVRMSEscoreTemp = SVRMSEscore
            SVRbestMSEModel = SVRmodel
        # Pearson
        SVRPearsonScore = pearsonr(corrTestDensity, SVRcorrPredictedDensity)[0]
        if SVRPearsonScore > SVRPearsonScoreTemp:
            SVRPearsonScoreTemp = SVRPearsonScore
            SVRbestPearsonModel = SVRmodel
        # Spearman
        SVRSpearmanScore = spearmanr(corrTestDensity, SVRcorrPredictedDensity)[0]
        if SVRSpearmanScore > SVRSpearmanScoreTemp:
            SVRSpearmanScoreTemp = SVRSpearmanScore
            SVRbestSpearmanModel = SVRmodel

    ### SVCSVR: most frequent winning weight per metric
    bestR2Weight = max(set(r2WeightList), key = r2WeightList.count)
    bestMSEWeight = max(set(MSEWeightList), key = MSEWeightList.count)
    bestPearsonWeight = max(set(PearsonWeightList), key = PearsonWeightList.count)
    bestSpearmanWeight = max(set(SpearmanWeightList), key = SpearmanWeightList.count)

    #### To predict using the best weight
    metricWeightList = [bestR2Weight, bestMSEWeight, bestPearsonWeight, bestSpearmanWeight]
    ### Record baseline models
    SVRModelList = [SVRbestR2Model, SVRbestMSEModel, SVRbestPearsonModel, SVRbestSpearmanModel]
    metricList = ['R2', 'MSE', 'Pearson', 'Spearman']
    # To record final prediction scores
    corrList = []
    predictedList = []
    SVRcorrList = []
    SVRpredictedList = []
    # Loop through the 4 metrics
    for i in range(4):
        ### Reserved for final training and testing sets
        finalTrainXCopy = trainX.copy()
        finalTestXCopy = testX.copy()

        finalTrainSVRXCopy = trainSVRX.copy()
        finalTestSVRXCopy = testSVRX.copy()

        weight = metricWeightList[i]
        m = metricList[i]
        # First, retrain on the full training set using the selected weight
        finalTrainXCopy['SVM Quant Estimation'] = finalTrainXCopy['SVM Quant Estimation'] * weight

        finalModel = rf_rand_param_selection(finalTrainXCopy, trainY, kernelList)

        # Now predict. The test features are handed over unscaled together with
        # the weight (presumably finalCorrPrediction applies it -- confirm there).
        corr, predictedDensity = \
            finalCorrPrediction(finalTestXCopy, testY, weight, finalModel, sc_density, metric = m)
        corrList.append(corr) # Save results
        predictedList.append(predictedDensity)

        #### Baseline -- retrain the selected model on the full training set and predict
        SVRCurrModel = SVRModelList[i]
        if SVRCurrModel != 0:
            SVRCurrModel.fit(finalTrainSVRXCopy.values, trainY)
        else:
            SVRCurrModel = rf_rand_param_selection(finalTrainSVRXCopy, trainY, kernelList)
        # Predict
        SVRcorr, SVRpredictedDensity = \
            SVROnlyfinalCorrPrediction(finalTestSVRXCopy, testY, 1, SVRCurrModel, sc_density, metric = m)
        SVRcorrList.append(SVRcorr)
        SVRpredictedList.append(SVRpredictedDensity)

    print('SVM: ', corrList)
    print('SVR: ', SVRcorrList)
    R2Corr, MSECorr, PearsonCorr, SpearmanCorr = corrList[0], corrList[1], corrList[2], corrList[3]
    R2Predicted, MSEPredicted, PearsonPredicted, SpearmanPredicted = \
        predictedList[0], predictedList[1], predictedList[2], predictedList[3]

    SVRR2Corr, SVRMSECorr, SVRPearsonCorr, SVRSpearmanCorr = \
        SVRcorrList[0], SVRcorrList[1], SVRcorrList[2], SVRcorrList[3]
    SVRR2Predicted, SVRMSEPredicted, SVRPearsonPredicted, SVRSpearmanPredicted = \
        SVRpredictedList[0], SVRpredictedList[1], SVRpredictedList[2], SVRpredictedList[3]

    return bestR2Model, bestR2Weight, R2Corr, R2Predicted, bestMSEModel, bestMSEWeight, MSECorr, MSEPredicted,\
bestPearsonModel, bestPearsonWeight, PearsonCorr, PearsonPredicted,\
bestSpearmanModel, bestSpearmanWeight, SpearmanCorr, SpearmanPredicted,\
SVRbestR2Model, SVRR2Corr, SVRR2Predicted, SVRbestMSEModel, SVRMSECorr, SVRMSEPredicted, \
SVRbestPearsonModel, SVRPearsonCorr, SVRPearsonPredicted, SVRbestSpearmanModel, SVRSpearmanCorr, SVRSpearmanPredicted


In [None]:
#HelenaTFTFHBdensity

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaTFTFHBdensity.xlsx'
# and unpack its 32 return values into the *_TFTFHB_RF names.
(
    complexSVRSpearmanLists_TFTFHB_RF, complexSVRPearsonLists_TFTFHB_RF,
    complexSVRR2Lists_TFTFHB_RF, complexSVRMSELists_TFTFHB_RF,
    complexPredictedSVRSpearmanLists_TFTFHB_RF, complexPredictedSVRPearsonLists_TFTFHB_RF,
    complexPredictedSVRR2Lists_TFTFHB_RF, complexPredictedSVRMSELists_TFTFHB_RF,
    r2Recorder_TFTFHB_RF, mseRecorder_TFTFHB_RF, pearsonRecorder_TFTFHB_RF, spearmanRecorder_TFTFHB_RF,
    SVRSpearmanLists_TFTFHB_RF, SVRPearsonLists_TFTFHB_RF, SVRR2Lists_TFTFHB_RF, SVRMSELists_TFTFHB_RF,
    PredictedSVRSpearmanLists_TFTFHB_RF, PredictedSVRPearsonLists_TFTFHB_RF,
    PredictedSVRR2Lists_TFTFHB_RF, PredictedSVRMSELists_TFTFHB_RF,
    SVRr2Recorder_TFTFHB_RF, SVRmseRecorder_TFTFHB_RF, SVRpearsonRecorder_TFTFHB_RF, SVRspearmanRecorder_TFTFHB_RF,
    indexRecorder_TFTFHB_RF, TrainXDFRecorder_TFTFHB_RF, TrainYDFRecorder_TFTFHB_RF,
    SVMSVRFullXDFRecorder_TFTFHB_RF, SVMSVRFullYDFRecorder_TFTFHB_RF, scXRecorder_TFTFHB_RF,
    scDensityRecorder_TFTFHB_RF, scDistanceRecorder_TFTFHB_RF,
) = ComparisonMultipleRun(
    30, 'HelenaTFTFHBdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.6,
)

In [None]:
# Plot the R^2 lists of both models for the TFTFHB run under the 'R.F.' label.
TFTFHB_123_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFHB_RF, SVRR2Lists_TFTFHB_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result from the previous cell to statsPlot, labelled 'R.F.'.
statsPlot(TFTFHB_123_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaTFTFBdensity.xlsx'
# and unpack its 32 return values into the *_TFTFB_RF names.
(
    complexSVRSpearmanLists_TFTFB_RF, complexSVRPearsonLists_TFTFB_RF,
    complexSVRR2Lists_TFTFB_RF, complexSVRMSELists_TFTFB_RF,
    complexPredictedSVRSpearmanLists_TFTFB_RF, complexPredictedSVRPearsonLists_TFTFB_RF,
    complexPredictedSVRR2Lists_TFTFB_RF, complexPredictedSVRMSELists_TFTFB_RF,
    r2Recorder_TFTFB_RF, mseRecorder_TFTFB_RF, pearsonRecorder_TFTFB_RF, spearmanRecorder_TFTFB_RF,
    SVRSpearmanLists_TFTFB_RF, SVRPearsonLists_TFTFB_RF, SVRR2Lists_TFTFB_RF, SVRMSELists_TFTFB_RF,
    PredictedSVRSpearmanLists_TFTFB_RF, PredictedSVRPearsonLists_TFTFB_RF,
    PredictedSVRR2Lists_TFTFB_RF, PredictedSVRMSELists_TFTFB_RF,
    SVRr2Recorder_TFTFB_RF, SVRmseRecorder_TFTFB_RF, SVRpearsonRecorder_TFTFB_RF, SVRspearmanRecorder_TFTFB_RF,
    indexRecorder_TFTFB_RF, TrainXDFRecorder_TFTFB_RF, TrainYDFRecorder_TFTFB_RF,
    SVMSVRFullXDFRecorder_TFTFB_RF, SVMSVRFullYDFRecorder_TFTFB_RF, scXRecorder_TFTFB_RF,
    scDensityRecorder_TFTFB_RF, scDistanceRecorder_TFTFB_RF,
) = ComparisonMultipleRun(
    30, 'HelenaTFTFBdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.3,
)

In [None]:
# Plot the R^2 lists of both models for the TFTFB run under the 'R.F.' label.
TFTFB_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFB_RF, SVRR2Lists_TFTFB_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result from the previous cell to statsPlot, labelled 'R.F.'.
statsPlot(TFTFB_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDAHBCLA.xlsx'
# and unpack its 32 return values into the *_DAHBCLA_RF names.
(
    complexSVRSpearmanLists_DAHBCLA_RF, complexSVRPearsonLists_DAHBCLA_RF,
    complexSVRR2Lists_DAHBCLA_RF, complexSVRMSELists_DAHBCLA_RF,
    complexPredictedSVRSpearmanLists_DAHBCLA_RF, complexPredictedSVRPearsonLists_DAHBCLA_RF,
    complexPredictedSVRR2Lists_DAHBCLA_RF, complexPredictedSVRMSELists_DAHBCLA_RF,
    r2Recorder_DAHBCLA_RF, mseRecorder_DAHBCLA_RF, pearsonRecorder_DAHBCLA_RF, spearmanRecorder_DAHBCLA_RF,
    SVRSpearmanLists_DAHBCLA_RF, SVRPearsonLists_DAHBCLA_RF, SVRR2Lists_DAHBCLA_RF, SVRMSELists_DAHBCLA_RF,
    PredictedSVRSpearmanLists_DAHBCLA_RF, PredictedSVRPearsonLists_DAHBCLA_RF,
    PredictedSVRR2Lists_DAHBCLA_RF, PredictedSVRMSELists_DAHBCLA_RF,
    SVRr2Recorder_DAHBCLA_RF, SVRmseRecorder_DAHBCLA_RF, SVRpearsonRecorder_DAHBCLA_RF, SVRspearmanRecorder_DAHBCLA_RF,
    indexRecorder_DAHBCLA_RF, TrainXDFRecorder_DAHBCLA_RF, TrainYDFRecorder_DAHBCLA_RF,
    SVMSVRFullXDFRecorder_DAHBCLA_RF, SVMSVRFullYDFRecorder_DAHBCLA_RF, scXRecorder_DAHBCLA_RF,
    scDensityRecorder_DAHBCLA_RF, scDistanceRecorder_DAHBCLA_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDAHBCLA.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.6,
)

In [None]:
# Plot the R^2 lists of both models for the DAHBCLA run under the 'R.F.' label.
DAHBCLA_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBCLA_RF, SVRR2Lists_DAHBCLA_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHBCLA to statsPlot, labelled 'R.F.'.
statsPlot(DAHBCLA_RF, 'R.F.')

In [None]:
# Pass the same ComparisonResultsPlot result to scatterComparisonPlot.
scatterComparisonPlot(DAHBCLA_RF)

In [None]:
# Process 'Sheet1' of the DAHMCLA workbook with processExcelFormat03 (third argument 0.6)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
# NOTE(review): the ComparisonMultipleRun call for this workbook passes 0.5 as its last
# argument, whereas the other datasets use the same value in both places -- confirm.
DAHMCLA = processExcelFormat03('HelenaExpDAHMCLA.xlsx', 'Sheet1', 0.6)
heatmapGenerator(DAHMCLA, 'BinaryClass')

In [None]:
# Same data again, this time without the 'BinaryClass' argument.
heatmapGenerator(DAHMCLA)

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDAHMCLA.xlsx'
# and unpack its 32 return values into the *_DAHMCLA_RF names.
(
    complexSVRSpearmanLists_DAHMCLA_RF, complexSVRPearsonLists_DAHMCLA_RF,
    complexSVRR2Lists_DAHMCLA_RF, complexSVRMSELists_DAHMCLA_RF,
    complexPredictedSVRSpearmanLists_DAHMCLA_RF, complexPredictedSVRPearsonLists_DAHMCLA_RF,
    complexPredictedSVRR2Lists_DAHMCLA_RF, complexPredictedSVRMSELists_DAHMCLA_RF,
    r2Recorder_DAHMCLA_RF, mseRecorder_DAHMCLA_RF, pearsonRecorder_DAHMCLA_RF, spearmanRecorder_DAHMCLA_RF,
    SVRSpearmanLists_DAHMCLA_RF, SVRPearsonLists_DAHMCLA_RF, SVRR2Lists_DAHMCLA_RF, SVRMSELists_DAHMCLA_RF,
    PredictedSVRSpearmanLists_DAHMCLA_RF, PredictedSVRPearsonLists_DAHMCLA_RF,
    PredictedSVRR2Lists_DAHMCLA_RF, PredictedSVRMSELists_DAHMCLA_RF,
    SVRr2Recorder_DAHMCLA_RF, SVRmseRecorder_DAHMCLA_RF, SVRpearsonRecorder_DAHMCLA_RF, SVRspearmanRecorder_DAHMCLA_RF,
    indexRecorder_DAHMCLA_RF, TrainXDFRecorder_DAHMCLA_RF, TrainYDFRecorder_DAHMCLA_RF,
    SVMSVRFullXDFRecorder_DAHMCLA_RF, SVMSVRFullYDFRecorder_DAHMCLA_RF, scXRecorder_DAHMCLA_RF,
    scDensityRecorder_DAHMCLA_RF, scDistanceRecorder_DAHMCLA_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDAHMCLA.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['rbf', 'poly'], 0.5,
)

In [None]:
# Plot the R^2 lists of both models for the DAHMCLA run under the 'R.F.' label.
DAHMCLA_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHMCLA_RF, SVRR2Lists_DAHMCLA_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHMCLA to statsPlot, labelled 'R.F.'.
statsPlot(DAHMCLA_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDAMCLA.xlsx'
# and unpack its 32 return values into the *_DAMCLA_RF names.
(
    complexSVRSpearmanLists_DAMCLA_RF, complexSVRPearsonLists_DAMCLA_RF,
    complexSVRR2Lists_DAMCLA_RF, complexSVRMSELists_DAMCLA_RF,
    complexPredictedSVRSpearmanLists_DAMCLA_RF, complexPredictedSVRPearsonLists_DAMCLA_RF,
    complexPredictedSVRR2Lists_DAMCLA_RF, complexPredictedSVRMSELists_DAMCLA_RF,
    r2Recorder_DAMCLA_RF, mseRecorder_DAMCLA_RF, pearsonRecorder_DAMCLA_RF, spearmanRecorder_DAMCLA_RF,
    SVRSpearmanLists_DAMCLA_RF, SVRPearsonLists_DAMCLA_RF, SVRR2Lists_DAMCLA_RF, SVRMSELists_DAMCLA_RF,
    PredictedSVRSpearmanLists_DAMCLA_RF, PredictedSVRPearsonLists_DAMCLA_RF,
    PredictedSVRR2Lists_DAMCLA_RF, PredictedSVRMSELists_DAMCLA_RF,
    SVRr2Recorder_DAMCLA_RF, SVRmseRecorder_DAMCLA_RF, SVRpearsonRecorder_DAMCLA_RF, SVRspearmanRecorder_DAMCLA_RF,
    indexRecorder_DAMCLA_RF, TrainXDFRecorder_DAMCLA_RF, TrainYDFRecorder_DAMCLA_RF,
    SVMSVRFullXDFRecorder_DAMCLA_RF, SVMSVRFullYDFRecorder_DAMCLA_RF, scXRecorder_DAMCLA_RF,
    scDensityRecorder_DAMCLA_RF, scDistanceRecorder_DAMCLA_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDAMCLA.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.5,
)

In [None]:
# Plot the R^2 lists of both models for the DAMCLA run under the 'R.F.' label.
DAMCLA_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DAMCLA_RF, SVRR2Lists_DAMCLA_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAMCLA to statsPlot, labelled 'R.F.'.
statsPlot(DAMCLA_RF, 'R.F.')

In [None]:
# Pass the same ComparisonResultsPlot result to scatterComparisonPlot.
scatterComparisonPlot(DAMCLA_RF)

In [None]:
# Process 'Sheet1' of the DACLA workbook with processExcelFormat03 (third argument 0.8)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
# `t` is only a scratch name for this preview; the next cell reads it again.
t = processExcelFormat03('HelenaExpDACLA.xlsx', 'Sheet1', 0.8)
heatmapGenerator(t, 'BinaryClass')

In [None]:
# Same DACLA data (`t` from the previous cell), this time without the 'BinaryClass' argument.
heatmapGenerator(t)

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDACLA.xlsx'
# and unpack its 32 return values into the *_DACLA_RF names.
(
    complexSVRSpearmanLists_DACLA_RF, complexSVRPearsonLists_DACLA_RF,
    complexSVRR2Lists_DACLA_RF, complexSVRMSELists_DACLA_RF,
    complexPredictedSVRSpearmanLists_DACLA_RF, complexPredictedSVRPearsonLists_DACLA_RF,
    complexPredictedSVRR2Lists_DACLA_RF, complexPredictedSVRMSELists_DACLA_RF,
    r2Recorder_DACLA_RF, mseRecorder_DACLA_RF, pearsonRecorder_DACLA_RF, spearmanRecorder_DACLA_RF,
    SVRSpearmanLists_DACLA_RF, SVRPearsonLists_DACLA_RF, SVRR2Lists_DACLA_RF, SVRMSELists_DACLA_RF,
    PredictedSVRSpearmanLists_DACLA_RF, PredictedSVRPearsonLists_DACLA_RF,
    PredictedSVRR2Lists_DACLA_RF, PredictedSVRMSELists_DACLA_RF,
    SVRr2Recorder_DACLA_RF, SVRmseRecorder_DACLA_RF, SVRpearsonRecorder_DACLA_RF, SVRspearmanRecorder_DACLA_RF,
    indexRecorder_DACLA_RF, TrainXDFRecorder_DACLA_RF, TrainYDFRecorder_DACLA_RF,
    SVMSVRFullXDFRecorder_DACLA_RF, SVMSVRFullYDFRecorder_DACLA_RF, scXRecorder_DACLA_RF,
    scDensityRecorder_DACLA_RF, scDistanceRecorder_DACLA_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDACLA.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.8,
)

In [None]:
# Plot the R^2 lists of both models for the DACLA run under the 'R.F.' label.
DACLA_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DACLA_RF, SVRR2Lists_DACLA_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DACLA to statsPlot, labelled 'R.F.'.
statsPlot(DACLA_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDAHBSUL.xlsx'
# and unpack its 32 return values into the *_DAHBSUL_RF names.
(
    complexSVRSpearmanLists_DAHBSUL_RF, complexSVRPearsonLists_DAHBSUL_RF,
    complexSVRR2Lists_DAHBSUL_RF, complexSVRMSELists_DAHBSUL_RF,
    complexPredictedSVRSpearmanLists_DAHBSUL_RF, complexPredictedSVRPearsonLists_DAHBSUL_RF,
    complexPredictedSVRR2Lists_DAHBSUL_RF, complexPredictedSVRMSELists_DAHBSUL_RF,
    r2Recorder_DAHBSUL_RF, mseRecorder_DAHBSUL_RF, pearsonRecorder_DAHBSUL_RF, spearmanRecorder_DAHBSUL_RF,
    SVRSpearmanLists_DAHBSUL_RF, SVRPearsonLists_DAHBSUL_RF, SVRR2Lists_DAHBSUL_RF, SVRMSELists_DAHBSUL_RF,
    PredictedSVRSpearmanLists_DAHBSUL_RF, PredictedSVRPearsonLists_DAHBSUL_RF,
    PredictedSVRR2Lists_DAHBSUL_RF, PredictedSVRMSELists_DAHBSUL_RF,
    SVRr2Recorder_DAHBSUL_RF, SVRmseRecorder_DAHBSUL_RF, SVRpearsonRecorder_DAHBSUL_RF, SVRspearmanRecorder_DAHBSUL_RF,
    indexRecorder_DAHBSUL_RF, TrainXDFRecorder_DAHBSUL_RF, TrainYDFRecorder_DAHBSUL_RF,
    SVMSVRFullXDFRecorder_DAHBSUL_RF, SVMSVRFullYDFRecorder_DAHBSUL_RF, scXRecorder_DAHBSUL_RF,
    scDensityRecorder_DAHBSUL_RF, scDistanceRecorder_DAHBSUL_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDAHBSUL.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.4,
)

In [None]:
# Plot the R^2 lists of both models for the DAHBSUL run under the 'R.F.' label.
DAHBSUL_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBSUL_RF, SVRR2Lists_DAHBSUL_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHBSUL to statsPlot, labelled 'R.F.'.
statsPlot(DAHBSUL_RF, 'R.F.')

In [None]:
# Process 'Sheet1' of the DABSUL workbook with processExcelFormat03 (third argument 0.1)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
# `T` is only a scratch name for this preview.
T = processExcelFormat03('HelenaExpDABSUL.xlsx', 'Sheet1', 0.1)
heatmapGenerator(T, 'BinaryClass')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDABSUL.xlsx'
# and unpack its 32 return values into the *_DABSUL_RF names.
(
    complexSVRSpearmanLists_DABSUL_RF, complexSVRPearsonLists_DABSUL_RF,
    complexSVRR2Lists_DABSUL_RF, complexSVRMSELists_DABSUL_RF,
    complexPredictedSVRSpearmanLists_DABSUL_RF, complexPredictedSVRPearsonLists_DABSUL_RF,
    complexPredictedSVRR2Lists_DABSUL_RF, complexPredictedSVRMSELists_DABSUL_RF,
    r2Recorder_DABSUL_RF, mseRecorder_DABSUL_RF, pearsonRecorder_DABSUL_RF, spearmanRecorder_DABSUL_RF,
    SVRSpearmanLists_DABSUL_RF, SVRPearsonLists_DABSUL_RF, SVRR2Lists_DABSUL_RF, SVRMSELists_DABSUL_RF,
    PredictedSVRSpearmanLists_DABSUL_RF, PredictedSVRPearsonLists_DABSUL_RF,
    PredictedSVRR2Lists_DABSUL_RF, PredictedSVRMSELists_DABSUL_RF,
    SVRr2Recorder_DABSUL_RF, SVRmseRecorder_DABSUL_RF, SVRpearsonRecorder_DABSUL_RF, SVRspearmanRecorder_DABSUL_RF,
    indexRecorder_DABSUL_RF, TrainXDFRecorder_DABSUL_RF, TrainYDFRecorder_DABSUL_RF,
    SVMSVRFullXDFRecorder_DABSUL_RF, SVMSVRFullYDFRecorder_DABSUL_RF, scXRecorder_DABSUL_RF,
    scDensityRecorder_DABSUL_RF, scDistanceRecorder_DABSUL_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDABSUL.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.1,
)

In [None]:
# Plot the R^2 lists of both models for the DABSUL run under the 'R.F.' label.
DABSUL_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DABSUL_RF, SVRR2Lists_DABSUL_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DABSUL to statsPlot, labelled 'R.F.'.
statsPlot(DABSUL_RF, 'R.F.')

In [None]:
# Process 'Sheet1' of the DABCLA workbook with processExcelFormat03 (third argument 0.3)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
DABCLA = processExcelFormat03('HelenaExpDABCLA.xlsx', 'Sheet1', 0.3)
heatmapGenerator(DABCLA, 'BinaryClass')

In [None]:
# Same data again, this time without the 'BinaryClass' argument.
heatmapGenerator(DABCLA)

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpDABCLA.xlsx'
# and unpack its 32 return values into the *_DABCLA_RF names.
(
    complexSVRSpearmanLists_DABCLA_RF, complexSVRPearsonLists_DABCLA_RF,
    complexSVRR2Lists_DABCLA_RF, complexSVRMSELists_DABCLA_RF,
    complexPredictedSVRSpearmanLists_DABCLA_RF, complexPredictedSVRPearsonLists_DABCLA_RF,
    complexPredictedSVRR2Lists_DABCLA_RF, complexPredictedSVRMSELists_DABCLA_RF,
    r2Recorder_DABCLA_RF, mseRecorder_DABCLA_RF, pearsonRecorder_DABCLA_RF, spearmanRecorder_DABCLA_RF,
    SVRSpearmanLists_DABCLA_RF, SVRPearsonLists_DABCLA_RF, SVRR2Lists_DABCLA_RF, SVRMSELists_DABCLA_RF,
    PredictedSVRSpearmanLists_DABCLA_RF, PredictedSVRPearsonLists_DABCLA_RF,
    PredictedSVRR2Lists_DABCLA_RF, PredictedSVRMSELists_DABCLA_RF,
    SVRr2Recorder_DABCLA_RF, SVRmseRecorder_DABCLA_RF, SVRpearsonRecorder_DABCLA_RF, SVRspearmanRecorder_DABCLA_RF,
    indexRecorder_DABCLA_RF, TrainXDFRecorder_DABCLA_RF, TrainYDFRecorder_DABCLA_RF,
    SVMSVRFullXDFRecorder_DABCLA_RF, SVMSVRFullYDFRecorder_DABCLA_RF, scXRecorder_DABCLA_RF,
    scDensityRecorder_DABCLA_RF, scDistanceRecorder_DABCLA_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpDABCLA.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['rbf'], 0.3,
)

In [None]:
# Plot the R^2 lists of both models for the DABCLA run under the 'R.F.' label.
DABCLA_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_DABCLA_RF, SVRR2Lists_DABCLA_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DABCLA to statsPlot, labelled 'R.F.'.
statsPlot(DABCLA_RF, 'R.F.')

In [None]:
# Process 'Sheet1' of the TFM density workbook with processExcelFormat03 (third argument 0.6)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
TFM = processExcelFormat03('HelenaExpTFMdensity.xlsx', 'Sheet1', 0.6)
heatmapGenerator(TFM, 'BinaryClass')

In [None]:
# Same data again, this time without the 'BinaryClass' argument.
heatmapGenerator(TFM)

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpTFMdensity.xlsx'
# and unpack its 32 return values into the *_ExpTFM_RF names.
# The Pearson prediction list used to be bound to the misspelled name
# PredictedSVRPearsonLists_ExpTFMA_RF; it now follows the *_ExpTFM_RF pattern
# like every other target in this cell.
(
    complexSVRSpearmanLists_ExpTFM_RF, complexSVRPearsonLists_ExpTFM_RF,
    complexSVRR2Lists_ExpTFM_RF, complexSVRMSELists_ExpTFM_RF,
    complexPredictedSVRSpearmanLists_ExpTFM_RF, complexPredictedSVRPearsonLists_ExpTFM_RF,
    complexPredictedSVRR2Lists_ExpTFM_RF, complexPredictedSVRMSELists_ExpTFM_RF,
    r2Recorder_ExpTFM_RF, mseRecorder_ExpTFM_RF, pearsonRecorder_ExpTFM_RF, spearmanRecorder_ExpTFM_RF,
    SVRSpearmanLists_ExpTFM_RF, SVRPearsonLists_ExpTFM_RF, SVRR2Lists_ExpTFM_RF, SVRMSELists_ExpTFM_RF,
    PredictedSVRSpearmanLists_ExpTFM_RF, PredictedSVRPearsonLists_ExpTFM_RF,
    PredictedSVRR2Lists_ExpTFM_RF, PredictedSVRMSELists_ExpTFM_RF,
    SVRr2Recorder_ExpTFM_RF, SVRmseRecorder_ExpTFM_RF, SVRpearsonRecorder_ExpTFM_RF, SVRspearmanRecorder_ExpTFM_RF,
    indexRecorder_ExpTFM_RF, TrainXDFRecorder_ExpTFM_RF, TrainYDFRecorder_ExpTFM_RF,
    SVMSVRFullXDFRecorder_ExpTFM_RF, SVMSVRFullYDFRecorder_ExpTFM_RF, scXRecorder_ExpTFM_RF,
    scDensityRecorder_ExpTFM_RF, scDistanceRecorder_ExpTFM_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpTFMdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['rbf'], 0.6,
)

In [None]:
# Plot the R^2 lists of both models for the ExpTFM run under the 'R.F.' label.
ExpTFM_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFM_RF, SVRR2Lists_ExpTFM_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for ExpTFM to statsPlot, labelled 'R.F.'.
statsPlot(ExpTFM_RF, 'R.F.')

In [None]:
# Process 'Sheet1' of the TFHM density workbook with processExcelFormat03 (third argument 0.7)
# and pass the result to heatmapGenerator in 'BinaryClass' mode.
# `r` is only a scratch name for this preview; the next cell reads it again.
r = processExcelFormat03('HelenaExpTFHMdensity.xlsx', 'Sheet1', 0.7)
heatmapGenerator(r, 'BinaryClass')

In [None]:
# Same TFHM data (`r` from the previous cell), this time without the 'BinaryClass' argument.
heatmapGenerator(r)

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpTFHMdensity.xlsx'
# and unpack its 32 return values into the *_ExpTFHM_RF names.
(
    complexSVRSpearmanLists_ExpTFHM_RF, complexSVRPearsonLists_ExpTFHM_RF,
    complexSVRR2Lists_ExpTFHM_RF, complexSVRMSELists_ExpTFHM_RF,
    complexPredictedSVRSpearmanLists_ExpTFHM_RF, complexPredictedSVRPearsonLists_ExpTFHM_RF,
    complexPredictedSVRR2Lists_ExpTFHM_RF, complexPredictedSVRMSELists_ExpTFHM_RF,
    r2Recorder_ExpTFHM_RF, mseRecorder_ExpTFHM_RF, pearsonRecorder_ExpTFHM_RF, spearmanRecorder_ExpTFHM_RF,
    SVRSpearmanLists_ExpTFHM_RF, SVRPearsonLists_ExpTFHM_RF, SVRR2Lists_ExpTFHM_RF, SVRMSELists_ExpTFHM_RF,
    PredictedSVRSpearmanLists_ExpTFHM_RF, PredictedSVRPearsonLists_ExpTFHM_RF,
    PredictedSVRR2Lists_ExpTFHM_RF, PredictedSVRMSELists_ExpTFHM_RF,
    SVRr2Recorder_ExpTFHM_RF, SVRmseRecorder_ExpTFHM_RF, SVRpearsonRecorder_ExpTFHM_RF, SVRspearmanRecorder_ExpTFHM_RF,
    indexRecorder_ExpTFHM_RF, TrainXDFRecorder_ExpTFHM_RF, TrainYDFRecorder_ExpTFHM_RF,
    SVMSVRFullXDFRecorder_ExpTFHM_RF, SVMSVRFullYDFRecorder_ExpTFHM_RF, scXRecorder_ExpTFHM_RF,
    scDensityRecorder_ExpTFHM_RF, scDistanceRecorder_ExpTFHM_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpTFHMdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.7,
)

In [None]:
# Plot the R^2 lists of both models for the ExpTFHM run under the 'R.F.' label.
ExpTFHM_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFHM_RF, SVRR2Lists_ExpTFHM_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for ExpTFHM to statsPlot, labelled 'R.F.'.
statsPlot(ExpTFHM_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpTFBdensity.xlsx'
# and unpack its 32 return values into the *_ExpTFB_RF names.
(
    complexSVRSpearmanLists_ExpTFB_RF, complexSVRPearsonLists_ExpTFB_RF,
    complexSVRR2Lists_ExpTFB_RF, complexSVRMSELists_ExpTFB_RF,
    complexPredictedSVRSpearmanLists_ExpTFB_RF, complexPredictedSVRPearsonLists_ExpTFB_RF,
    complexPredictedSVRR2Lists_ExpTFB_RF, complexPredictedSVRMSELists_ExpTFB_RF,
    r2Recorder_ExpTFB_RF, mseRecorder_ExpTFB_RF, pearsonRecorder_ExpTFB_RF, spearmanRecorder_ExpTFB_RF,
    SVRSpearmanLists_ExpTFB_RF, SVRPearsonLists_ExpTFB_RF, SVRR2Lists_ExpTFB_RF, SVRMSELists_ExpTFB_RF,
    PredictedSVRSpearmanLists_ExpTFB_RF, PredictedSVRPearsonLists_ExpTFB_RF,
    PredictedSVRR2Lists_ExpTFB_RF, PredictedSVRMSELists_ExpTFB_RF,
    SVRr2Recorder_ExpTFB_RF, SVRmseRecorder_ExpTFB_RF, SVRpearsonRecorder_ExpTFB_RF, SVRspearmanRecorder_ExpTFB_RF,
    indexRecorder_ExpTFB_RF, TrainXDFRecorder_ExpTFB_RF, TrainYDFRecorder_ExpTFB_RF,
    SVMSVRFullXDFRecorder_ExpTFB_RF, SVMSVRFullYDFRecorder_ExpTFB_RF, scXRecorder_ExpTFB_RF,
    scDensityRecorder_ExpTFB_RF, scDistanceRecorder_ExpTFB_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpTFBdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.4,
)

In [None]:
# Plot the R^2 lists of both models for the ExpTFB run under the 'R.F.' label.
ExpTFB_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFB_RF, SVRR2Lists_ExpTFB_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for ExpTFB to statsPlot, labelled 'R.F.'.
statsPlot(ExpTFB_RF, 'R.F.')

In [None]:
# Random-forest section: run ComparisonMultipleRun on 'HelenaExpTFHBdensity.xlsx'
# and unpack its 32 return values into the *_ExpTFHB_RF names.
(
    complexSVRSpearmanLists_ExpTFHB_RF, complexSVRPearsonLists_ExpTFHB_RF,
    complexSVRR2Lists_ExpTFHB_RF, complexSVRMSELists_ExpTFHB_RF,
    complexPredictedSVRSpearmanLists_ExpTFHB_RF, complexPredictedSVRPearsonLists_ExpTFHB_RF,
    complexPredictedSVRR2Lists_ExpTFHB_RF, complexPredictedSVRMSELists_ExpTFHB_RF,
    r2Recorder_ExpTFHB_RF, mseRecorder_ExpTFHB_RF, pearsonRecorder_ExpTFHB_RF, spearmanRecorder_ExpTFHB_RF,
    SVRSpearmanLists_ExpTFHB_RF, SVRPearsonLists_ExpTFHB_RF, SVRR2Lists_ExpTFHB_RF, SVRMSELists_ExpTFHB_RF,
    PredictedSVRSpearmanLists_ExpTFHB_RF, PredictedSVRPearsonLists_ExpTFHB_RF,
    PredictedSVRR2Lists_ExpTFHB_RF, PredictedSVRMSELists_ExpTFHB_RF,
    SVRr2Recorder_ExpTFHB_RF, SVRmseRecorder_ExpTFHB_RF, SVRpearsonRecorder_ExpTFHB_RF, SVRspearmanRecorder_ExpTFHB_RF,
    indexRecorder_ExpTFHB_RF, TrainXDFRecorder_ExpTFHB_RF, TrainYDFRecorder_ExpTFHB_RF,
    SVMSVRFullXDFRecorder_ExpTFHB_RF, SVMSVRFullYDFRecorder_ExpTFHB_RF, scXRecorder_ExpTFHB_RF,
    scDensityRecorder_ExpTFHB_RF, scDistanceRecorder_ExpTFHB_RF,
) = ComparisonMultipleRun(
    30, 'HelenaExpTFHBdensity.xlsx', 3, 'Sheet1', [10, 20, 30, 40, 50], 0, ['poly', 'rbf'], 0.6,
)

In [None]:
# Plot the R^2 lists of both models for the ExpTFHB run under the 'R.F.' label.
ExpTFHB_RF = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFHB_RF, SVRR2Lists_ExpTFHB_RF],
    'R.F.',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for ExpTFHB to statsPlot, labelled 'R.F.'.
statsPlot(ExpTFHB_RF, 'R.F.')

# KNN

In [None]:
from sklearn.neighbors import KNeighborsRegressor

In [None]:
### Randomized hyper-parameter search for a KNN regressor
### TrainDF_X: X training dataset
### TrainDF_Y: Y training dataset
### kernelList: not used by the KNN search
### Returns the best estimator found by the search
def knn_rand_param_selection(TrainDF_X, TrainDF_Y, kernelList, random_state=None):
    """Tune a KNeighborsRegressor with a randomized, cross-validated search.

    Parameters
    ----------
    TrainDF_X : training features (anything RandomizedSearchCV.fit accepts and len() works on).
    TrainDF_Y : training targets.
    kernelList : unused by the KNN search; kept so existing call sites stay valid.
    random_state : optional seed forwarded to RandomizedSearchCV. The default
        (None) keeps the previous unseeded behaviour.

    Returns
    -------
    The best estimator found, refitted on the whole training set (refit=True).

    Side effects: prints the best estimator, its parameters and its CV score.
    """
    n_samples = len(TrainDF_X)

    # Two folds for small training sets, otherwise five (scikit-learn's default,
    # spelled out here so the fold size below can be computed).
    n_folds = 5 if n_samples > 10 else 2

    # With an integer cv a regressor is split with KFold, whose largest test fold
    # holds ceil(n / k) rows; the smallest training split is what is left.
    smallest_fit_size = n_samples - (-(-n_samples // n_folds))
    # A KNN cannot use more neighbours than it has training rows. Without this
    # cap, candidates with too many neighbours fail to fit and score NaN.
    max_neighbors = max(1, min(4, smallest_fit_size))

    # Hyper-parameters to sample from
    param_rand = dict(
        leaf_size=list(range(1, 10)),
        n_neighbors=list(range(1, max_neighbors + 1)),
        weights=['uniform', 'distance'],
        metric=['euclidean', 'manhattan', 'minkowski'],
        p=[1, 2, 3, 4, 5],
    )

    rand = RandomizedSearchCV(
        KNeighborsRegressor(),
        param_rand,
        refit=True,
        scoring='r2',
        cv=n_folds,
        n_jobs=-1,
        random_state=random_state,
    )
    rand.fit(TrainDF_X, TrainDF_Y)

    # Report the winning configuration and its cross-validated R2
    print(rand.best_estimator_, rand.best_params_, rand.best_score_)

    return rand.best_estimator_

In [None]:
# Run the regression once, given training and testing sets
### For both SVCSVR & SVR
### testY is density for both
### trainX is 3-variable input for SVCSVR
### trainY is density for both
### trainSVRX is 2-variable input for SVR only
def regressionSVR(trainX, trainSVRX, trainY,
                  testX, testSVRX, testY,
                  kernelList, Newfull_df_X, sc_density):
    """Fit and score the weighted 3-variable model and the 2-variable baseline once.

    Despite the name, every model here is tuned by knn_rand_param_selection.

    Steps
    -----
    1. Hold out part of the training rows (30% if len(trainX) <= 10, else 20%).
    2. For each weight in weightList, scale the 'SVM Quant Estimation' column,
       tune a model on the remaining rows and score it on the held-out rows.
       The best weight and model are kept separately per metric.
    3. Tune one baseline model on trainSVRX and score it on the same held-out rows.
    4. Per metric, re-tune on the full training set with the chosen weight and
       evaluate on the test set via finalCorrPrediction /
       SVROnlyfinalCorrPrediction (both defined elsewhere in the notebook).

    Parameters
    ----------
    trainX, testX : DataFrames containing a 'SVM Quant Estimation' column.
    trainSVRX, testSVRX : DataFrames for the baseline; trainSVRX shares trainX's index.
    trainY : pandas object indexed like trainX (scaled density per the header comment).
    testY : test targets, forwarded to the final prediction helpers.
    kernelList : forwarded to knn_rand_param_selection.
    Newfull_df_X : unused; kept for call-site compatibility.
    sc_density : fitted scaler exposing inverse_transform, used to map targets
        and predictions back to the original density scale before scoring.

    Returns
    -------
    A 28-tuple, in this order:
    for each metric in (R2, MSE, Pearson, Spearman):
        (best hold-out model, best weight, final test score, final test predictions)
    then for each metric again:
        (best baseline model, final baseline test score, final baseline predictions)

    NOTE(review): the weighted models returned are the ones fitted on the
    hold-out split; the model re-tuned on the full training set, which produces
    the returned scores and predictions, is not returned. Confirm callers expect this.
    """
    # Weights to be used
    #### Make this tunable too
    weightList = [0.01, 0.1, 1]
    metricList = ['R2', 'MSE', 'Pearson', 'Spearman']
    weightedColumn = 'SVM Quant Estimation'
    # Number of random hold-out splits used to vote for the best weight
    nRounds = 1

    def worstScores():
        # Every real score beats these. The old finite sentinels (200 for MSE)
        # silently disabled selection when errors were larger than the sentinel.
        return {'R2': -np.inf, 'MSE': np.inf, 'Pearson': -np.inf, 'Spearman': -np.inf}

    def toDensity(scaledValues, nRows):
        # Undo the target scaling and flatten to a 1-D array of length nRows
        return sc_density.inverse_transform(np.asarray(scaledValues).reshape(-1, 1)).reshape(nRows)

    # Winners of the weight search; 1 and 0 are the "nothing selected" defaults
    bestWeights = {m: 1 for m in metricList}
    bestModels = {m: 0 for m in metricList}
    weightVotes = {m: [] for m in metricList}

    # Winners of the baseline search; 0 means "no usable model yet"
    SVRbestScores = worstScores()
    SVRbestModels = {m: 0 for m in metricList}

    for _ in range(nRounds):
        holdInFraction = 0.7 if len(trainX) <= 10 else 0.8
        weightTrainXDF = trainX.sample(frac=holdInFraction)
        weightTestXDF = trainX.drop(weightTrainXDF.index)
        train_index = weightTrainXDF.index
        test_index = weightTestXDF.index

        TrainYArr = np.array(trainY.loc[train_index])
        TestYArr = np.array(trainY.loc[test_index])
        SVRTrainXDF, SVRTestXDF = trainSVRX.loc[train_index], trainSVRX.loc[test_index]

        # Held-out targets on the original density scale, shared by all candidates
        heldOutDensity = toDensity(TestYArr, len(TestYArr))

        # Best scores are tracked per round
        roundBestScores = worstScores()

        # Loop through the different weights
        for w in weightList:
            trainXCopy = weightTrainXDF.copy()
            testXCopy = weightTestXDF.copy()
            trainXCopy[weightedColumn] = trainXCopy[weightedColumn] * w
            testXCopy[weightedColumn] = testXCopy[weightedColumn] * w

            # Select best parameters for this weight, then score on the held-out rows
            model = knn_rand_param_selection(trainXCopy, TrainYArr, kernelList)
            predictedDensity = toDensity(model.predict(testXCopy.values), len(TestYArr))
            scores = _knn_holdout_scores(heldOutDensity, predictedDensity)

            for m in metricList:
                print(m + ': ', scores[m])
                if _knn_is_better(m, scores[m], roundBestScores[m]):
                    roundBestScores[m] = scores[m]
                    bestModels[m] = model
                    bestWeights[m] = w

        # Record the best weight of this round for each metric
        for m in metricList:
            weightVotes[m].append(bestWeights[m])

        #### Baseline: tune on the 2-variable input only
        SVRmodel = knn_rand_param_selection(SVRTrainXDF, TrainYArr, kernelList)
        SVRpredictedDensity = toDensity(SVRmodel.predict(SVRTestXDF.values), len(TestYArr))
        SVRscores = _knn_holdout_scores(heldOutDensity, SVRpredictedDensity)
        for m in metricList:
            if _knn_is_better(m, SVRscores[m], SVRbestScores[m]):
                SVRbestScores[m] = SVRscores[m]
                SVRbestModels[m] = SVRmodel

    # Most frequently selected weight per metric across rounds
    for m in metricList:
        votes = weightVotes[m]
        bestWeights[m] = max(set(votes), key=votes.count)

    # To record final prediction scores
    corrList = []
    predictedList = []
    SVRcorrList = []
    SVRpredictedList = []
    for m in metricList:
        ### Reserved for final training and testing sets
        finalTrainXCopy = trainX.copy()
        finalTestXCopy = testX.copy()
        finalTrainSVRXCopy = trainSVRX.copy()
        finalTestSVRXCopy = testSVRX.copy()

        weight = bestWeights[m]
        # First, re-tune on the full training set using the selected weight
        finalTrainXCopy[weightedColumn] = finalTrainXCopy[weightedColumn] * weight
        finalModel = knn_rand_param_selection(finalTrainXCopy, trainY, kernelList)

        # Now predict. NOTE(review): the test frame is passed unweighted together
        # with `weight`; presumably finalCorrPrediction applies it -- confirm.
        corr, predictedDensity = \
            finalCorrPrediction(finalTestXCopy, testY, weight, finalModel, sc_density, metric = m)
        corrList.append(corr)
        predictedList.append(predictedDensity)

        #### Baseline -- refit the selected model on the full training set and predict
        SVRCurrModel = SVRbestModels[m]
        if SVRCurrModel != 0:
            SVRCurrModel.fit(finalTrainSVRXCopy.values, trainY)
        else:
            # No model was selected (e.g. the hold-out score was NaN): search again
            SVRCurrModel = knn_rand_param_selection(finalTrainSVRXCopy, trainY, kernelList)
        SVRcorr, SVRpredictedDensity = \
            SVROnlyfinalCorrPrediction(finalTestSVRXCopy, testY, 1, SVRCurrModel, sc_density, metric = m)
        SVRcorrList.append(SVRcorr)
        SVRpredictedList.append(SVRpredictedDensity)

    print('SVM: ', corrList)
    print('SVR: ', SVRcorrList)

    # Flatten to the positional tuple callers unpack (order documented above)
    results = []
    for i, m in enumerate(metricList):
        results.extend([bestModels[m], bestWeights[m], corrList[i], predictedList[i]])
    for i, m in enumerate(metricList):
        results.extend([SVRbestModels[m], SVRcorrList[i], SVRpredictedList[i]])
    return tuple(results)


def _knn_holdout_scores(actualDensity, predictedDensity):
    """Return the four selection metrics for one set of predictions, keyed by metric name.

    'R2' is the squared Pearson correlation from np.corrcoef, not sklearn's r2_score.
    """
    correlation_xy = np.corrcoef(actualDensity, predictedDensity)[0, 1]
    return {
        'R2': correlation_xy**2,
        # The `squared` keyword is omitted: True is the default, and newer
        # scikit-learn releases deprecate and then remove the argument.
        'MSE': mean_squared_error(actualDensity, predictedDensity),
        'Pearson': pearsonr(actualDensity, predictedDensity)[0],
        'Spearman': spearmanr(actualDensity, predictedDensity)[0],
    }


def _knn_is_better(metric, score, incumbent):
    """True when `score` is not NaN and improves on `incumbent` (lower wins for 'MSE')."""
    if np.isnan(score):
        return False
    if metric == 'MSE':
        return score < incumbent
    return score > incumbent


In [None]:
# HelenaTFTFHBdensity.xlsx

In [None]:
# Load 'Sheet1' of HelenaTFTFHBdensity.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.6 is presumably the binarisation threshold -- confirm in processExcelFormat03.
TFTFHB = processExcelFormat03('HelenaTFTFHBdensity.xlsx', 'Sheet1', 0.6)
heatmapGenerator(TFTFHB, 'BinaryClass')

In [None]:
# Heatmap of TFTFHB with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(TFTFHB)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaTFTFHBdensity.xlsx under *_TFTFHB_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_TFTFHB_KNN, complexSVRPearsonLists_TFTFHB_KNN, \
complexSVRR2Lists_TFTFHB_KNN, complexSVRMSELists_TFTFHB_KNN, \
complexPredictedSVRSpearmanLists_TFTFHB_KNN, complexPredictedSVRPearsonLists_TFTFHB_KNN, \
complexPredictedSVRR2Lists_TFTFHB_KNN, complexPredictedSVRMSELists_TFTFHB_KNN, \
r2Recorder_TFTFHB_KNN, mseRecorder_TFTFHB_KNN, pearsonRecorder_TFTFHB_KNN, spearmanRecorder_TFTFHB_KNN, \
SVRSpearmanLists_TFTFHB_KNN, SVRPearsonLists_TFTFHB_KNN, SVRR2Lists_TFTFHB_KNN, SVRMSELists_TFTFHB_KNN, \
PredictedSVRSpearmanLists_TFTFHB_KNN, PredictedSVRPearsonLists_TFTFHB_KNN, \
PredictedSVRR2Lists_TFTFHB_KNN, PredictedSVRMSELists_TFTFHB_KNN, \
SVRr2Recorder_TFTFHB_KNN, SVRmseRecorder_TFTFHB_KNN, SVRpearsonRecorder_TFTFHB_KNN, SVRspearmanRecorder_TFTFHB_KNN,\
indexRecorder_TFTFHB_KNN, TrainXDFRecorder_TFTFHB_KNN, TrainYDFRecorder_TFTFHB_KNN, \
SVMSVRFullXDFRecorder_TFTFHB_KNN, SVMSVRFullYDFRecorder_TFTFHB_KNN, scXRecorder_TFTFHB_KNN, \
scDensityRecorder_TFTFHB_KNN, scDistanceRecorder_TFTFHB_KNN = \
ComparisonMultipleRun(30, 'HelenaTFTFHBdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.6) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the TFTFHB 'KNN' runs.
TFTFHB_123_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFHB_KNN, SVRR2Lists_TFTFHB_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for TFTFHB to statsPlot (defined elsewhere in the notebook).
statsPlot(TFTFHB_123_KNN, 'KNN')

In [None]:
# Load 'Sheet1' of HelenaTFTFHMdensity.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.7 is presumably the binarisation threshold -- confirm in processExcelFormat03.
TFTFHM = processExcelFormat03('HelenaTFTFHMdensity.xlsx', 'Sheet1', 0.7)
heatmapGenerator(TFTFHM, 'BinaryClass')

In [None]:
# Heatmap of TFTFHM with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(TFTFHM)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaTFTFHMdensity.xlsx under *_TFTFHM_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_TFTFHM_KNN, complexSVRPearsonLists_TFTFHM_KNN, \
complexSVRR2Lists_TFTFHM_KNN, complexSVRMSELists_TFTFHM_KNN, \
complexPredictedSVRSpearmanLists_TFTFHM_KNN, complexPredictedSVRPearsonLists_TFTFHM_KNN, \
complexPredictedSVRR2Lists_TFTFHM_KNN, complexPredictedSVRMSELists_TFTFHM_KNN, \
r2Recorder_TFTFHM_KNN, mseRecorder_TFTFHM_KNN, pearsonRecorder_TFTFHM_KNN, spearmanRecorder_TFTFHM_KNN, \
SVRSpearmanLists_TFTFHM_KNN, SVRPearsonLists_TFTFHM_KNN, SVRR2Lists_TFTFHM_KNN, SVRMSELists_TFTFHM_KNN, \
PredictedSVRSpearmanLists_TFTFHM_KNN, PredictedSVRPearsonLists_TFTFHM_KNN, \
PredictedSVRR2Lists_TFTFHM_KNN, PredictedSVRMSELists_TFTFHM_KNN, \
SVRr2Recorder_TFTFHM_KNN, SVRmseRecorder_TFTFHM_KNN, SVRpearsonRecorder_TFTFHM_KNN, SVRspearmanRecorder_TFTFHM_KNN,\
indexRecorder_TFTFHM_KNN, TrainXDFRecorder_TFTFHM_KNN, TrainYDFRecorder_TFTFHM_KNN, \
SVMSVRFullXDFRecorder_TFTFHM_KNN, SVMSVRFullYDFRecorder_TFTFHM_KNN, scXRecorder_TFTFHM_KNN, \
scDensityRecorder_TFTFHM_KNN, scDistanceRecorder_TFTFHM_KNN = \
ComparisonMultipleRun(30, 'HelenaTFTFHMdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.7) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the TFTFHM 'KNN' runs.
TFTFHM_123_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFHM_KNN, SVRR2Lists_TFTFHM_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for TFTFHM to statsPlot (defined elsewhere in the notebook).
statsPlot(TFTFHM_123_KNN, 'KNN')

In [None]:
# TFTFM

In [None]:
# Load 'Sheet1' of HelenaTFTFMdensity.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.6 is presumably the binarisation threshold -- confirm in processExcelFormat03.
TFTFM = processExcelFormat03('HelenaTFTFMdensity.xlsx', 'Sheet1', 0.6)
heatmapGenerator(TFTFM, 'BinaryClass')

In [None]:
# Heatmap of TFTFM with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(TFTFM)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaTFTFMdensity.xlsx under *_TFTFM_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_TFTFM_KNN, complexSVRPearsonLists_TFTFM_KNN, \
complexSVRR2Lists_TFTFM_KNN, complexSVRMSELists_TFTFM_KNN, \
complexPredictedSVRSpearmanLists_TFTFM_KNN, complexPredictedSVRPearsonLists_TFTFM_KNN, \
complexPredictedSVRR2Lists_TFTFM_KNN, complexPredictedSVRMSELists_TFTFM_KNN, \
r2Recorder_TFTFM_KNN, mseRecorder_TFTFM_KNN, pearsonRecorder_TFTFM_KNN, spearmanRecorder_TFTFM_KNN, \
SVRSpearmanLists_TFTFM_KNN, SVRPearsonLists_TFTFM_KNN, SVRR2Lists_TFTFM_KNN, SVRMSELists_TFTFM_KNN, \
PredictedSVRSpearmanLists_TFTFM_KNN, PredictedSVRPearsonLists_TFTFM_KNN, \
PredictedSVRR2Lists_TFTFM_KNN, PredictedSVRMSELists_TFTFM_KNN, \
SVRr2Recorder_TFTFM_KNN, SVRmseRecorder_TFTFM_KNN, SVRpearsonRecorder_TFTFM_KNN, SVRspearmanRecorder_TFTFM_KNN,\
indexRecorder_TFTFM_KNN, TrainXDFRecorder_TFTFM_KNN, TrainYDFRecorder_TFTFM_KNN, \
SVMSVRFullXDFRecorder_TFTFM_KNN, SVMSVRFullYDFRecorder_TFTFM_KNN, scXRecorder_TFTFM_KNN, \
scDensityRecorder_TFTFM_KNN, scDistanceRecorder_TFTFM_KNN = \
ComparisonMultipleRun(30, 'HelenaTFTFMdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['rbf', 'poly'],\
                     0.6) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the TFTFM 'KNN' runs.
TFTFM_123_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFM_KNN, SVRR2Lists_TFTFM_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for TFTFM to statsPlot (defined elsewhere in the notebook).
statsPlot(TFTFM_123_KNN, 'KNN')

In [None]:
# Pass the same TFTFM comparison result to scatterComparisonPlot (defined elsewhere in the notebook).
scatterComparisonPlot(TFTFM_123_KNN)

In [None]:
# Load 'Sheet1' of HelenaTFTFBdensity.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.4 is presumably the binarisation threshold -- confirm in processExcelFormat03.
TFTFB = processExcelFormat03('HelenaTFTFBdensity.xlsx', 'Sheet1', 0.4)
heatmapGenerator(TFTFB, 'BinaryClass')

In [None]:
# Heatmap of TFTFB with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(TFTFB)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaTFTFBdensity.xlsx under *_TFTFB_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_TFTFB_KNN, complexSVRPearsonLists_TFTFB_KNN, \
complexSVRR2Lists_TFTFB_KNN, complexSVRMSELists_TFTFB_KNN, \
complexPredictedSVRSpearmanLists_TFTFB_KNN, complexPredictedSVRPearsonLists_TFTFB_KNN, \
complexPredictedSVRR2Lists_TFTFB_KNN, complexPredictedSVRMSELists_TFTFB_KNN, \
r2Recorder_TFTFB_KNN, mseRecorder_TFTFB_KNN, pearsonRecorder_TFTFB_KNN, spearmanRecorder_TFTFB_KNN, \
SVRSpearmanLists_TFTFB_KNN, SVRPearsonLists_TFTFB_KNN, SVRR2Lists_TFTFB_KNN, SVRMSELists_TFTFB_KNN, \
PredictedSVRSpearmanLists_TFTFB_KNN, PredictedSVRPearsonLists_TFTFB_KNN, \
PredictedSVRR2Lists_TFTFB_KNN, PredictedSVRMSELists_TFTFB_KNN, \
SVRr2Recorder_TFTFB_KNN, SVRmseRecorder_TFTFB_KNN, SVRpearsonRecorder_TFTFB_KNN, SVRspearmanRecorder_TFTFB_KNN,\
indexRecorder_TFTFB_KNN, TrainXDFRecorder_TFTFB_KNN, TrainYDFRecorder_TFTFB_KNN, \
SVMSVRFullXDFRecorder_TFTFB_KNN, SVMSVRFullYDFRecorder_TFTFB_KNN, scXRecorder_TFTFB_KNN, \
scDensityRecorder_TFTFB_KNN, scDistanceRecorder_TFTFB_KNN = \
ComparisonMultipleRun(30, 'HelenaTFTFBdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.4) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the TFTFB 'KNN' runs.
TFTFB_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_TFTFB_KNN, SVRR2Lists_TFTFB_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for TFTFB to statsPlot (defined elsewhere in the notebook).
statsPlot(TFTFB_KNN, 'KNN')

In [None]:
# Load 'Sheet1' of HelenaExpDAHBCLA.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.5 is presumably the binarisation threshold -- confirm in processExcelFormat03.
DAHBCLA = processExcelFormat03('HelenaExpDAHBCLA.xlsx', 'Sheet1', 0.5)
heatmapGenerator(DAHBCLA, 'BinaryClass')

In [None]:
# Heatmap of DAHBCLA with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(DAHBCLA)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDAHBCLA.xlsx under *_DAHBCLA_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DAHBCLA_KNN, complexSVRPearsonLists_DAHBCLA_KNN, \
complexSVRR2Lists_DAHBCLA_KNN, complexSVRMSELists_DAHBCLA_KNN, \
complexPredictedSVRSpearmanLists_DAHBCLA_KNN, complexPredictedSVRPearsonLists_DAHBCLA_KNN, \
complexPredictedSVRR2Lists_DAHBCLA_KNN, complexPredictedSVRMSELists_DAHBCLA_KNN, \
r2Recorder_DAHBCLA_KNN, mseRecorder_DAHBCLA_KNN, pearsonRecorder_DAHBCLA_KNN, spearmanRecorder_DAHBCLA_KNN, \
SVRSpearmanLists_DAHBCLA_KNN, SVRPearsonLists_DAHBCLA_KNN, SVRR2Lists_DAHBCLA_KNN, SVRMSELists_DAHBCLA_KNN, \
PredictedSVRSpearmanLists_DAHBCLA_KNN, PredictedSVRPearsonLists_DAHBCLA_KNN, \
PredictedSVRR2Lists_DAHBCLA_KNN, PredictedSVRMSELists_DAHBCLA_KNN, \
SVRr2Recorder_DAHBCLA_KNN, SVRmseRecorder_DAHBCLA_KNN, SVRpearsonRecorder_DAHBCLA_KNN, SVRspearmanRecorder_DAHBCLA_KNN,\
indexRecorder_DAHBCLA_KNN, TrainXDFRecorder_DAHBCLA_KNN, TrainYDFRecorder_DAHBCLA_KNN, \
SVMSVRFullXDFRecorder_DAHBCLA_KNN, SVMSVRFullYDFRecorder_DAHBCLA_KNN, scXRecorder_DAHBCLA_KNN, \
scDensityRecorder_DAHBCLA_KNN, scDistanceRecorder_DAHBCLA_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDAHBCLA.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.5) #, 'sigmoid', 'rbf'

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DAHBCLA 'KNN' runs.
DAHBCLA_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBCLA_KNN, SVRR2Lists_DAHBCLA_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHBCLA to statsPlot (defined elsewhere in the notebook).
statsPlot(DAHBCLA_KNN, 'KNN')

In [None]:
# Load 'Sheet1' of HelenaExpDAHMCLA.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.5 is presumably the binarisation threshold -- confirm in processExcelFormat03.
DAHMCLA = processExcelFormat03('HelenaExpDAHMCLA.xlsx', 'Sheet1', 0.5)
heatmapGenerator(DAHMCLA, 'BinaryClass')

In [None]:
# Heatmap of DAHMCLA with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(DAHMCLA)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDAHMCLA.xlsx under *_DAHMCLA_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DAHMCLA_KNN, complexSVRPearsonLists_DAHMCLA_KNN, \
complexSVRR2Lists_DAHMCLA_KNN, complexSVRMSELists_DAHMCLA_KNN, \
complexPredictedSVRSpearmanLists_DAHMCLA_KNN, complexPredictedSVRPearsonLists_DAHMCLA_KNN, \
complexPredictedSVRR2Lists_DAHMCLA_KNN, complexPredictedSVRMSELists_DAHMCLA_KNN, \
r2Recorder_DAHMCLA_KNN, mseRecorder_DAHMCLA_KNN, pearsonRecorder_DAHMCLA_KNN, spearmanRecorder_DAHMCLA_KNN, \
SVRSpearmanLists_DAHMCLA_KNN, SVRPearsonLists_DAHMCLA_KNN, SVRR2Lists_DAHMCLA_KNN, SVRMSELists_DAHMCLA_KNN, \
PredictedSVRSpearmanLists_DAHMCLA_KNN, PredictedSVRPearsonLists_DAHMCLA_KNN, \
PredictedSVRR2Lists_DAHMCLA_KNN, PredictedSVRMSELists_DAHMCLA_KNN, \
SVRr2Recorder_DAHMCLA_KNN, SVRmseRecorder_DAHMCLA_KNN, SVRpearsonRecorder_DAHMCLA_KNN, SVRspearmanRecorder_DAHMCLA_KNN,\
indexRecorder_DAHMCLA_KNN, TrainXDFRecorder_DAHMCLA_KNN, TrainYDFRecorder_DAHMCLA_KNN, \
SVMSVRFullXDFRecorder_DAHMCLA_KNN, SVMSVRFullYDFRecorder_DAHMCLA_KNN, scXRecorder_DAHMCLA_KNN, \
scDensityRecorder_DAHMCLA_KNN, scDistanceRecorder_DAHMCLA_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDAHMCLA.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.5) #, 'sigmoid', 'rbf'

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DAHMCLA 'KNN' runs.
DAHMCLA_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHMCLA_KNN, SVRR2Lists_DAHMCLA_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHMCLA to statsPlot (defined elsewhere in the notebook).
statsPlot(DAHMCLA_KNN, 'KNN')

In [None]:
# Load 'Sheet1' of HelenaExpDAMCLA.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.5 is presumably the binarisation threshold -- confirm in processExcelFormat03.
DAMCLA = processExcelFormat03('HelenaExpDAMCLA.xlsx', 'Sheet1', 0.5)
heatmapGenerator(DAMCLA, 'BinaryClass')

In [None]:
# Heatmap of DAMCLA with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(DAMCLA)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDAMCLA.xlsx under *_DAMCLA_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DAMCLA_KNN, complexSVRPearsonLists_DAMCLA_KNN, \
complexSVRR2Lists_DAMCLA_KNN, complexSVRMSELists_DAMCLA_KNN, \
complexPredictedSVRSpearmanLists_DAMCLA_KNN, complexPredictedSVRPearsonLists_DAMCLA_KNN, \
complexPredictedSVRR2Lists_DAMCLA_KNN, complexPredictedSVRMSELists_DAMCLA_KNN, \
r2Recorder_DAMCLA_KNN, mseRecorder_DAMCLA_KNN, pearsonRecorder_DAMCLA_KNN, spearmanRecorder_DAMCLA_KNN, \
SVRSpearmanLists_DAMCLA_KNN, SVRPearsonLists_DAMCLA_KNN, SVRR2Lists_DAMCLA_KNN, SVRMSELists_DAMCLA_KNN, \
PredictedSVRSpearmanLists_DAMCLA_KNN, PredictedSVRPearsonLists_DAMCLA_KNN, \
PredictedSVRR2Lists_DAMCLA_KNN, PredictedSVRMSELists_DAMCLA_KNN, \
SVRr2Recorder_DAMCLA_KNN, SVRmseRecorder_DAMCLA_KNN, SVRpearsonRecorder_DAMCLA_KNN, SVRspearmanRecorder_DAMCLA_KNN,\
indexRecorder_DAMCLA_KNN, TrainXDFRecorder_DAMCLA_KNN, TrainYDFRecorder_DAMCLA_KNN, \
SVMSVRFullXDFRecorder_DAMCLA_KNN, SVMSVRFullYDFRecorder_DAMCLA_KNN, scXRecorder_DAMCLA_KNN, \
scDensityRecorder_DAMCLA_KNN, scDistanceRecorder_DAMCLA_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDAMCLA.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.5) #, 'sigmoid', 'rbf'

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DAMCLA 'KNN' runs.
DAMCLA_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DAMCLA_KNN, SVRR2Lists_DAMCLA_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAMCLA to statsPlot (defined elsewhere in the notebook).
statsPlot(DAMCLA_KNN, 'KNN')

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDACLA.xlsx under *_DACLA_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DACLA_KNN, complexSVRPearsonLists_DACLA_KNN, \
complexSVRR2Lists_DACLA_KNN, complexSVRMSELists_DACLA_KNN, \
complexPredictedSVRSpearmanLists_DACLA_KNN, complexPredictedSVRPearsonLists_DACLA_KNN, \
complexPredictedSVRR2Lists_DACLA_KNN, complexPredictedSVRMSELists_DACLA_KNN, \
r2Recorder_DACLA_KNN, mseRecorder_DACLA_KNN, pearsonRecorder_DACLA_KNN, spearmanRecorder_DACLA_KNN, \
SVRSpearmanLists_DACLA_KNN, SVRPearsonLists_DACLA_KNN, SVRR2Lists_DACLA_KNN, SVRMSELists_DACLA_KNN, \
PredictedSVRSpearmanLists_DACLA_KNN, PredictedSVRPearsonLists_DACLA_KNN, \
PredictedSVRR2Lists_DACLA_KNN, PredictedSVRMSELists_DACLA_KNN, \
SVRr2Recorder_DACLA_KNN, SVRmseRecorder_DACLA_KNN, SVRpearsonRecorder_DACLA_KNN, SVRspearmanRecorder_DACLA_KNN,\
indexRecorder_DACLA_KNN, TrainXDFRecorder_DACLA_KNN, TrainYDFRecorder_DACLA_KNN, \
SVMSVRFullXDFRecorder_DACLA_KNN, SVMSVRFullYDFRecorder_DACLA_KNN, scXRecorder_DACLA_KNN, \
scDensityRecorder_DACLA_KNN, scDistanceRecorder_DACLA_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDACLA.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.8) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DACLA 'KNN' runs.
DACLA_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DACLA_KNN, SVRR2Lists_DACLA_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DACLA to statsPlot (defined elsewhere in the notebook).
statsPlot(DACLA_KNN, 'KNN')

In [None]:
# Pass the same DACLA comparison result to scatterComparisonPlot (defined elsewhere in the notebook).
scatterComparisonPlot(DACLA_KNN)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDAHBSUL.xlsx under *_DAHBSUL_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DAHBSUL_KNN, complexSVRPearsonLists_DAHBSUL_KNN, \
complexSVRR2Lists_DAHBSUL_KNN, complexSVRMSELists_DAHBSUL_KNN, \
complexPredictedSVRSpearmanLists_DAHBSUL_KNN, complexPredictedSVRPearsonLists_DAHBSUL_KNN, \
complexPredictedSVRR2Lists_DAHBSUL_KNN, complexPredictedSVRMSELists_DAHBSUL_KNN, \
r2Recorder_DAHBSUL_KNN, mseRecorder_DAHBSUL_KNN, pearsonRecorder_DAHBSUL_KNN, spearmanRecorder_DAHBSUL_KNN, \
SVRSpearmanLists_DAHBSUL_KNN, SVRPearsonLists_DAHBSUL_KNN, SVRR2Lists_DAHBSUL_KNN, SVRMSELists_DAHBSUL_KNN, \
PredictedSVRSpearmanLists_DAHBSUL_KNN, PredictedSVRPearsonLists_DAHBSUL_KNN, \
PredictedSVRR2Lists_DAHBSUL_KNN, PredictedSVRMSELists_DAHBSUL_KNN, \
SVRr2Recorder_DAHBSUL_KNN, SVRmseRecorder_DAHBSUL_KNN, SVRpearsonRecorder_DAHBSUL_KNN, SVRspearmanRecorder_DAHBSUL_KNN,\
indexRecorder_DAHBSUL_KNN, TrainXDFRecorder_DAHBSUL_KNN, TrainYDFRecorder_DAHBSUL_KNN, \
SVMSVRFullXDFRecorder_DAHBSUL_KNN, SVMSVRFullYDFRecorder_DAHBSUL_KNN, scXRecorder_DAHBSUL_KNN, \
scDensityRecorder_DAHBSUL_KNN, scDistanceRecorder_DAHBSUL_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDAHBSUL.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.4) 

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DAHBSUL 'KNN' runs.
DAHBSUL_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DAHBSUL_KNN, SVRR2Lists_DAHBSUL_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DAHBSUL to statsPlot (defined elsewhere in the notebook).
statsPlot(DAHBSUL_KNN, 'KNN')

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDABSUL.xlsx under *_DABSUL_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DABSUL_KNN, complexSVRPearsonLists_DABSUL_KNN, \
complexSVRR2Lists_DABSUL_KNN, complexSVRMSELists_DABSUL_KNN, \
complexPredictedSVRSpearmanLists_DABSUL_KNN, complexPredictedSVRPearsonLists_DABSUL_KNN, \
complexPredictedSVRR2Lists_DABSUL_KNN, complexPredictedSVRMSELists_DABSUL_KNN, \
r2Recorder_DABSUL_KNN, mseRecorder_DABSUL_KNN, pearsonRecorder_DABSUL_KNN, spearmanRecorder_DABSUL_KNN, \
SVRSpearmanLists_DABSUL_KNN, SVRPearsonLists_DABSUL_KNN, SVRR2Lists_DABSUL_KNN, SVRMSELists_DABSUL_KNN, \
PredictedSVRSpearmanLists_DABSUL_KNN, PredictedSVRPearsonLists_DABSUL_KNN, \
PredictedSVRR2Lists_DABSUL_KNN, PredictedSVRMSELists_DABSUL_KNN, \
SVRr2Recorder_DABSUL_KNN, SVRmseRecorder_DABSUL_KNN, SVRpearsonRecorder_DABSUL_KNN, SVRspearmanRecorder_DABSUL_KNN,\
indexRecorder_DABSUL_KNN, TrainXDFRecorder_DABSUL_KNN, TrainYDFRecorder_DABSUL_KNN, \
SVMSVRFullXDFRecorder_DABSUL_KNN, SVMSVRFullYDFRecorder_DABSUL_KNN, scXRecorder_DABSUL_KNN, \
scDensityRecorder_DABSUL_KNN, scDistanceRecorder_DABSUL_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDABSUL.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly',],\
                     0.1) #, 'sigmoid', 'rbf'

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DABSUL 'KNN' runs.
DABSUL_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DABSUL_KNN, SVRR2Lists_DABSUL_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DABSUL to statsPlot (defined elsewhere in the notebook).
statsPlot(DABSUL_KNN, 'KNN')

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpDABCLA.xlsx under *_DABCLA_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_DABCLA_KNN, complexSVRPearsonLists_DABCLA_KNN, \
complexSVRR2Lists_DABCLA_KNN, complexSVRMSELists_DABCLA_KNN, \
complexPredictedSVRSpearmanLists_DABCLA_KNN, complexPredictedSVRPearsonLists_DABCLA_KNN, \
complexPredictedSVRR2Lists_DABCLA_KNN, complexPredictedSVRMSELists_DABCLA_KNN, \
r2Recorder_DABCLA_KNN, mseRecorder_DABCLA_KNN, pearsonRecorder_DABCLA_KNN, spearmanRecorder_DABCLA_KNN, \
SVRSpearmanLists_DABCLA_KNN, SVRPearsonLists_DABCLA_KNN, SVRR2Lists_DABCLA_KNN, SVRMSELists_DABCLA_KNN, \
PredictedSVRSpearmanLists_DABCLA_KNN, PredictedSVRPearsonLists_DABCLA_KNN, \
PredictedSVRR2Lists_DABCLA_KNN, PredictedSVRMSELists_DABCLA_KNN, \
SVRr2Recorder_DABCLA_KNN, SVRmseRecorder_DABCLA_KNN, SVRpearsonRecorder_DABCLA_KNN, SVRspearmanRecorder_DABCLA_KNN,\
indexRecorder_DABCLA_KNN, TrainXDFRecorder_DABCLA_KNN, TrainYDFRecorder_DABCLA_KNN, \
SVMSVRFullXDFRecorder_DABCLA_KNN, SVMSVRFullYDFRecorder_DABCLA_KNN, scXRecorder_DABCLA_KNN, \
scDensityRecorder_DABCLA_KNN, scDistanceRecorder_DABCLA_KNN = \
ComparisonMultipleRun(30, 'HelenaExpDABCLA.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly'],\
                     0.3) #, 'sigmoid', 'rbf'

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the DABCLA 'KNN' runs.
DABCLA_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_DABCLA_KNN, SVRR2Lists_DABCLA_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for DABCLA to statsPlot (defined elsewhere in the notebook).
statsPlot(DABCLA_KNN, 'KNN')

In [None]:
# Load 'Sheet1' of HelenaExpTFMdensity.xlsx with processExcelFormat03 and draw its 'BinaryClass' heatmap.
# NOTE(review): 0.6 is presumably the binarisation threshold -- confirm in processExcelFormat03.
TFM = processExcelFormat03('HelenaExpTFMdensity.xlsx', 'Sheet1', 0.6)
heatmapGenerator(TFM, 'BinaryClass')

In [None]:
# Heatmap of TFM with heatmapGenerator's default mode (no 'BinaryClass' argument).
heatmapGenerator(TFM)

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpTFMdensity.xlsx under *_ExpTFM_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_ExpTFM_KNN, complexSVRPearsonLists_ExpTFM_KNN, \
complexSVRR2Lists_ExpTFM_KNN, complexSVRMSELists_ExpTFM_KNN, \
complexPredictedSVRSpearmanLists_ExpTFM_KNN, complexPredictedSVRPearsonLists_ExpTFM_KNN, \
complexPredictedSVRR2Lists_ExpTFM_KNN, complexPredictedSVRMSELists_ExpTFM_KNN, \
r2Recorder_ExpTFM_KNN, mseRecorder_ExpTFM_KNN, pearsonRecorder_ExpTFM_KNN, spearmanRecorder_ExpTFM_KNN, \
SVRSpearmanLists_ExpTFM_KNN, SVRPearsonLists_ExpTFM_KNN, SVRR2Lists_ExpTFM_KNN, SVRMSELists_ExpTFM_KNN, \
PredictedSVRSpearmanLists_ExpTFM_KNN, PredictedSVRPearsonLists_ExpTFM_KNN, \
PredictedSVRR2Lists_ExpTFM_KNN, PredictedSVRMSELists_ExpTFM_KNN, \
SVRr2Recorder_ExpTFM_KNN, SVRmseRecorder_ExpTFM_KNN, SVRpearsonRecorder_ExpTFM_KNN, SVRspearmanRecorder_ExpTFM_KNN,\
indexRecorder_ExpTFM_KNN, TrainXDFRecorder_ExpTFM_KNN, TrainYDFRecorder_ExpTFM_KNN, \
SVMSVRFullXDFRecorder_ExpTFM_KNN, SVMSVRFullYDFRecorder_ExpTFM_KNN, scXRecorder_ExpTFM_KNN, \
scDensityRecorder_ExpTFM_KNN, scDistanceRecorder_ExpTFM_KNN = \
ComparisonMultipleRun(30, 'HelenaExpTFMdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly', 'rbf'],\
                     0.6)

In [None]:
# Plot complexSVRR2Lists vs. SVRR2Lists for the ExpTFM 'KNN' runs.
ExpTFM_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFM_KNN, SVRR2Lists_ExpTFM_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass the ComparisonResultsPlot result for ExpTFM to statsPlot (defined elsewhere in the notebook).
statsPlot(ExpTFM_KNN, 'KNN')

In [None]:
# Store the 32 results of ComparisonMultipleRun on HelenaExpTFHMdensity.xlsx under *_ExpTFHM_KNN names.
# NOTE(review): argument meanings are defined by ComparisonMultipleRun (outside this cell) -- confirm there.
complexSVRSpearmanLists_ExpTFHM_KNN, complexSVRPearsonLists_ExpTFHM_KNN, \
complexSVRR2Lists_ExpTFHM_KNN, complexSVRMSELists_ExpTFHM_KNN, \
complexPredictedSVRSpearmanLists_ExpTFHM_KNN, complexPredictedSVRPearsonLists_ExpTFHM_KNN, \
complexPredictedSVRR2Lists_ExpTFHM_KNN, complexPredictedSVRMSELists_ExpTFHM_KNN, \
r2Recorder_ExpTFHM_KNN, mseRecorder_ExpTFHM_KNN, pearsonRecorder_ExpTFHM_KNN, spearmanRecorder_ExpTFHM_KNN, \
SVRSpearmanLists_ExpTFHM_KNN, SVRPearsonLists_ExpTFHM_KNN, SVRR2Lists_ExpTFHM_KNN, SVRMSELists_ExpTFHM_KNN, \
PredictedSVRSpearmanLists_ExpTFHM_KNN, PredictedSVRPearsonLists_ExpTFHM_KNN, \
PredictedSVRR2Lists_ExpTFHM_KNN, PredictedSVRMSELists_ExpTFHM_KNN, \
SVRr2Recorder_ExpTFHM_KNN, SVRmseRecorder_ExpTFHM_KNN, SVRpearsonRecorder_ExpTFHM_KNN, SVRspearmanRecorder_ExpTFHM_KNN,\
indexRecorder_ExpTFHM_KNN, TrainXDFRecorder_ExpTFHM_KNN, TrainYDFRecorder_ExpTFHM_KNN, \
SVMSVRFullXDFRecorder_ExpTFHM_KNN, SVMSVRFullYDFRecorder_ExpTFHM_KNN, scXRecorder_ExpTFHM_KNN, \
scDensityRecorder_ExpTFHM_KNN, scDistanceRecorder_ExpTFHM_KNN = \
ComparisonMultipleRun(30, 'HelenaExpTFHMdensity.xlsx',  3, 'Sheet1', [10, 20, 30,40,50], 0, ['poly'],\
                     0.7)

In [None]:
# Hand the two R2 list collections from the ExpTFHM run to
# ComparisonResultsPlot (defined elsewhere in this notebook) and keep its
# return value in ExpTFHM_KNN, which the following statsPlot cell consumes.
# NOTE(review): presumably 'position' places the legend, 'ylimit' fixes the
# y-axis range and 'yline' draws a horizontal reference line at 0.75 -- confirm
# against the ComparisonResultsPlot definition.
ExpTFHM_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFHM_KNN, SVRR2Lists_ExpTFHM_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass ExpTFHM_KNN (the value returned by ComparisonResultsPlot for the
# ExpTFHM run) and the string 'KNN' to statsPlot, which is defined outside this
# section. As the cell's last expression, any non-None result is displayed.
# NOTE(review): presumably produces the statistical comparison plot for this
# dataset -- confirm against the statsPlot definition.
statsPlot(ExpTFHM_KNN, 'KNN')

In [None]:
# Run ComparisonMultipleRun (defined elsewhere in this notebook) on the
# 'HelenaExpTFBdensity.xlsx' workbook, sheet 'Sheet1', and unpack its result,
# in order, into 32 module-level names that all carry the _ExpTFB_KNN suffix.
# NOTE(review): the meaning of the positional arguments is not visible from
# this cell -- presumably 30 is the number of repeated runs, [10, ..., 50] the
# training-set sizes and ['poly'] the SVR kernels to try; the roles of 3, 0
# and 0.4 should be confirmed against the function definition.
(
    # complexSVR* lists
    complexSVRSpearmanLists_ExpTFB_KNN,
    complexSVRPearsonLists_ExpTFB_KNN,
    complexSVRR2Lists_ExpTFB_KNN,
    complexSVRMSELists_ExpTFB_KNN,
    # complexPredictedSVR* lists
    complexPredictedSVRSpearmanLists_ExpTFB_KNN,
    complexPredictedSVRPearsonLists_ExpTFB_KNN,
    complexPredictedSVRR2Lists_ExpTFB_KNN,
    complexPredictedSVRMSELists_ExpTFB_KNN,
    # metric recorders
    r2Recorder_ExpTFB_KNN,
    mseRecorder_ExpTFB_KNN,
    pearsonRecorder_ExpTFB_KNN,
    spearmanRecorder_ExpTFB_KNN,
    # SVR* lists
    SVRSpearmanLists_ExpTFB_KNN,
    SVRPearsonLists_ExpTFB_KNN,
    SVRR2Lists_ExpTFB_KNN,
    SVRMSELists_ExpTFB_KNN,
    # PredictedSVR* lists
    PredictedSVRSpearmanLists_ExpTFB_KNN,
    PredictedSVRPearsonLists_ExpTFB_KNN,
    PredictedSVRR2Lists_ExpTFB_KNN,
    PredictedSVRMSELists_ExpTFB_KNN,
    # SVR metric recorders
    SVRr2Recorder_ExpTFB_KNN,
    SVRmseRecorder_ExpTFB_KNN,
    SVRpearsonRecorder_ExpTFB_KNN,
    SVRspearmanRecorder_ExpTFB_KNN,
    # index / training-frame / full-frame recorders
    indexRecorder_ExpTFB_KNN,
    TrainXDFRecorder_ExpTFB_KNN,
    TrainYDFRecorder_ExpTFB_KNN,
    SVMSVRFullXDFRecorder_ExpTFB_KNN,
    SVMSVRFullYDFRecorder_ExpTFB_KNN,
    # sc* recorders
    scXRecorder_ExpTFB_KNN,
    scDensityRecorder_ExpTFB_KNN,
    scDistanceRecorder_ExpTFB_KNN,
) = ComparisonMultipleRun(
    30,
    'HelenaExpTFBdensity.xlsx',
    3,
    'Sheet1',
    [10, 20, 30, 40, 50],
    0,
    ['poly'],
    0.4,
)

In [None]:
# Hand the two R2 list collections from the ExpTFB run to ComparisonResultsPlot
# (defined elsewhere in this notebook) and keep its return value in ExpTFB_KNN,
# which the following statsPlot cell consumes.
# NOTE(review): presumably 'position' places the legend, 'ylimit' fixes the
# y-axis range and 'yline' draws a horizontal reference line at 0.75 -- confirm
# against the ComparisonResultsPlot definition.
ExpTFB_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFB_KNN, SVRR2Lists_ExpTFB_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass ExpTFB_KNN (the value returned by ComparisonResultsPlot for the ExpTFB
# run) and the string 'KNN' to statsPlot, which is defined outside this
# section. As the cell's last expression, any non-None result is displayed.
# NOTE(review): presumably produces the statistical comparison plot for this
# dataset -- confirm against the statsPlot definition.
statsPlot(ExpTFB_KNN, 'KNN')

In [None]:
# Run ComparisonMultipleRun (defined elsewhere in this notebook) on the
# 'HelenaExpTFHBdensity.xlsx' workbook, sheet 'Sheet1', and unpack its result,
# in order, into 32 module-level names that all carry the _ExpTFHB_KNN suffix.
# NOTE(review): the meaning of the positional arguments is not visible from
# this cell -- presumably 30 is the number of repeated runs, [10, ..., 50] the
# training-set sizes and ['poly', 'rbf'] the SVR kernels to try; the roles of
# 3, 0 and 0.6 should be confirmed against the function definition.
(
    # complexSVR* lists
    complexSVRSpearmanLists_ExpTFHB_KNN,
    complexSVRPearsonLists_ExpTFHB_KNN,
    complexSVRR2Lists_ExpTFHB_KNN,
    complexSVRMSELists_ExpTFHB_KNN,
    # complexPredictedSVR* lists
    complexPredictedSVRSpearmanLists_ExpTFHB_KNN,
    complexPredictedSVRPearsonLists_ExpTFHB_KNN,
    complexPredictedSVRR2Lists_ExpTFHB_KNN,
    complexPredictedSVRMSELists_ExpTFHB_KNN,
    # metric recorders
    r2Recorder_ExpTFHB_KNN,
    mseRecorder_ExpTFHB_KNN,
    pearsonRecorder_ExpTFHB_KNN,
    spearmanRecorder_ExpTFHB_KNN,
    # SVR* lists
    SVRSpearmanLists_ExpTFHB_KNN,
    SVRPearsonLists_ExpTFHB_KNN,
    SVRR2Lists_ExpTFHB_KNN,
    SVRMSELists_ExpTFHB_KNN,
    # PredictedSVR* lists
    PredictedSVRSpearmanLists_ExpTFHB_KNN,
    PredictedSVRPearsonLists_ExpTFHB_KNN,
    PredictedSVRR2Lists_ExpTFHB_KNN,
    PredictedSVRMSELists_ExpTFHB_KNN,
    # SVR metric recorders
    SVRr2Recorder_ExpTFHB_KNN,
    SVRmseRecorder_ExpTFHB_KNN,
    SVRpearsonRecorder_ExpTFHB_KNN,
    SVRspearmanRecorder_ExpTFHB_KNN,
    # index / training-frame / full-frame recorders
    indexRecorder_ExpTFHB_KNN,
    TrainXDFRecorder_ExpTFHB_KNN,
    TrainYDFRecorder_ExpTFHB_KNN,
    SVMSVRFullXDFRecorder_ExpTFHB_KNN,
    SVMSVRFullYDFRecorder_ExpTFHB_KNN,
    # sc* recorders
    scXRecorder_ExpTFHB_KNN,
    scDensityRecorder_ExpTFHB_KNN,
    scDistanceRecorder_ExpTFHB_KNN,
) = ComparisonMultipleRun(
    30,
    'HelenaExpTFHBdensity.xlsx',
    3,
    'Sheet1',
    [10, 20, 30, 40, 50],
    0,
    ['poly', 'rbf'],
    0.6,
)

In [None]:
# Hand the two R2 list collections from the ExpTFHB run to
# ComparisonResultsPlot (defined elsewhere in this notebook) and keep its
# return value in ExpTFHB_KNN, which the following statsPlot cell consumes.
# NOTE(review): presumably 'position' places the legend, 'ylimit' fixes the
# y-axis range and 'yline' draws a horizontal reference line at 0.75 -- confirm
# against the ComparisonResultsPlot definition.
ExpTFHB_KNN = ComparisonResultsPlot(
    [complexSVRR2Lists_ExpTFHB_KNN, SVRR2Lists_ExpTFHB_KNN],
    'KNN',
    position='lower right',
    xname='# Training data',
    yname='$R^2$',
    show=1,
    ylimit=[0, 1],
    yline=0.75,
)

In [None]:
# Pass ExpTFHB_KNN (the value returned by ComparisonResultsPlot for the
# ExpTFHB run) and the string 'KNN' to statsPlot, which is defined outside this
# section. As the cell's last expression, any non-None result is displayed.
# NOTE(review): presumably produces the statistical comparison plot for this
# dataset -- confirm against the statsPlot definition.
statsPlot(ExpTFHB_KNN, 'KNN')