In [1]:
# Import required packages
from __future__ import print_function
import pandas as pd
import numpy as np
from random import randint
import matplotlib.pyplot as plt

from IPython.display import display, Math #


from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, recall_score, precision_score, f1_score
from sklearn.model_selection import cross_val_score, KFold
from sklearn.utils import shuffle

from sklearn.svm import LinearSVC, NuSVC, SVC, OneClassSVM

In [2]:
# Setup pandas options
pd.options.display.max_rows = 5
pd.options.display.max_columns = 10
pd.options.display.float_format = '{:.3f}'.format

In [3]:
## READING DATA ##

PT_data = pd.read_excel("PTData01.xlsx")
PT_data = shuffle(PT_data)
targets = PT_data.iloc[:,5:]
targets_headers = np.loadtxt("abscissa.txt")


## DISTRIBUTION OF WATER CONTENT
Data_Cell = pd.read_excel("Data_Colleff_Entire.xlsx") 
meanWater = np.zeros(len(targets_headers))
for h in range(len(targets_headers)):
    meanWater[h] = np.mean(Data_Cell.values[:,h+5])

In [4]:
def write_files(errors, curves_labels, metric, algthm_type, extra_labels=None):
    
    # write txt files
    for i in range(len(errors)):
        file_name = (str(algthm_type)+'_gamma_'+str(curves_labels[i])+'_'+\
                 str(metric)+'.txt')
        np.savetxt('Data/'+str(file_name), errors[i])

In [138]:
##################################
#CLASSIFIER
##################################


## TRAINING THE DECISION TREES ##
def my_training(my_data, my_model):
    ## SETTING UP VARIABLES ##
    abscissa = np.loadtxt("abscissa.txt")
    ncells = len(abscissa) # Number of abscissa's points

    #PRECISION
    prec_mean = np.zeros(ncells)
    vector_prec_std = np.zeros(ncells)
    error_integral_prec = 0.

    #RECALL
    rec_mean  = np.zeros(ncells)
    vector_rec_std = np.zeros(ncells)
    error_integral_rec = 0.
    
    
    for i in range(ncells):
        prec_std = 0
        rec_std = 0
        
        if i%20==0: print(str(i)+' of '+str(ncells))

        target = my_data.iloc[:,5+i]  # PT_data_1000.iloc[:,5:181]
        X = my_data.iloc[:,0:5].values # PT_data_1000.drop(target, axis=1)  # Remove all columns that are target     
        y = target.values
        
        
        
        kf = KFold(n_splits=10, shuffle=False)
        kf.get_n_splits(X)
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            if np.sum(np.asarray(y_train)) == 0 or np.sum(np.asarray(y_train)) == len(y_train):

                # if trn_target = [0,0,...,0,0] then metric = 0
                if np.sum(np.asarray(y_train)) == 0:

                    recall = 0
                    precision = 0

                # if trn_target = [1,1,...,1,1] then metric = 1
                if np.sum(np.asarray(y_train)) == len(y_train):
                    recall = 1
                    precision = 1

            else:

                #Fit model
                model.fit(X_train, y_train)

                #Comparing prediction with testing values
                prediction = model.predict(X_test)


                # Making predictions on the testing features set with precision metric
                precision = precision_score(y_test, prediction)


                # Making predictions on the testing features set with recall metric
                recall = recall_score(y_test, prediction)
                
                
            
            prec_mean[i] += precision
            prec_std += precision*precision
            
            rec_mean[i] += recall
            rec_std += recall*recall
            
            
        prec_mean[i] /= 10
        prec_std -= 10*prec_mean[i]*prec_mean[i]
        prec_std /= 9
        vector_prec_std[i] = prec_std
        
        rec_mean[i] /= 10
        rec_std -= 10*rec_mean[i]*rec_mean[i]
        rec_std /= 9
        vector_rec_std[i] = rec_std
        
        # computing integral
        if i>0:
            dx = abscissa[i] - abscissa[i-1]
            error_integral_rec += rec_mean[i]*dx
            error_integral_prec += prec_mean[i]*dx

    rec_mean = rec_mean * 100
    vector_rec_std = vector_rec_std * 100
    error_integral_rec = error_integral_rec * 100
    
    prec_mean = prec_mean * 100
    vector_prec_std = vector_prec_std * 100
    error_integral_prec = error_integral_prec * 100


    return rec_mean, prec_mean, error_integral_rec, \
            error_integral_prec, vector_rec_std, vector_prec_std




In [161]:
gamma_vector = [0.001, 0.01, 0.1, 1]
C_vector = [0.001, 0.01, 0.1, 1]
ngamma = len(gamma_vector)
nC = len(C_vector)

# builds vector_Precision and vector_Recall
vector_Recall = []
vector_Precision = []
vector_Recall_std = []
vector_Precision_std = []
degree = 0
kernel='poly'
for j in range(nC):
    vector_integral_rec = []
    vector_integral_prec = []
    c = C_vector[j]
    print(r'[C] ' + str(c))
    for i in range(ngamma):
        gamma = gamma_vector[i]
        label = str(gamma)
        print(r'[\gamma] ' + str(gamma))
        model = SVC(C=c, kernel=kernel, degree=degree, gamma=gamma)
        Recall, Precision, error_integral_rec, error_integral_prec, \
                    prec_std, rec_std = my_training(PT_data, model)
        vector_Recall.append(Recall)
        vector_Precision.append(Precision)
        vector_Recall_std.append(rec_std)
        vector_Precision_std.append(prec_std)
        vector_integral_rec.append(error_integral_rec)
        vector_integral_prec.append(error_integral_prec)

print('.    ' +str(vector_integral_rec))
print('.    ' +str(vector_integral_prec))

[C] 0.001
[\gamma] 0.001
0 of 176
20 of 176
40 of 176


  'precision', 'predicted', average, warn_for)


60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 0.01
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 0.1
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 1
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[C] 0.01
[\gamma] 0.001
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 0.01
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 0.1
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 1
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[C] 0.1
[\gamma] 0.001
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of 176
120 of 176
140 of 176
160 of 176
[\gamma] 0.01
0 of 176
20 of 176
40 of 176
60 of 176
80 of 176
100 of

I will write the files in .txt so that I don't loose them. See the other notebook for plotting in Data/Plotting.

In [162]:
# This requires creating a directory with the name 'Plots'
ngamma = len(gamma_vector)
nC = len(C_vector)

errors_vector = [vector_Precision, vector_Recall]
errors_metric = ['precision', 'recall']

# set of plots 1
for m in range(2):
    metric = errors_metric[m]
    for j in range(nC):
        c = C_vector[j]
        algthm_type = 'SVC_'+str(kernel)+'_'+str(degree)+'_c_' + str(C_vector[j])
        print(algthm_type)
        errors = []
        curves_labels = []
        print(r'[C] ' + str(c))
        for i in range(ngamma):
            g = gamma_vector[i]
            label = str(g)
            curves_labels.append(label)
            errors.append(errors_vector[m][ngamma*j + i])
        print(curves_labels)
        write_files(errors, curves_labels, metric, algthm_type)
        

SVC_poly_0_c_0.001
[C] 0.001
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_0.01
[C] 0.01
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_0.1
[C] 0.1
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_1
[C] 1
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_0.001
[C] 0.001
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_0.01
[C] 0.01
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_0.1
[C] 0.1
['0.001', '0.01', '0.1', '1']
SVC_poly_0_c_1
[C] 1
['0.001', '0.01', '0.1', '1']


In [163]:
# This requires creating a directory with the name 'Plots'
ngamma = len(gamma_vector)
nC = len(C_vector)

errors_vector = [vector_Precision, vector_Recall]
std_vector = [vector_Precision_std, vector_Recall_std]

errors_metric = ['precision', 'recall']

# set of plots 1
for m in range(2):
    metric = errors_metric[m]
    for j in range(nC):
        c = C_vector[j]
        algthm_type = 'std_SVC_'+str(kernel)+'_'+str(degree)+'_c_' + str(C_vector[j])
        print(algthm_type)
        stds = []
        curves_labels = []
        print(r'[C] ' + str(c))
        for i in range(ngamma):
            g = gamma_vector[i]
            label = str(g)
            curves_labels.append(label)
            stds.append(std_vector[m][ngamma*j + i])
        print(curves_labels)
        write_files(stds, curves_labels, metric, algthm_type)
        




std_SVC_poly_0_c_0.001
[C] 0.001
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_0.01
[C] 0.01
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_0.1
[C] 0.1
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_1
[C] 1
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_0.001
[C] 0.001
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_0.01
[C] 0.01
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_0.1
[C] 0.1
['0.001', '0.01', '0.1', '1']
std_SVC_poly_0_c_1
[C] 1
['0.001', '0.01', '0.1', '1']
