In [49]:
# Import required packages
from __future__ import print_function
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

from sklearn.svm import LinearSVC, NuSVC, SVC

In [50]:
# Setup pandas options
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.1f}'.format

# Setup numpy options
np.set_printoptions(precision=3, suppress=True)

# Get the data
PT_data = pd.read_excel("PTResults_trimmed.xlsx")

In [51]:
## TRAINING SVM  ##

target = 0.617035028915481
X = PT_data.drop(target, axis=1) # Just preparing the data
y = PT_data[target]

training_features_vector = []
testing_features_vector = []
training_target_vector = []
testing_target_vector = []

M = 1000 # number of shuffles
test_size = 30 # number of cells to be tested out of 100

#Splitting the data into training and testing and storing to reuze same seeds
# for the different algorithms
for i in range(M):
    
    training_features, testing_features, training_target, testing_target = train_test_split(
        X, y, test_size=1/test_size, shuffle=True)
    #store
    training_features_vector.append(training_features)
    testing_features_vector.append(testing_features)
    training_target_vector.append(training_target)
    testing_target_vector.append(testing_target)
    
print(training_features.describe())

       Temperture[K]  AoA[o]  Mach  MVD[mum]  rho[kg/m3]
count           96.0    96.0  96.0      96.0        96.0
mean           252.1     5.6   0.6       0.0         0.8
std             12.0     3.2   0.1       0.0         0.2
min            233.2     0.1   0.3       0.0         0.4
25%            242.3     2.7   0.4       0.0         0.6
50%            251.2     6.2   0.6       0.0         0.8
75%            263.0     8.5   0.7       0.0         1.0
max            272.7     9.9   0.8       0.0         1.2


Linear SVM

In [52]:
# Linear Support Vector Classification
MSE_linear = 0
VarMSE_linear = 0
for i in range(M):
    training_features = training_features_vector[i]
    testing_features = testing_features_vector[i]
    training_target = training_target_vector[i]
    testing_target = testing_target_vector[i]

    #Training with Linear SVC
    model = LinearSVC(random_state=0, tol=1e-5)
    model.fit(training_features, training_target)

    #Comparing prediction with testing values
    prediction = model.predict(testing_features)
    
    #Get means and std
    MSE = mean_squared_error(prediction, testing_target)
    MSE_linear += MSE
    stdMSE_linear += MSE*MSE
    
MSE_linear /= M
VarMSE_linear -= M*MSE_linear**2
VarMSE_linear /=(M-1)
count_linear = test_size*MSE_linear
countstd_linear = test_size*stdMSE_linear
print('Average number of miss classified cells out of ' + str(M) + ' samples: ' + 
      str(count_linear) + ' +- ' + str(countstd_linear))

Average number of miss classified cells out of 1000 samples: 7.7925 +- 1.84586899399


Nu-Support Vector Classification

In [53]:
# Nu-Support Vector Classification
MSE_Nu = 0
stdMSE_Nu = 0
for i in range(M):
    training_features = training_features_vector[i]
    testing_features = testing_features_vector[i]
    training_target = training_target_vector[i]
    testing_target = testing_target_vector[i]

    #Training with Nu-SVC
    model = NuSVC(gamma=0.001)
    model.fit(training_features, training_target)
    
    #Comparing prediction with testing values
    prediction = model.predict(testing_features)
    
    #Get means and std
    MSE = mean_squared_error(prediction, testing_target)
    MSE_Nu += MSE
    stdMSE_Nu += MSE*MSE
    
MSE_Nu /= M
stdMSE_Nu -= M*MSE_Nu**2
stdMSE_Nu /=(M-1)
count_Nu = test_size*MSE_Nu
countstd_Nu = test_size*stdMSE_Nu
print('Average number of miss classified cells out of ' + str(M) + ' samples: ' + 
      str(count_Nu) + ' +- ' + str(countstd_Nu))

Average number of miss classified cells out of 1000 samples: 5.0325 +- 0.981154279279


C-Support Vector Classification

In [54]:
#Training with C-Support Vector Classification
MSE_svc = 0
stdMSE_svc = 0
for i in range(M):
    training_features = training_features_vector[i]
    testing_features = testing_features_vector[i]
    training_target = training_target_vector[i]
    testing_target = testing_target_vector[i]

    #Training with Nu-SVC
    model = SVC(gamma='auto')
    model.fit(training_features, training_target)
    
    #Comparing prediction with testing values
    prediction = model.predict(testing_features)
    
    #Get means and std
    MSE = mean_squared_error(prediction, testing_target)
    MSE_svc += MSE
    stdMSE_svc += MSE*MSE
    
MSE_svc /= M
stdMSE_svc -= M*MSE_svc**2
stdMSE_svc/=(M-1)
count_svc = test_size*MSE_svc
countstd_svc = test_size*stdMSE_svc
print('Average number of miss classified cells out of ' + str(M) + ' samples: ' + 
      str(str(count_svc) + ' +- ' + str(countstd_svc)))

Average number of miss classified cells out of 1000 samples: 7.7325 +- 1.29511824324
