#To find the best SVM model


In [1]:
import pandas as pd
from sklearn import preprocessing
from IPython.display import display, HTML

df_loan20K=pd.read_csv('Data_Loans_20K.csv', header=0)

print(df_loan20K.shape)

cols_loan20K = df_loan20K.columns

print('Column Name, DataTypes, MissingValues in Loan 20K CSV\n')
for i in cols_loan20K:
    print(i,',', df_loan20K[i].dtype , ',', df_loan20K[i].isnull().any())

##If Yes, fill in missing values by mean values or most frequent nominal values.

df_loan20K["Credit Score"].fillna(df_loan20K["Credit Score"].mean(), inplace=True)
df_loan20K["Annual Income"].fillna(df_loan20K["Annual Income"].mean(), inplace=True)
df_loan20K["Years in current job"].fillna(df_loan20K["Years in current job"].mode().iloc[0], inplace=True)
df_loan20K["Months since last delinquent"].fillna(df_loan20K["Months since last delinquent"].mean(), inplace=True)
df_loan20K["Bankruptcies"].fillna(df_loan20K["Bankruptcies"].mean(), inplace=True)
df_loan20K["Tax Liens"].fillna(df_loan20K["Tax Liens"].mean(), inplace=True)

print('\nColumn Name, DataTypes, MissingValues after filling with Mean and Frequent repeated nominal value in Loan 20K CSV\n')
for i in cols_loan20K:
    print(i,',', df_loan20K[i].dtype , ',', df_loan20K[i].isnull().any())    
    
# encode labels
y = df_loan20K['Term'] # define label as nominal values
le = preprocessing.LabelEncoder()
le.fit(y)
y_encoded = le.transform(y) # encode nominal labels to integers #####################################
df_loan20K['Term'] = y_encoded

(20000, 17)
Column Name, DataTypes, MissingValues in Loan 20K CSV

Loan Status , object , False
Current Loan Amount , int64 , False
Term , object , False
Credit Score , float64 , True
Annual Income , float64 , True
Years in current job , object , True
Home Ownership , object , False
Purpose , object , False
Monthly Debt , float64 , False
Years of Credit History , float64 , False
Months since last delinquent , float64 , True
Number of Open Accounts , int64 , False
Number of Credit Problems , int64 , False
Current Credit Balance , int64 , False
Maximum Open Credit , int64 , False
Bankruptcies , float64 , True
Tax Liens , float64 , True

Column Name, DataTypes, MissingValues after filling with Mean and Frequent repeated nominal value in Loan 20K CSV

Loan Status , object , False
Current Loan Amount , int64 , False
Term , object , False
Credit Score , float64 , False
Annual Income , float64 , False
Years in current job , object , False
Home Ownership , object , False
Purpose , object , Fal

In [2]:
# Data preprocessing ################################################################################
print('Column Datatypes:\n',df_loan20K.dtypes)

# convert all nominal variables to binary variables
df_num=df_loan20K.copy(deep=True) 
# create new binary columns
df_dummies=pd.get_dummies(df_num[['Loan Status','Years in current job','Home Ownership','Purpose']])
# add them to dataframe
df_num=df_num.join(df_dummies)
# drop original columns
df_num=df_num.drop('Loan Status',axis=1)
df_num=df_num.drop('Years in current job',axis=1)
df_num=df_num.drop('Home Ownership', axis=1)
df_num=df_num.drop('Purpose', axis=1)

# drop extra binary columns, since we only need N-1 binary columns
df_num=df_num.drop('Loan Status_Charged Off', axis=1)
df_num=df_num.drop('Years in current job_10+ years', axis=1)
df_num=df_num.drop('Home Ownership_Own Home', axis=1)
df_num=df_num.drop('Purpose_wedding', axis=1)

# print out and display dataframe as table in HTML
display(HTML(df_num.head(10).to_html()))

# standarized data
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

scaler = StandardScaler()
df_num_std = df_num.copy(deep=True) 
x_features = df_num_std.loc[:, df_num_std.columns != 'Term']
cols = x_features.columns
df_num_std = pd.DataFrame(scaler.fit_transform(x_features), columns = cols)
df_num_std['Term'] = y_encoded
display('df_num_std:',HTML(df_num_std.head(10).to_html()))

# MinMax data
scaler = MinMaxScaler(feature_range=(1,5))
df_num_minmax = df_num.copy(deep=True) 
x_features = df_num_minmax.loc[:, df_num_std.columns != 'Term']
cols = x_features.columns
df_num_minmax = pd.DataFrame(scaler.fit_transform(x_features), columns = cols)
df_num_minmax['Term'] = y_encoded
display('df_num_minmax:',HTML(df_num_minmax.head(10).to_html()))



Column Datatypes:
 Loan Status                      object
Current Loan Amount               int64
Term                              int32
Credit Score                    float64
Annual Income                   float64
Years in current job             object
Home Ownership                   object
Purpose                          object
Monthly Debt                    float64
Years of Credit History         float64
Months since last delinquent    float64
Number of Open Accounts           int64
Number of Credit Problems         int64
Current Credit Balance            int64
Maximum Open Credit               int64
Bankruptcies                    float64
Tax Liens                       float64
dtype: object


Unnamed: 0,Current Loan Amount,Term,Credit Score,Annual Income,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens,Loan Status_Fully Paid,Years in current job_1 year,Years in current job_2 years,Years in current job_3 years,Years in current job_4 years,Years in current job_5 years,Years in current job_6 years,Years in current job_7 years,Years in current job_8 years,Years in current job_9 years,Years in current job_< 1 year,Home Ownership_HaveMortgage,Home Ownership_Home Mortgage,Home Ownership_Rent,Purpose_Business Loan,Purpose_Buy House,Purpose_Buy a Car,Purpose_Debt Consolidation,Purpose_Educational Expenses,Purpose_Home Improvements,Purpose_Medical Bills,Purpose_Other,Purpose_Take a Trip,Purpose_major_purchase,Purpose_moving,Purpose_other,Purpose_renewable_energy,Purpose_small_business,Purpose_vacation
0,445412,1,709.0,1167493.0,5214.74,17.2,35.20179,6,1,228190,416746,1.0,0.0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1,262328,1,1094.310471,1376165.0,33295.98,21.1,8.0,35,0,229976,850784,0.0,0.0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,99999999,1,741.0,2231892.0,29200.53,14.9,29.0,18,1,297996,750090,0.0,0.0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
3,347666,0,721.0,806949.0,8741.9,12.0,35.20179,9,0,256329,386958,0.0,0.0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,176220,1,1094.310471,1376165.0,20639.7,6.1,35.20179,15,0,253460,427174,0.0,0.0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,206602,1,7290.0,896857.0,16367.74,17.3,35.20179,6,0,215308,272448,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
6,217646,1,730.0,1184194.0,10855.08,19.6,10.0,13,1,122170,272052,1.0,0.0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
7,648714,0,1094.310471,1376165.0,14806.13,8.2,8.0,15,0,193306,864204,0.0,0.0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
8,548746,1,678.0,2559110.0,18660.28,22.6,33.0,4,0,437171,555038,0.0,0.0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
9,215952,1,739.0,1454735.0,39277.75,13.9,35.20179,20,0,669560,1021460,0.0,0.0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0


'df_num_std:'

Unnamed: 0,Current Loan Amount,Credit Score,Annual Income,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens,Loan Status_Fully Paid,Years in current job_1 year,Years in current job_2 years,Years in current job_3 years,Years in current job_4 years,Years in current job_5 years,Years in current job_6 years,Years in current job_7 years,Years in current job_8 years,Years in current job_9 years,Years in current job_< 1 year,Home Ownership_HaveMortgage,Home Ownership_Home Mortgage,Home Ownership_Rent,Purpose_Business Loan,Purpose_Buy House,Purpose_Buy a Car,Purpose_Debt Consolidation,Purpose_Educational Expenses,Purpose_Home Improvements,Purpose_Medical Bills,Purpose_Other,Purpose_Take a Trip,Purpose_major_purchase,Purpose_moving,Purpose_other,Purpose_renewable_energy,Purpose_small_business,Purpose_vacation,Term
0,-0.351545,-0.284234,-0.256844,-1.082788,-0.141489,0.0,-1.023607,1.748786,-0.161907,-0.05447,2.485469,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,4.633779,-0.205582,-0.301347,-0.050063,1.029845,-0.861399,-0.122566,-0.082437,-0.113643,-1.942491,-0.036767,4.06175,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
1,-0.35736,0.0,0.0,1.208568,0.422207,-1.8114,4.743922,-0.357677,-0.157709,0.010313,-0.340743,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,-0.301347,-0.050063,1.029845,-0.861399,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
2,2.810785,-0.260628,1.05327,0.87439,-0.473926,-0.412985,1.362957,1.748786,0.002173,-0.004716,-0.340743,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,4.633779,-0.205582,-0.301347,-0.050063,-0.97102,-0.861399,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
3,-0.35465,-0.275382,-0.700619,-0.794981,-0.893085,0.0,-0.426966,-0.357677,-0.095766,-0.058916,-0.340743,-0.119026,0.542214,-0.261488,-0.314581,3.45252,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,-0.301347,-0.050063,-0.97102,-0.861399,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,0
4,-0.360096,0.0,0.0,0.175848,-1.745856,0.0,0.766316,-0.357677,-0.102509,-0.052913,-0.340743,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,3.681853,-0.250736,-0.243676,-0.215807,-0.205582,-0.301347,-0.050063,-0.97102,1.160902,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
5,-0.359131,4.570401,-0.589956,-0.172732,-0.127036,0.0,-1.023607,-0.357677,-0.192186,-0.076007,-0.340743,-0.119026,-1.844291,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,-0.301347,-0.050063,1.029845,-0.861399,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
6,-0.35878,-0.268742,-0.236288,-0.622551,0.205401,-1.678218,0.368555,1.748786,-0.411107,-0.076066,2.485469,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,3.318435,-0.050063,1.029845,-0.861399,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
7,-0.345087,0.0,0.0,-0.300156,-1.442327,-1.8114,0.766316,-0.357677,-0.243902,0.012316,-0.340743,-0.119026,-1.844291,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,3.318435,-0.050063,1.029845,-0.861399,-0.122566,12.130464,-0.113643,-1.942491,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,0
8,-0.348262,-0.307102,1.456025,0.014333,0.639014,-0.14662,-1.421368,-0.357677,0.329304,-0.033829,-0.340743,-0.119026,0.542214,-0.261488,3.178827,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,-0.301347,-0.050063,-0.97102,1.160902,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1
9,-0.358834,-0.262103,0.096707,1.696664,-0.618463,0.0,1.760717,-0.357677,0.875535,0.035787,-0.340743,-0.119026,0.542214,-0.261488,-0.314581,-0.289643,-0.259409,-0.271602,-0.250736,-0.243676,-0.215807,-0.205582,3.318435,-0.050063,-0.97102,1.160902,-0.122566,-0.082437,-0.113643,0.514803,-0.036767,-0.246199,-0.103259,-0.181965,-0.078342,-0.057541,-0.032421,-0.25006,-0.010001,-0.052513,-0.036767,1


'df_num_minmax:'

Unnamed: 0,Current Loan Amount,Term,Credit Score,Annual Income,Monthly Debt,Years of Credit History,Months since last delinquent,Number of Open Accounts,Number of Credit Problems,Current Credit Balance,Maximum Open Credit,Bankruptcies,Tax Liens,Loan Status_Fully Paid,Years in current job_1 year,Years in current job_2 years,Years in current job_3 years,Years in current job_4 years,Years in current job_5 years,Years in current job_6 years,Years in current job_7 years,Years in current job_8 years,Years in current job_9 years,Years in current job_< 1 year,Home Ownership_HaveMortgage,Home Ownership_Home Mortgage,Home Ownership_Rent,Purpose_Business Loan,Purpose_Buy House,Purpose_Buy a Car,Purpose_Debt Consolidation,Purpose_Educational Expenses,Purpose_Home Improvements,Purpose_Medical Bills,Purpose_Other,Purpose_Take a Trip,Purpose_major_purchase,Purpose_moving,Purpose_other,Purpose_renewable_energy,Purpose_small_business
0,1.017369,1,1.071057,1.139283,1.101355,1.885993,1.926363,1.428571,1.571429,1.027761,1.002636,1.8,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1.010045,1,1.293651,1.166435,1.647148,2.140065,1.210526,3.5,1.0,1.027978,1.005381,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,5.0,1,1.089544,1.277778,1.567548,1.736156,1.763158,2.285714,1.571429,1.036254,1.004744,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1.013458,0,1.07799,1.092371,1.169909,1.547231,1.926363,1.642857,1.0,1.031185,1.002447,1.0,1.0,5.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1.0066,1,1.293651,1.166435,1.401158,1.162866,1.926363,2.071429,1.0,1.030836,1.002702,1.0,1.0,5.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
5,1.007815,1,4.872906,1.104069,1.318127,1.892508,1.926363,1.428571,1.0,1.026194,1.001723,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
6,1.008257,1,1.083189,1.141456,1.210982,2.042345,1.263158,1.928571,1.571429,1.014863,1.001721,1.8,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,5.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7,1.025502,0,1.293651,1.166435,1.287775,1.299674,1.210526,2.071429,1.0,1.023517,1.005466,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,5.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
8,1.021503,1,1.053148,1.320354,1.362685,2.237785,1.868421,1.285714,1.0,1.053185,1.00351,1.0,1.0,5.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
9,1.008189,1,1.088388,1.176658,1.763411,1.67101,1.926363,2.428571,1.0,1.081458,1.00646,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5.0,1.0,1.0,5.0,1.0,1.0,1.0,5.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [3]:
# 02. Logistic regression

import pandas as pd
import numpy as np
from sklearn import metrics 
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, precision_score, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix as cm

x=df_num.drop('Term',axis=1)
y=df_num['Term']

# by 10-fold cross validation
clf=LogisticRegression(penalty='l2',solver='lbfgs')
precision = make_scorer(precision_score, average='macro')
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
pre=cross_val_score(clf, x, y, cv=10, scoring=precision).mean()
print('By 10-fold Cross Validation: acc = ',acc, ', precision = ', pre)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, msg_start, len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please als

By 10-fold Cross Validation: acc =  0.7166 , precision =  0.49123539179874404


In [4]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=0.0000000001, max_iter=1)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=0.0000000001, max_iter=1)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.6285499999999999
Accuracy by N-fold Cross Validation: 0.6303




In [5]:
# 03. SVM

# by N-fold cross validation
clf=SVC(kernel='linear', C=0.0000000001, max_iter=10)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=0.0000000001, max_iter=10)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.5384




Accuracy by N-fold Cross Validation: 0.6520499999999999




In [6]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=0.0000000001, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=0.0000000001, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.58005




Accuracy by N-fold Cross Validation: 0.6520499999999999




In [7]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=1, max_iter=1)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=1, max_iter=1)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.54915




Accuracy by N-fold Cross Validation: 0.6303


In [8]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=1, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=1, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.45585000000000003




Accuracy by N-fold Cross Validation: 0.6520499999999999




In [9]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=100, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=100, max_iter=100)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.45585000000000003




Accuracy by N-fold Cross Validation: 0.65845




In [10]:
# 03. SVM

from sklearn.svm import SVC

# by N-fold cross validation
clf=SVC(kernel='linear', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.5731999999999999




Accuracy by N-fold Cross Validation: 0.74115


In [11]:
df_num_std
# 03. SVM

from sklearn.svm import SVC
x=df_num_std.drop('Term',axis=1)
y=df_num_std['Term']
# by N-fold cross validation
clf=SVC(kernel='linear', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.6276499999999999




Accuracy by N-fold Cross Validation: 0.6992


In [12]:
# 03. SVM

from sklearn.svm import SVC
x=df_num_minmax.drop('Term',axis=1)
y=df_num_minmax['Term']
# by N-fold cross validation
clf=SVC(kernel='linear', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)
clf=SVC(kernel='rbf', C=100, max_iter=20000)
acc=cross_val_score(clf, x, y, cv=10, scoring='accuracy').mean()
print("Accuracy by N-fold Cross Validation:",acc)



Accuracy by N-fold Cross Validation: 0.5510499999999999




Accuracy by N-fold Cross Validation: 0.71635
