In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest

from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [2]:
def apply_lda(indep_X, dep_Y, n_components=2):
    """Project the features with Linear Discriminant Analysis.

    Parameters
    ----------
    indep_X
        Feature matrix handed to ``LDA.fit_transform``.
    dep_Y
        Class labels used to supervise the projection.
    n_components : int, default 2
        Number of discriminant axes to keep. scikit-learn requires this to be
        at most ``min(n_classes - 1, n_features)``.

    Returns
    -------
    The transformed features returned by ``fit_transform``.

    Prints a confirmation message once the transformation has finished.
    """
    projected = LDA(n_components=n_components).fit_transform(indep_X, dep_Y)
    print("LDA transformation completed.")
    return projected

In [3]:
def split_scalar(indep_X, dep_Y):
    """Split the data 75/25 and standardize the features.

    The split uses ``test_size=0.25`` and ``random_state=0``. The scaler is
    fitted on the training features only and then applied to both the training
    and the test features, so test statistics never influence the scaling.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` with both feature sets scaled
        and the targets left as produced by ``train_test_split``.
    """
    features_train, features_test, y_train, y_test = train_test_split(
        indep_X, dep_Y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler().fit(features_train)
    return (
        scaler.transform(features_train),
        scaler.transform(features_test),
        y_train,
        y_test,
    )

In [4]:
def r2_prediction(regressor, X_test, y_test):
    """Return the R² score of an already fitted model on the test data.

    Parameters
    ----------
    regressor
        Fitted estimator exposing ``predict``.
    X_test
        Features to predict on.
    y_test
        True targets the predictions are compared against.

    Returns
    -------
    The value of ``sklearn.metrics.r2_score(y_test, predictions)``.
    """
    predictions = regressor.predict(X_test)
    from sklearn.metrics import r2_score as score_fn
    return score_fn(y_test, predictions)

In [5]:
def Linear(X_train, y_train, X_test, y_test=None):
    """Fit a ``LinearRegression`` model and return its R² score on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what the original three-argument
        form read implicitly.

    Returns
    -------
    The score returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.linear_model import LinearRegression

    if y_test is None:
        # Backward compatibility: existing three-argument calls depend on the
        # global `y_test`. Resolve it up front so a missing variable fails
        # before any model is trained.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

In [6]:
def svm_linear(X_train, y_train, X_test, y_test=None):
    """Fit an ``SVR`` with a linear kernel and return its R² score on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what the original three-argument
        form read implicitly.

    Returns
    -------
    The score returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Backward compatibility: existing three-argument calls depend on the
        # global `y_test`. Resolve it up front so a missing variable fails
        # before any model is trained.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

In [7]:
def svm_NL(X_train, y_train, X_test, y_test=None):
    """Fit an ``SVR`` with an RBF kernel and return its R² score on the test split.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what the original three-argument
        form read implicitly.

    Returns
    -------
    The score returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Backward compatibility: existing three-argument calls depend on the
        # global `y_test`. Resolve it up front so a missing variable fails
        # before any model is trained.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

In [8]:
def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a ``DecisionTreeRegressor`` and return its R² score on the test split.

    The tree is built with ``random_state=0`` so repeated runs give the same
    model.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what the original three-argument
        form read implicitly.

    Returns
    -------
    The score returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.tree import DecisionTreeRegressor

    if y_test is None:
        # Backward compatibility: existing three-argument calls depend on the
        # global `y_test`. Resolve it up front so a missing variable fails
        # before any model is trained.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

In [9]:
def random(X_train, y_train, X_test, y_test=None):
    """Fit a ``RandomForestRegressor`` and return its R² score on the test split.

    The forest uses ``n_estimators=10`` and ``random_state=0``.

    NOTE: this function's name would shadow the standard-library ``random``
    module if that module were imported in the same namespace; the name is
    kept so existing calls continue to work.

    Parameters
    ----------
    X_train, y_train
        Training features and targets passed to ``fit``.
    X_test
        Features the fitted model is scored on.
    y_test : optional
        True targets for ``X_test``. When omitted, the notebook-level
        ``y_test`` variable is used, which is what the original three-argument
        form read implicitly.

    Returns
    -------
    The score returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no global ``y_test`` exists.
    """
    from sklearn.ensemble import RandomForestRegressor

    if y_test is None:
        # Backward compatibility: existing three-argument calls depend on the
        # global `y_test`. Resolve it up front so a missing variable fails
        # before any model is trained.
        try:
            y_test = globals()["y_test"]
        except KeyError:
            raise NameError("name 'y_test' is not defined") from None

    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)


In [10]:
def lda_i(acclin, accsvml, accsvmnl, accdes, accrf):
    """Collect the per-model scores into a one-row summary table.

    Parameters
    ----------
    acclin, accsvml, accsvmnl, accdes, accrf
        Indexable sequences of scores for the Linear, SVMl, SVMnl, Decision
        and Random columns respectively. Element ``k`` of each sequence fills
        row ``k`` of the table; the table has the single row ``'LDA'``, so
        only element 0 is read.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['LDA']`` with columns
        ``['Linear', 'SVMl', 'SVMnl', 'Decision', 'Random']``.

    Raises
    ------
    IndexError
        If any of the sequences is empty.
    """
    dataframe = pd.DataFrame(index=['LDA'], columns=['Linear', 'SVMl', 'SVMnl', 'Decision', 'Random'])

    scores_by_column = {
        'Linear': acclin,
        'SVMl': accsvml,
        'SVMnl': accsvmnl,
        'Decision': accdes,
        'Random': accrf,
    }

    for number, idex in enumerate(dataframe.index):
        for column, scores in scores_by_column.items():
            # Single .loc assignment instead of chained `df[col][idx] = ...`:
            # the chained form writes through a temporary object, which warns
            # on current pandas and does not update the frame at all under
            # Copy-on-Write.
            dataframe.loc[idex, column] = scores[number]
    return dataframe

In [11]:
# Load the input data from "prep.csv" (a relative path, resolved against the
# current working directory). index_col=None keeps pandas' default positional
# index instead of using one of the CSV columns as the index.
dataset1 = pd.read_csv("prep.csv", index_col=None)


In [12]:
# df2 is a second name for the same DataFrame object as dataset1 (no copy is made).
df2 = dataset1

In [13]:
# One-hot encode the categorical columns. drop_first=True drops the first level
# of each encoded column, so a k-level category yields k-1 indicator columns.
# get_dummies returns a new frame, so dataset1 itself is left unchanged.
df2 = pd.get_dummies(df2, drop_first=True)

In [14]:

# Target: the 'classification_yes' column (presumably the indicator that
# get_dummies produced from a 'classification' column — confirm against prep.csv).
dep_Y = df2['classification_yes']
# Predictors: every remaining column.
indep_X = df2.drop(columns=['classification_yes'])


In [15]:
# Reduce the predictors to a single LDA component. Despite its name, `kbest`
# holds the LDA-transformed features.
# NOTE(review): LDA is fitted here on the full data set, before the train/test
# split in the next cell, so the held-out labels influence the projection —
# confirm this is intended.
kbest = apply_lda(indep_X, dep_Y, n_components=1)

# One score list per model, filled in by the evaluation cell.
acclin, accsvml, accsvmnl, accdes, accrf = [], [], [], [], []


LDA transformation completed.


In [16]:
# Split the LDA-transformed features and standardize them.
X_train, X_test, y_train, y_test = split_scalar(kbest, dep_Y)

# Train and score each model exactly once. The previous `for i in kbest:` loop
# iterated over every row of the LDA output without using the row, so it
# retrained the same five models once per sample and appended the same scores
# each time; only element 0 of each list is read by lda_i.
# The model helpers read `y_test` from this cell's namespace.
acclin.append(Linear(X_train, y_train, X_test))
accsvml.append(svm_linear(X_train, y_train, X_test))
accsvmnl.append(svm_NL(X_train, y_train, X_test))
accdes.append(Decision(X_train, y_train, X_test))
accrf.append(random(X_train, y_train, X_test))

# Summarize the scores in a one-row table.
result = lda_i(acclin, accsvml, accsvmnl, accdes, accrf)


In [17]:
# Display the score table built in the previous cell (the notebook renders a
# cell's bare last expression).
result

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
LDA,0.750587,0.739355,0.946535,0.956597,0.977865
