In [130]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error,r2_score,explained_variance_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

#%matplotlib inline

In [135]:

class RegressionModels():

    
    def scores(self,y_test,y_pred, model):
        print("Variance Score : " , explained_variance_score(y_test, y_pred))
        print("R2 Score : " ,r2_score(y_test,y_pred))
        print("Root Mean Square : ",math.sqrt(mean_squared_error(y_test, y_pred)))
        print("Best Parameters : ", model.best_params_ )

    def genericModel(self,model,params, X_train, fullTest="x"):
        generic_grid = GridSearchCV(model, params, verbose=False, cv=3,return_train_score=True)
        generic_grid.fit(X_train,y_train)
        svm_predict = generic_grid.predict(X_test)
        return svm_predict
    
    
    def svmRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------SVM Regression Starts------------\n\n ")
        clf = SVR()
        svm_grid = GridSearchCV(clf, params, verbose=False, cv=3,return_train_score=True)
        svm_grid.fit(X_train,y_train)
        svm_predict = svm_grid.predict(X_test)
       
        print("Best Parameters : ", svm_grid.best_params_ )
        self.scores(Y_test, svm_predict, svm_grid)
        print("\n-----------SVM Regression ends------------\n\n ")
        

    def decisionTreeRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Decision Tree Regression Starts------------\n\n")
        DTregressor = DecisionTreeRegressor(random_state=0)
        DT_grid = GridSearchCV(DTregressor, params, verbose=False, cv=3,return_train_score=True)
        DT_grid.fit(X_train,y_train)
        DT_predict = DT_grid.predict(X_test)
        self.scores(Y_test, DT_predict, DT_grid)
        print("\n-----------Decision Tree Regression Ends------------\n\n")
        

    def randomForestRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Random Forest Regression Starts------------\n\n")
        RFRegressor = RandomForestRegressor(random_state=0)
        RF_grid = GridSearchCV(RFRegressor, params, verbose=False, cv=3, return_train_score=True)
        RF_grid.fit(X_train,y_train)
        RF_predict = RF_grid.predict(X_test)
        self.scores(Y_test, RF_predict, RF_grid)
        print("\n-----------Random Forest Regression Ends------------\n\n")
       
  
    def adaBoostRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------AdaBoost Regression Starts------------\n\n")
        AdaRegressor = AdaBoostRegressor(random_state=0)
        adaBoost_grid = GridSearchCV(AdaRegressor, params, verbose=False, cv=3,return_train_score=True)
        adaBoost_grid.fit(X_train,y_train)
        adaBoost_predict = adaBoost_grid.predict(X_test)
        self.scores(Y_test, adaBoost_predict, adaBoost_grid)
        print("\n-----------AdaBoost Regression Ends------------\n\n")

    def gaussianProcessRegression(self, X_train, X_test, y_train, Y_test, params, fullTest="x"):
        print("-----------GaussianProcess Regression Starts------------\n\n")
        GPRRegressor = GaussianProcessRegressor(random_state=0)
        GPR_grid = GridSearchCV(GPRRegressor, params, verbose=False, cv=3,return_train_score=True)
        GPR_grid.fit(X_train,y_train)
        GPR_predict = GPR_grid.predict(X_test)
        self.scores(Y_test, GPR_predict, GPR_grid)
        print("\n-----------GaussianProcess Regression Ends------------\n\n")
        

    def LinearRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Linear Regression Starts------------\n\n")
        LinearRegressor = LinearRegression()
        linearRegression_grid = GridSearchCV(LinearRegressor, params, verbose=False, cv=3,return_train_score=True)
        linearRegression_grid.fit(X_train,y_train)
        linearRegression_predict = linearRegression_grid.predict(X_test)
        self.scores(Y_test, linearRegression_predict, linearRegression_grid)
        print("\n-----------Linear Regression Ends------------\n\n")
        
    def mlpRegression(self, X_train, X_test, y_train, Y_test, params):
        print("-----------Neural Network Regression Starts------------\n\n")
        MLPRegressor_obj = MLPRegressor(random_state=0)
        MLPRegressor_grid = GridSearchCV(MLPRegressor_obj, params, verbose=False, cv=3,return_train_score=True)
        MLPRegressor_grid.fit(X_train,y_train)
        MLPRegressor_predict = MLPRegressor_grid.predict(X_test)
        self.scores(Y_test, MLPRegressor_predict, MLPRegressor_grid)
        print("\n-----------Neural Network Regression Ends------------\n\n")
        
    

    def train_split(self,X,y,test_size=0.2,random_state=0):
        X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
        return X_train,X_test,y_train,y_test
 
    def train_all__models(self, X, y):
        svm_regression_params =  { 'C' : np.logspace(0, 3, 4), 'gamma' : np.logspace(-2, 1, 4)}
        dt_params = {'max_depth' : np.arange(1, 10, 10),'min_samples_split': np.arange(0.1, 1.0, 10)}
        rd_params = {'n_estimators' : np.arange(10,100,10),'max_depth' : np.arange(1,6,2)}
        ada_params = {'n_estimators' : np.arange(10,100,10)}
        gpr_params = {'n_restarts_optimizer' : np.arange(1,10,1)}
        linear_params = {'n_jobs' : np.arange(1,5,1)}
        mlp_params = {'hidden_layer_sizes': np.arange(30,150,20),'learning_rate': ['constant','invscaling','adaptive'],'max_iter': np.arange(20,200,50)}
        X_train,X_test,y_train,y_test = self.train_split(X,y)
        self.svmRegression(X_train,X_test,y_train,y_test, svm_regression_params)
        self.decisionTreeRegression(X_train,X_test,y_train,y_test, dt_params)
        self.randomForestRegression(X_train,X_test,y_train,y_test, rd_params)
        self.adaBoostRegression(X_train,X_test,y_train,y_test, ada_params)
        self.gaussianProcessRegression(X_train,X_test,y_train,y_test, gpr_params)
        self.LinearRegression(X_train,X_test,y_train,y_test, linear_params)
        self.mlpRegression(X_train,X_test,y_train,y_test, mlp_params)

        
        

    def speech_data(self):
        col = ['Subject_id','local_jitter','absolute_jitter','rap_jitter','ppq5_jitter','ddp_jitter','local_shimmer',
                       'db_shimmer','apq3_shimmer','apq5_shimmer','apq11_shimmer','dda_shimmer','AC','NTH','HTN','Median_pitch',
                       'Mean_pitch','Standard_deviation','Minimum_pitch','Maximum_pitch','Number_of_pulses','Number_of_periods',
                       'Mean_period','Standard_deviation_of_period','Fraction_of_locally_unvoiced_frames','Number_of_voice_breaks',
                       'Degree_of_voice_breaks','UPDRS','class_info']
        data = pd.read_csv("../Datasets/Parkinson_Multiple_Sound_Recording/Prakinson_Multiple_sound_recording_train_data.txt")
        data.columns=col

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
        X = data.drop(['class_info'],axis=1)
        X = a.fit_transform(X)
        y = data['class_info']
        self.train_all__models(X, y.values.ravel())
    
    def concrete_data(self):
        col = ['Cement','Blast Furnace Slag','Fly Ash','Water','Superplasticizer','Coarse Aggregate','Fine Aggregate',
                       'Age','Concrete compressive strength']
        data  = pd.read_excel("../Datasets/concrete_data/Concrete_compressive_strength_Data.xls",skiprows=1)
        data.columns=col

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
        X = data.drop(['Concrete compressive strength'],axis=1)
        X = a.fit_transform(X)
        y = data['Concrete compressive strength']
        self.train_all__models(X, y.values.ravel())
        
    
    def student_data_train_G3(self):
        df1 = pd.read_csv("../Datasets/Student_performance/student1/student-mat.csv",delimiter=";")
        df2 = pd.read_csv("../Datasets/Student_performance/student1/student-por.csv",delimiter=";")

        data = pd.concat([df1,df2])

        categorical_columns = ['school','sex','famsize','address','famsize','Pstatus','Mjob','Fjob','reason','guardian','schoolsup',
                               'famsup','paid','activities','nursery','higher','internet','romantic']
        for i in categorical_columns:
            data[i] = pd.Categorical(data[i]).codes

        data.columns = data.columns.str.lstrip()
        a = StandardScaler()
       
        X = a.fit_transform(data)
        y = data['G3']
        self.train_all__models(X, y.values.ravel())
        
    def store_predicitons(model,prediction):
        model_predicitons = {}
        
        
        
        

In [136]:
regressionModels = RegressionModels()
regressionModels.speech_data()
regressionModels.concrete_data()
regressionModels.student_data_train_G3()


In [137]:
regressionModels.student_data_train_G3()
#prediciton_g2 = regressionModels.student_data_train_G2()

-----------SVM Regression Starts------------

 
Best Parameters :  {'C': 1000.0, 'gamma': 0.01}
Variance Score :  0.9947819975119885
R2 Score :  0.9947799274944696
Root Mean Square :  0.2923329301727089
Best Parameters :  {'C': 1000.0, 'gamma': 0.01}

-----------SVM Regression ends------------

 
-----------Decision Tree Regression Starts------------


Variance Score :  0.5472047463530132
R2 Score :  0.5466839917521795
Root Mean Square :  2.7242063441776763
Best Parameters :  {'max_depth': 1, 'min_samples_split': 0.1}

-----------Decision Tree Regression Ends------------


-----------Random Forest Regression Starts------------


Variance Score :  0.999605696276707
R2 Score :  0.9996045025330286
Root Mean Square :  0.0804657984772739
Best Parameters :  {'max_depth': 5, 'n_estimators': 90}

-----------Random Forest Regression Ends------------


-----------AdaBoost Regression Starts------------


Variance Score :  0.9929941437488801
R2 Score :  0.9929855316099279
Root Mean Square :  0.338















Variance Score :  0.9149729758162379
R2 Score :  0.9126385633072939
Root Mean Square :  1.1959123638787188
Best Parameters :  {'hidden_layer_sizes': 130, 'learning_rate': 'constant', 'max_iter': 170}

-----------Neural Network Regression Ends------------




