In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import PolynomialFeatures

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.linear_model import Lasso

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
df = pd.read_csv('IphoneData.csv')
newdf = df.drop(columns=['NAME', 'CO2E'])
features = newdf.to_numpy()
emissions = df['CO2E'].to_numpy()

In [3]:
def modelScore(X, y, model):
    avgSSE = []
    avgMAPE = []
    splits = LeaveOneOut().get_n_splits(X)
    #cross validation using leave one out
    for i, (trainIndex, testIndex) in enumerate(LeaveOneOut().split(X)):
        X_train = X[trainIndex]
        X_test = X[testIndex]
        y_train = y[trainIndex]
        y_test = y[testIndex]
    

        if model == 'linear regression':
            fittedModel = LinearRegression().fit(X_train, y_train)
                
        elif model == 'decision tree':
            fittedModel = tree.DecisionTreeRegressor().fit(X_train, y_train)
                
        elif model == 'random forest':
            y_train = y_train.flatten()
            fittedModel = RandomForestRegressor().fit(X_train, y_train)
            
        elif model == 'support vector regression':
            y_train = y_train.flatten()
            fittedModel = SVR(kernel = 'linear').fit(X_train, y_train)
    
        elif model == 'polynomial regression':
            polyX_train = PolynomialFeatures(degree=2).fit_transform(X_train)
            polyX_test = PolynomialFeatures(degree=2).fit_transform(X_test)
            fittedModel = LinearRegression().fit(polyX_train, y_train)
            X_test = polyX_test

        elif model == 'xgboost':
            fittedModel = xgb.XGBRFRegressor(objective ='reg:linear', n_estimators = 20, seed = 42).fit(X_train, y_train)
        
        elif model =='lasso':
            fittedModel = Lasso().fit(X_train, y_train)
            

        y_pred = fittedModel.predict(X_test)
        print(y_test)
        print(y_pred)
        
        #scores
        avgSSE.append(np.sum((y_test - y_pred) ** 2))
        avgMAPE.append(mean_absolute_percentage_error(y_test, y_pred))

    avgSSE, avgMAPE = np.mean(avgSSE), np.mean(avgMAPE)

   #print results
    print("SSE = " + str(avgSSE))
    print("MAPE = " + str(avgMAPE))
    print(r2_score([1], [2]))




In [4]:
modelScore(features, emissions, 'linear regression')

[57]
[61.26866822]
[62]
[63.43767098]
[73]
[67.27254188]
[46]
[42.62462575]
[50]
[45.43167469]
[58]
[50.98701723]
[57]
[69.33954843]
[71]
[75.96810712]
[72]
[71.53288011]
[77]
[74.01845621]
[89]
[78.2586899]
[80]
[83.04311898]
[96]
[89.86997991]
[110]
[101.61494401]
[70]
[69.66219264]
[75]
[72.02401736]
[85]
[76.70105222]
[64]
[71.26109392]
[61]
[67.21720494]
[69]
[72.89752784]
[76]
[78.9305895]
[88]
[91.48955646]
[112]
[117.64959638]
[74]
[75.17037858]
[81]
[81.24404833]
[93]
[93.82861669]
[117]
[119.40530154]
[61]
[64.50302928]
[67]
[70.73248129]
[83]
[82.65880431]
[68]
[67.87782969]
[75]
[73.98183044]
[91]
[85.77163009]
[65]
[66.96976117]
[73]
[68.23619122]
[56]
[59.22681974]
[66]
[64.88555215]
[79]
[77.23445084]
[61]
[63.33415032]
[66]
[69.79955833]
[79]
[82.31727121]
[66]
[62.96840293]
[71]
[69.65047853]
[83]
[82.29772481]
[107]
[107.5670223]
[75]
[72.68886422]
[87]
[85.37000334]
[110]
[110.76854474]
SSE = 19.697207782900612
MAPE = 0.04873289978762851
nan




In [5]:
modelScore(features, emissions, 'decision tree')

[57]
[62.]
[62]
[57.]
[73]
[71.]
[46]
[50.]
[50]
[61.]
[58]
[73.]
[57]
[57.]
[71]
[73.]
[72]
[77.]
[77]
[72.]
[89]
[76.]
[80]
[69.]
[96]
[89.]
[110]
[83.]
[70]
[72.]
[75]
[64.]
[85]
[67.]
[64]
[75.]
[61]
[50.]
[69]
[80.]
[76]
[75.]
[88]
[110.]
[112]
[107.]
[74]
[81.]
[81]
[75.]
[93]
[88.]
[117]
[110.]
[61]
[56.]
[67]
[66.]
[83]
[79.]
[68]
[73.]
[75]
[67.]
[91]
[87.]
[65]
[68.]
[73]
[74.]
[56]
[61.]
[66]
[66.]
[79]
[83.]
[61]
[68.]
[66]
[75.]
[79]
[91.]
[66]
[61.]
[71]
[66.]
[83]
[83.]
[107]
[110.]
[75]
[75.]
[87]
[93.]
[110]
[117.]
SSE = 75.72916666666667
MAPE = 0.08944496644168443
nan




In [6]:
modelScore(features, emissions, 'random forest')

[57]
[62.27]
[62]
[59.82]
[73]
[67.85]
[46]
[54.03]
[50]
[54.89]
[58]
[68.53]
[57]
[59.81]
[71]
[69.87]
[72]
[76.2]
[77]
[72.7]
[89]
[79.09]
[80]
[76.85]
[96]
[83.18]
[110]
[88.98]
[70]
[68.61]
[75]
[66.66]
[85]
[71.69]
[64]
[68.99]
[61]
[59.21]
[69]
[72.8]
[76]
[78.16]
[88]
[95.77]
[112]
[110.31]
[74]
[72.25]
[81]
[75.53]
[93]
[88.95]
[117]
[105.91]
[61]
[64.7]
[67]
[71.52]
[83]
[83.27]
[68]
[68.89]
[75]
[75.43]
[91]
[85.92]
[65]
[69.32]
[73]
[68.87]
[56]
[62.9]
[66]
[67.19]
[79]
[80.94]
[61]
[67.44]
[66]
[72.46]
[79]
[88.22]
[66]
[67.06]
[71]
[71.77]
[83]
[87.26]
[107]
[106.11]
[75]
[72.03]
[87]
[89.45]
[110]
[107.84]
SSE = 39.734358333333326
MAPE = 0.0647321532672896
nan




In [7]:
modelScore(features, emissions, 'support vector regression')

[57]
[58.67271433]
[62]
[59.68403169]
[73]
[65.68403441]
[46]
[47.05914482]
[50]
[49.1078083]
[58]
[55.03834985]
[57]
[68.73429345]
[71]
[78.19310244]
[72]
[74.15362192]
[77]
[75.14742224]
[89]
[81.14742161]
[80]
[85.5135565]
[96]
[88.33164724]
[110]
[100.05065896]
[70]
[61.52331239]
[75]
[64.5214512]
[85]
[70.5215402]
[64]
[68.54000165]
[61]
[64.47377053]
[69]
[70.28485132]
[76]
[76.14856924]
[88]
[88.14856046]
[112]
[112.14849326]
[74]
[77.07155384]
[81]
[80.99743973]
[93]
[94.41495932]
[117]
[116.65664217]
[61]
[65.89390035]
[67]
[71.8938334]
[83]
[83.89315016]
[68]
[68.60746622]
[75]
[74.1273858]
[91]
[86.15185628]
[65]
[65.32762072]
[73]
[68.30521518]
[56]
[60.96118172]
[66]
[66.96151527]
[79]
[78.34120406]
[61]
[64.88313317]
[66]
[70.88313232]
[79]
[82.88399173]
[66]
[65.11592742]
[71]
[71.45728331]
[83]
[83.34848179]
[107]
[107.64992103]
[75]
[74.69665309]
[87]
[86.49972528]
[110]
[110.92163419]
SSE = 23.649005892460583
MAPE = 0.04674170011550882
nan




In [8]:
modelScore(features, emissions, 'polynomial regression')

[57]
[56.45665831]
[62]
[62.36222779]
[73]
[71.91331663]
[46]
[47.92294866]
[50]
[50.30478208]
[58]
[53.46697829]
[57]
[49.44269481]
[71]
[78.55730519]
[72]
[74.21484058]
[77]
[78.16587768]
[89]
[83.31824571]
[80]
[85.10397405]
[96]
[92.98075722]
[110]
[116.50141561]
[70]
[72.91073808]
[75]
[75.48628381]
[85]
[79.23888432]
[64]
[33.83847832]
[61]
[-8.95616543]
[69]
[68.4475736]
[76]
[75.0210546]
[88]
[88.42188132]
[112]
[115.1222958]
[74]
[74.78034542]
[81]
[80.62248]
[93]
[93.32820697]
[117]
[115.70788201]
[61]
[58.66413904]
[67]
[68.27743759]
[83]
[83.66765273]
[68]
[67.38819985]
[75]
[75.79641794]
[91]
[90.28207759]
[65]
[72.95902602]
[73]
[64.5308139]
[56]
[59.29803858]
[66]
[63.98248463]
[79]
[78.55589893]
[61]
[57.11454707]
[66]
[66.58258067]
[79]
[82.97966686]
[66]
[64.33294245]
[71]
[71.73615149]
[83]
[84.11968822]
[107]
[105.53949094]
[75]
[75.82356957]
[87]
[87.3139762]
[110]
[108.16253265]
SSE = 131.59927368581603
MAPE = 0.0650026720431112
nan




In [9]:
modelScore(features, emissions, 'xgboost')

[57]
[59.137653]
[62]
[60.700104]
[73]
[67.74171]
[46]
[51.725197]
[50]
[57.250187]
[58]
[66.95005]
[57]
[61.250145]
[71]
[68.72505]
[72]
[77.11248]
[77]
[78.59166]
[89]
[80.402466]
[80]
[76.955]
[96]
[86.5249]
[110]
[88.099915]
[70]
[70.30004]
[75]
[68.57505]
[85]
[71.44173]
[64]
[71.59169]
[61]
[53.275223]
[69]
[73.59002]
[76]
[77.2125]
[88]
[92.24988]




[112]
[110.89966]
[74]
[73.06504]
[81]
[75.591675]
[93]
[94.899864]
[117]
[109.49961]
[61]
[63.183407]
[67]
[70.67505]
[83]
[81.09996]
[68]
[71.849205]
[75]
[71.68296]
[91]
[85.19992]
[65]
[70.50006]
[73]
[69.39006]
[56]
[63.592247]
[66]
[65.325096]
[79]
[82.599945]
[61]
[66.88337]
[66]
[71.41753]
[79]
[89.549904]
[66]
[66.122215]
[71]
[69.76256]
[83]
[86.924904]
[107]
[101.19976]
[75]
[75.06663]
[87]
[89.59986]
[110]
[105.59973]
SSE = 38.03783664951061
MAPE = 0.06440490683705988
nan




In [10]:
modelScore(features, emissions, 'lasso')

[57]
[57.90363727]
[62]
[60.60062854]
[73]
[65.7941736]
[46]
[49.21626694]
[50]
[51.99281399]
[58]
[57.56450164]
[57]
[68.0299457]
[71]
[75.63119451]
[72]
[72.62561239]
[77]
[75.36205577]
[89]
[80.84049434]
[80]
[81.74840732]
[96]
[88.46892648]
[110]
[99.82781387]
[70]
[62.34826306]
[75]
[65.0907168]
[85]
[70.65492073]
[64]
[67.99207912]
[61]
[67.35031755]
[69]
[73.81068749]
[76]
[79.539846]
[88]
[91.53138886]
[112]
[116.32323453]
[74]
[76.72259726]
[81]
[82.48175113]
[93]
[94.47922891]
[117]
[118.80646835]
[61]
[67.48054272]
[67]
[73.36488138]
[83]
[84.76668918]
[68]
[69.34227062]
[75]
[75.122618]
[91]
[86.33305924]
[65]
[65.92357318]
[73]
[66.50732873]
[56]
[59.54702311]
[66]
[65.03603204]
[79]
[76.87004786]
[61]
[61.86872788]
[66]
[68.05370606]
[79]
[79.90805342]
[66]
[65.57836897]
[71]
[71.82810364]
[83]
[83.81204556]
[107]
[107.95453261]
[75]
[74.54039165]
[87]
[86.56495074]
[110]
[110.79779658]
SSE = 23.44672381260511
MAPE = 0.0479361956012399
nan


