In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.preprocessing import PolynomialFeatures

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.linear_model import Lasso

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [3]:
df = pd.read_csv('IphoneData.csv')
display = df['DISPLAY (inch)'].to_numpy().reshape(-1, 1)
storage = df['STORAGE (gb)'].to_numpy().reshape(-1, 1)
memory = df['MEMORY (gb)'].to_numpy().reshape(-1, 1)
cpu = df['CPU (cores)'].to_numpy().reshape(-1, 1)
gpu = df['GPU (cores)'].to_numpy().reshape(-1, 1)
neuralEngine = df['NEURAL ENGINE (cores)'].to_numpy().reshape(-1, 1)
weight = df['WEIGHT (gm)'].to_numpy().reshape(-1, 1)
frontCamera = df['FRONT CAMERA (mp)'].to_numpy().reshape(-1, 1)
battery = df['BATTERY (mAh)'].to_numpy().reshape(-1, 1)
numCameras = df['number of camera (back)'].to_numpy().reshape(-1, 1)
megapixels = df['Sum of megapixels'].to_numpy().reshape(-1, 1)
mainCamera = df['MAIN CAMERA (mp)'].to_numpy().reshape(-1, 1)
newdf = df.drop(columns=['NAME', 'CO2E'])
features = newdf.to_numpy()
emissions = df['CO2E'].to_numpy()

In [54]:
def modelScore(X, y, model):
    avgR2 = avgSSE = avgMAPE = 0
    #cross validation using K folds
    kFolds = 5
    for i, (trainIndex, testIndex) in enumerate(KFold(n_splits=kFolds, shuffle=True, random_state=42).split(X)):
        X_train = X[trainIndex]
        X_test = X[testIndex]
        y_train = y[trainIndex]
        y_test = y[testIndex]
    

        if model == 'linear regression':
            fittedModel = LinearRegression().fit(X_train, y_train)
                
        elif model == 'decision tree':
            fittedModel = tree.DecisionTreeRegressor().fit(X_train, y_train)
                
        elif model == 'random forest':
            y_train = y_train.flatten()
            fittedModel = RandomForestRegressor().fit(X_train, y_train)

        elif model == 'support vector regression':
            y_train = y_train.flatten()
            fittedModel = SVR(kernel = 'linear').fit(X_train, y_train)    
    
        elif model == 'polynomial regression':
            polyX = PolynomialFeatures(degree=2).fit_transform(X)
            fittedModel = LinearRegression().fit(polyX[trainIndex], y_train)
            X_test = polyX[testIndex]

        elif model == 'xgboost':
            fittedModel = xgb.XGBRFRegressor(objective ='reg:linear', n_estimators = 10, seed = 123).fit(X_train, y_train)
        
        elif model =='lasso':
            fittedModel = Lasso().fit(X_train, y_train)
            

        y_pred = fittedModel.predict(X_test)
        
        #scores
        avgR2 += r2_score(y_test, y_pred)    
        avgSSE += np.sum((y_test - y_pred) ** 2)
        avgMAPE += mean_absolute_percentage_error(y_test, y_pred)

    
    avgR2, avgSSE, avgMAPE = avgR2 / kFolds, avgSSE / kFolds, avgMAPE / kFolds
   #print results
    """print(model)
    print("R2 = " + str(avgR2))
    print("SSE = " + str(avgSSE))
    print("MAPE = " + str(avgMAPE))"""

    print((avgR2))
    print((avgSSE))
    print((avgMAPE))

In [57]:
modelsList = ['linear regression', 'decision tree', 'random forest', 'support vector regression', 'polynomial regression', 'xgboost', 'lasso']


In [60]:
#display
for model in modelsList:
    print(modelScore(display, emissions, model))

0.15589470797548524
2094.038080922909
0.15360560549266966
None
0.05843016638822514
2266.046979501335
0.16051508703483333
None
0.08026640056389185
2240.2129268079325
0.15956600965497705
None
0.015396372660609604
2445.969054213753
0.15137075102256967
None
0.1554786194445508
2135.6302732177437
0.15758999966292087
None
0.1312418457085313
2088.2762252153507
0.15119684022488897
None
0.1525236352233146
2117.496135548886
0.1549017491345198
None




In [47]:
#storage
for model in modelsList:
    print(modelScore(storage, emissions, model))

linear regression
R2 = 0.6431018671108133
SSE = 770.9140755021252
MAPE = 0.0973274358732411
None
decision tree
R2 = 0.5713544960090955
SSE = 946.0476095992199
MAPE = 0.10521336782807274
None
random forest
R2 = 0.5694890394816211
SSE = 955.5759104027936
MAPE = 0.1065952900107651
None
support vector regression
R2 = 0.6249976571543177
SSE = 792.0741224468253
MAPE = 0.09627068270048764
None


In [48]:
#memory
for model in modelsList:
    modelScore(memory, emissions, model)

linear regression
R2 = 0.06993150539898914
SSE = 2299.446182115787
MAPE = 0.16274849541165504
decision tree
R2 = 0.01769628861973025
SSE = 2452.7603573407832
MAPE = 0.17341850035894196
random forest
R2 = 0.03737311181002338
SSE = 2407.6131267491496
MAPE = 0.16902444785464876
support vector regression
R2 = 0.015170986283357423
SSE = 2405.0439999999994
MAPE = 0.16341339210601147


In [49]:
#cpu
for model in modelsList:
    (modelScore(cpu, emissions, model))

linear regression
R2 = -0.061858614547081546
SSE = 2586.8108147978164
MAPE = 0.17237285700478913
decision tree
R2 = -0.061858614547081546
SSE = 2586.8108147978164
MAPE = 0.17237285700478913
random forest
R2 = -0.0671838871991282
SSE = 2600.984295064499
MAPE = 0.17394394534782265
support vector regression
R2 = -0.07618225374089085
SSE = 2617.536
MAPE = 0.1652835629374711


In [50]:
#gpu
for model in modelsList:
    (modelScore(gpu, emissions, model))

linear regression
R2 = 0.05433537845387386
SSE = 2322.8752374510905
MAPE = 0.1632028213561844
decision tree
R2 = 0.010118312081191605
SSE = 2446.617506260046
MAPE = 0.170671498050579
random forest
R2 = 0.022570030594744696
SSE = 2403.154452137812
MAPE = 0.16867567189191116
support vector regression
R2 = -0.01638589413256808
SSE = 2483.488
MAPE = 0.1604225217270992


In [51]:
#neural engine
for model in modelsList:
    (modelScore(neuralEngine, emissions, model))

linear regression
R2 = -0.06826250472796207
SSE = 2602.102845324158
MAPE = 0.17206672103453602
decision tree
R2 = -0.12598815165443383
SSE = 2682.2246647220936
MAPE = 0.17666078885527264
random forest
R2 = -0.12760171516541047
SSE = 2675.8847722642945
MAPE = 0.17553635188770422
support vector regression
R2 = -0.06465568608256511
SSE = 2605.9598571427805
MAPE = 0.16249886543586095


In [52]:
#weight
for model in modelsList:
    (modelScore(weight, emissions, model))

linear regression
R2 = 0.17725347590613208
SSE = 1880.6279016376764
MAPE = 0.1447997974512276
decision tree
R2 = -0.12430209591888633
SSE = 2554.4555555555553
MAPE = 0.17447144713641027
random forest
R2 = -0.03217829999619552
SSE = 2360.2619843642015
MAPE = 0.16700170596912678
support vector regression
R2 = 0.12388424206471949
SSE = 2064.640642355173
MAPE = 0.1401791543442429


In [53]:
#camera
for model in modelsList:
    (modelScore(camera, emissions, model))

NameError: name 'camera' is not defined

In [None]:
#battery
for model in modelsList:
    (modelScore(battery, emissions, model))

0.0700727998724161
57132.8307379457
0.1880184705512142


In [None]:
for model in modelsList:
    (modelScore(, emissions, model))