In [14]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import PolynomialFeatures

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree

from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [21]:
# Load the dataset once. 'CO2E' is the regression target; every column other
# than 'NAME' and 'CO2E' is used as a predictor.
df = pd.read_csv('IphoneData.csv')
emissions = df['CO2E'].to_numpy()
newdf = df.drop(columns=['NAME', 'CO2E'])
features = newdf.to_numpy()

In [22]:
def modelScore(X, y, model):
    """Fit one regression model on a 75/25 split and print its test-set metrics.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Regression target.
    model : str
        One of 'linear regression', 'decision tree', 'random forest',
        'support vector regression' or 'polynomial regression'.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names.

    Notes
    -----
    Prints the model name followed by R2, SSE (sum of squared errors) and
    MAPE, all computed on the held-out 25 %. Returns None.
    """
    # One seed for the split and for the stochastic estimators, so that
    # re-running a cell reproduces the printed numbers.
    seed = 42

    # Factories are lazy: nothing is built until the name has been validated.
    estimators = {
        'linear regression': lambda: LinearRegression(),
        'decision tree': lambda: tree.DecisionTreeRegressor(random_state=seed),
        'random forest': lambda: RandomForestRegressor(random_state=seed),
        'support vector regression': lambda: SVR(kernel='linear'),
        # Polynomial regression is ordinary least squares on expanded features.
        'polynomial regression': lambda: LinearRegression(),
    }
    if model not in estimators:
        raise ValueError(
            "Unknown model " + repr(model) + "; expected one of: "
            + ", ".join(sorted(estimators))
        )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=seed, train_size=.75
    )

    if model == 'polynomial regression':
        # The degree-2 expansion is row-wise, so fitting it on the training
        # fold and transforming the test fold matches expanding everything
        # first and splitting again with the same seed.
        poly = PolynomialFeatures(degree=2)
        X_train = poly.fit_transform(X_train)
        X_test = poly.transform(X_test)
    elif model in ('random forest', 'support vector regression'):
        # These estimators expect a 1-D target; np.ravel also accepts a
        # pandas Series, unlike ndarray.flatten().
        y_train = np.ravel(y_train)

    fittedModel = estimators[model]().fit(X_train, y_train)
    y_pred = fittedModel.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    # Ravel both sides: a (n, 1) target minus a (n,) prediction would
    # otherwise broadcast to (n, n) and inflate the sum.
    sse = np.sum((np.ravel(y_test) - np.ravel(y_pred)) ** 2)
    mape = mean_absolute_percentage_error(y_test, y_pred)

    # print results
    print(model)
    print("R2 = " + str(r2))
    print("SSE = " + str(sse))
    print("MAPE = " + str(mape))

In [30]:
# Ordinary least squares on the full predictor matrix.
modelScore(X=features, y=emissions, model='linear regression')

linear regression
R2 = 0.9682603940497357
SSE = 136.8506009888892
MAPE = 0.04293170663755083


In [24]:
# Single decision tree on the full predictor matrix.
modelScore(X=features, y=emissions, model='decision tree')

decision tree
R2 = 0.854116737533823
SSE = 629.0
MAPE = 0.09747656542554901


In [25]:
# Random forest on the full predictor matrix.
modelScore(X=features, y=emissions, model='random forest')

random forest
R2 = 0.7728763200618477
SSE = 979.2816000000001
MAPE = 0.11495638546157438


In [26]:
# Linear-kernel support vector regression on the full predictor matrix.
modelScore(X=features, y=emissions, model='support vector regression')

support vector regression
R2 = 0.9769350516333538
SSE = 99.4483690408563
MAPE = 0.025507395370475017


In [27]:
# Degree-2 polynomial regression on the full predictor matrix.
modelScore(X=features, y=emissions, model='polynomial regression')

polynomial regression
R2 = 0.9506411798055517
SSE = 212.8187797383959
MAPE = 0.050550119569909124


In [28]:
# Model names accepted by modelScore, in the order they are evaluated.
modelsList = [
    'linear regression',
    'decision tree',
    'random forest',
    'support vector regression',
    'polynomial regression',
]

# Reduced predictor set: only the storage and battery columns.
multipleFeatures = df.loc[:, ['STORAGE (gb)', 'BATTERY (mAh)']].to_numpy()

In [29]:
# Score every model on the two-column predictor subset.
for model in modelsList:
    modelScore(X=multipleFeatures, y=emissions, model=model)

linear regression
R2 = 0.6843880149908889
SSE = 1360.8136753642843
MAPE = 0.13006742811128877
decision tree
R2 = 0.7801314263625821
SSE = 948.0
MAPE = 0.10517705825709024
random forest
R2 = 0.7827949193080788
SSE = 936.5159062500003
MAPE = 0.1136679269917321
support vector regression
R2 = 0.6753960541944355
SSE = 1399.584012998326
MAPE = 0.12660648111291717
polynomial regression
R2 = 0.7092527891809036
SSE = 1253.605057315004
MAPE = 0.12984773201013636
