In [54]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVR
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [55]:
# Read the raw table once; `df` is kept around for later column selection.
df = pd.read_csv('IphoneData.csv')

# Regression target: the CO2E column as a 1-D NumPy array.
emissions = df['CO2E'].to_numpy()

# Predictor matrix: every column except NAME and the CO2E target itself.
newdf = df.drop(['NAME', 'CO2E'], axis=1)
features = newdf.to_numpy()

In [46]:
def modelScore(X, y, model, random_state=42, train_size=.75):
    """Fit one regressor on a train/test split of (X, y) and print its test metrics.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Regression target. A single-column 2-D target is flattened to 1-D so
        that every estimator sees the same shape and the residuals are computed
        element-wise.
    model : str
        Which estimator to fit. One of 'linear regression', 'decision tree',
        'random forest', 'support vector regression' (linear kernel) or
        'polynomial regression' (degree-2 feature expansion followed by
        ordinary least squares).
    random_state : int, default 42
        Seed for the train/test split and for the tree-based estimators, so
        repeated calls print the same numbers.
    train_size : float, default 0.75
        Fraction of the samples used for fitting; the rest is the test set.

    Returns
    -------
    None
        The model name, R2, SSE and MAPE on the test set are printed. MAPE is
        reported as a fraction, not a percentage.

    Raises
    ------
    ValueError
        If `model` is not one of the supported names.
    """
    supported = (
        'linear regression',
        'decision tree',
        'random forest',
        'support vector regression',
        'polynomial regression',
    )
    # Reject unknown names up front; otherwise no branch below would bind an
    # estimator and the failure would surface later as an UnboundLocalError.
    if model not in supported:
        raise ValueError(
            "Unknown model " + repr(model) + "; expected one of: " + ", ".join(supported)
        )

    # Normalise the target once. Flattening only y_train (as some estimators
    # require) while leaving y_test as a column vector would make
    # `y_test - y_pred` broadcast to an (n, n) matrix and inflate the SSE.
    y = np.asarray(y)
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state, train_size=train_size
    )

    if model == 'linear regression':
        estimator = LinearRegression()
    elif model == 'decision tree':
        # Seeded: ties between equally good splits are otherwise broken randomly.
        estimator = tree.DecisionTreeRegressor(random_state=random_state)
    elif model == 'random forest':
        # Seeded: bootstrap sampling makes an unseeded forest vary run to run.
        estimator = RandomForestRegressor(random_state=random_state)
    elif model == 'support vector regression':
        estimator = SVR(kernel='linear')
    else:  # 'polynomial regression'
        # The expansion depends only on the number of input columns, so
        # expanding the already-split parts yields the same rows as expanding
        # X first and splitting again with the same seed.
        expander = PolynomialFeatures(degree=2)
        X_train = expander.fit_transform(X_train)
        X_test = expander.transform(X_test)
        estimator = LinearRegression()

    fittedModel = estimator.fit(X_train, y_train)
    y_pred = fittedModel.predict(X_test)

    r2 = r2_score(y_test, y_pred)
    sse = np.sum((y_test - y_pred) ** 2)
    mape = mean_absolute_percentage_error(y_test, y_pred)

    # print results
    print(model)
    print("R2 = " + str(r2))
    print("SSE = " + str(sse))
    print("MAPE = " + str(mape))

In [5]:
# Ordinary least squares on every predictor column.
modelScore(X=features, y=emissions, model='linear regression')

linear regression
R2 = 0.9210088454620518
SSE = 340.5835279827868
MAPE = 0.04867998113160696


In [6]:
# Single regression tree on every predictor column.
modelScore(X=features, y=emissions, model='decision tree')

decision tree
R2 = 0.838113645148821
SSE = 698.0
MAPE = 0.1007999173605792


In [41]:
# Random forest regressor on every predictor column.
modelScore(X=features, y=emissions, model='random forest')

random forest
R2 = 0.7631965365287978
SSE = 1021.0175999999999
MAPE = 0.1197113016815351


In [40]:
# Linear-kernel support vector regression on every predictor column.
modelScore(X=features, y=emissions, model='support vector regression')

support vector regression
R2 = 0.7969520639952627
SSE = 875.4750174070921
MAPE = 0.0890968384980616


In [48]:
# Degree-2 polynomial expansion of every predictor column, then least squares.
modelScore(X=features, y=emissions, model='polynomial regression')

polynomial regression
R2 = 0.9582546957391193
SSE = 179.99183687149736
MAPE = 0.04579050585792105


In [52]:
# Model names accepted by modelScore, in the order they will be evaluated.
modelsList = [
    'linear regression',
    'decision tree',
    'random forest',
    'support vector regression',
    'polynomial regression',
]

# Reduced predictor matrix: only the weight, storage and battery columns of `df`.
multipleFeatures = df.loc[:, ['WEIGHT (gm)', 'STORAGE (gb)', 'BATTERY (mAh)']].to_numpy()

In [53]:
# Score every listed model on the three-column predictor matrix.
for model in modelsList:
    modelScore(X=multipleFeatures, y=emissions, model=model)

linear regression
R2 = 0.7843487773844471
SSE = 929.8161881773924
MAPE = 0.09737703297942069
decision tree
R2 = 0.8265172013915733
SSE = 748.0
MAPE = 0.10516819693047168
random forest
R2 = 0.7636805268393247
SSE = 1018.9307951111114
MAPE = 0.11688193170759725
support vector regression
R2 = 0.7910558669894937
SSE = 900.8974534969665
MAPE = 0.08695442306969002
polynomial regression
R2 = 0.8404470905515723
SSE = 687.9389612384709
MAPE = 0.08509967009149548
