In [3]:
import pandas as pd
import numpy as np

from sklearn.model_selection import cross_val_score, KFold, train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import PolynomialFeatures

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn import tree
from sklearn.svm import SVR
import xgboost as xgb
from sklearn.linear_model import Lasso
from sklearn.neural_network import MLPRegressor

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [4]:
df = pd.read_csv('SamsungData.csv')
newdf = df.drop(columns=['NAME', 'CO2E'])
features = newdf.to_numpy()
emissions = df['CO2E'].to_numpy()

In [65]:
def modelScore(X, y, model):
    avgSSE = []
    avgMAPE = []
    splits = LeaveOneOut().get_n_splits(X)
    #cross validation using leave one out
    for i, (trainIndex, testIndex) in enumerate(LeaveOneOut().split(X)):
        X_train = X[trainIndex]
        X_test = X[testIndex]
        y_train = y[trainIndex]
        y_test = y[testIndex]
    

        if model == 'linear regression':
            fittedModel = LinearRegression().fit(X_train, y_train)
                
        elif model == 'decision tree':
            fittedModel = tree.DecisionTreeRegressor(criterion='squared_error', max_depth=4, min_samples_split=2,
                                                 max_leaf_nodes=110).fit(X_train, y_train)
                
        elif model == 'random forest':
            y_train = y_train.flatten()
            RF = RandomForestRegressor(random_state=None, criterion='friedman_mse', max_depth=5, n_estimators=50,
                                        max_features=None, max_leaf_nodes=100, ccp_alpha=0.1)
            fittedModel = RF.fit(X_train, y_train)
            
        elif model == 'support vector regression':
            y_train = y_train.flatten()
            fittedModel = SVR(kernel='poly', degree=5, coef0=10, tol=1e-2).fit(X_train, y_train)
    
        elif model == 'polynomial regression':
            polyFeatures = PolynomialFeatures(degree=3, interaction_only=True, include_bias=True, order='C')
            polyX_train = polyFeatures.fit_transform(X_train)
            polyX_test = polyFeatures.transform(X_test)
            fittedModel = LinearRegression().fit(polyX_train, y_train)
            X_test = polyX_test

        elif model == 'xgboost':
            xgbModel = xgb.XGBModel(learning_rate=0.2, n_estimators=225, booster='gblinear', eval_metric='rmse',
                                    feature_selector='thrifty', updater='coord_descent', reg_lambda=0.1, reg_alpha=0.3)
            fittedModel = xgbModel.fit(X_train, y_train)
        
        elif model =='lasso':
            lassoModel = Lasso()
            fittedModel = lassoModel.fit(X_train, y_train)

        elif model =="neural network":
            nn = MLPRegressor(random_state=42, hidden_layer_sizes=(15, 10, 10, 10, 15), activation='identity', solver='lbfgs', max_iter=7500)
            
            fittedModel = nn.fit(X_train, y_train)          

        y_pred = fittedModel.predict(X_test)
        
        #scores
        avgSSE.append(np.sum((y_test - y_pred) ** 2))
        avgMAPE.append(mean_absolute_percentage_error(y_test, y_pred))

    avgSSE, avgMAPE = np.mean(avgSSE), np.mean(avgMAPE)

   #print results
    """print(model)
    print("SSE = " + str(avgSSE))
    print("MAPE = " + str(avgMAPE))"""
    print()
    print(str(avgSSE))
    print(str(avgMAPE))



"""alpha=0.1, batch_size=25, hidden_layer_sizes=(10, 10, 10, 10),
             learning_rate_init=0.5, max_iter=12000, random_state=42, tol=1.01, max_fun=15000,
             solver='lbfgs', activation='identity'"""


"alpha=0.1, batch_size=25, hidden_layer_sizes=(10, 10, 10, 10),\n             learning_rate_init=0.5, max_iter=12000, random_state=42, tol=1.01, max_fun=15000,\n             solver='lbfgs', activation='identity'"

In [66]:
modelScore(features, emissions, 'support vector regression')


98.67874662041812
0.09231360616028209


: 

In [64]:
modelScore(features, emissions, 'linear regression')


149.95890892586743
0.12889975733649217


In [17]:
modelScore(features, emissions, 'decision tree')


165.59727272727275
0.14373075776824762


In [18]:
modelScore(features, emissions, 'random forest')


116.034797078611
0.11967367965663381


In [19]:
modelScore(features, emissions, 'support vector regression')

KeyboardInterrupt: 

In [None]:
modelScore(features, emissions, 'polynomial regression')

-16.83619441848349
125054.0929656862
1.0690758913702914


In [20]:
modelScore(features, emissions, 'xgboost')


103.12439493107054
0.10692752219456352


: 

In [None]:
modelScore(features, emissions, 'lasso')

0.7072522115508122
763.676894340049
0.10457777656664932


In [None]:
modelScore(features, emissions, 'neural network')

-11.131269753282455
48299.81366349415
0.7631495897631655




In [14]:
modelsList = ['linear regression', 'decision tree', 'random forest', 'support vector regression', 'polynomial regression', 'xgboost', 'lasso', 'neural network']
multipleFeatures = df[['DISPLAY (inch)','STORAGE (gb)','MEMORY (gb)','CPU (cores)','CPU SPEED (GHz)','GPU (cores)','GPU BENCHMARK (points)','WEIGHT (gm)','MAIN CAMERA (mp)','FRONT CAMERA (mp)','NUMBER OF CAMERAS (back)','SUM OF MEGAPIXELS','BATTERY (mAh)']].to_numpy()
wholeFeaturesList = ['DISPLAY (inch)','STORAGE (gb)','MEMORY (gb)','CPU (cores)','CPU SPEED (GHz)','GPU (cores)','GPU BENCHMARK (3DMark)','WEIGHT (gm)','MAIN CAMERA (mp)','FRONT CAMERA (mp)','NUMBER OF CAMERAS (back)','SUM OF MEGAPIXELS','BATTERY (mAh)']

KeyError: "['GPU BENCHMARK (points)'] not in index"

In [None]:
for model in modelsList:
    modelScore(multipleFeatures, emissions, model)


149.95890892586743
0.12889975733649217

180.00310606060606
0.14542551168383402

117.08002531130482
0.11754694256878405

353.22713601985015
0.19885087633457998

104279.8369209092
2.1631959336427387

91.89866658516046
0.09838758023268407

120.9429543445197
0.11395622989261046


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(



121.42514453581889
0.10965798656884643
