In [1]:
import pandas as pd
from util import cross_validate

In [2]:
# Read the modelling table; the path is relative to the notebook's working directory.
data = pd.read_csv('../data/imputed.csv')

# Columns that must never appear in the feature matrix.
# NOTE(review): these look like three alternative outcome measures - confirm
# that this is why the two unused ones are excluded as well.
target = ['EURWH_MBOE', 'OilEURWH_MBBL', 'GasEURWH_BCF']

# Response variable: only EURWH_MBOE is modelled in this notebook.
y = data['EURWH_MBOE']

# Feature matrix: every column that is not listed in `target`, original order kept.
is_feature = ~data.columns.isin(target)
X = data.loc[:, is_feature]

In [27]:
from sklearn.linear_model import LinearRegression

# Baseline: plain least-squares fit with scikit-learn's default settings.
# cross_validate (from util) prints mean RMSE, NRMSE and adjusted R^2; the
# scores of this model serve as a reference point for the models that follow.
model = LinearRegression()
# NOTE(review): presumably the out-of-fold predictions - confirm in util.cross_validate.
predictions = cross_validate(model, X, y)

Mean RMSE: 519.4715650973463
Mean NRMSE: 0.4908922307604242
Mean Adjusted R^2: 0.39171923426434524


In [23]:
from sklearn.linear_model import Ridge

# L2 penalty strengths to try, spanning 0.01 .. 1000.
# NOTE(review): the Ridge penalty depends on feature scale - confirm the
# features are standardised upstream or inside util.cross_validate.
ridge_alphas = [1e-2, 1e-1, 1, 10, 100, 1000]

for alpha in ridge_alphas:
    # Header line first so the metrics printed by cross_validate appear under it.
    print("Evaluating model with alpha =", alpha)
    model = Ridge(alpha=alpha)
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with alpha = 0.01
Mean RMSE: 519.4715232802155
Mean NRMSE: 0.49089219158713043
Mean Adjusted R^2: 0.39171933362820827

Evaluating model with alpha = 0.1
Mean RMSE: 519.4711478565829
Mean NRMSE: 0.49089183990065893
Mean Adjusted R^2: 0.39172022568627696

Evaluating model with alpha = 1
Mean RMSE: 519.467483742307
Mean NRMSE: 0.490888407599593
Mean Adjusted R^2: 0.39172893158251576

Evaluating model with alpha = 10
Mean RMSE: 519.4382282350982
Mean NRMSE: 0.4908610132668542
Mean Adjusted R^2: 0.3917983976849318

Evaluating model with alpha = 100
Mean RMSE: 519.3871543227492
Mean NRMSE: 0.49081343426810486
Mean Adjusted R^2: 0.3919188351946744

Evaluating model with alpha = 1000
Mean RMSE: 520.9817190365195
Mean NRMSE: 0.492319251433546
Mean Adjusted R^2: 0.3881672024395911



In [22]:
from sklearn.linear_model import Lasso

# L1 penalty strengths to try, spanning 0.1 .. 1000.
# NOTE(review): like Ridge, the Lasso penalty depends on feature scale -
# confirm the features are standardised before relying on these scores.
lasso_alphas = (1e-1, 1, 10, 100, 1000)

for alpha in lasso_alphas:
    # Header line first so the metrics printed by cross_validate appear under it.
    print("Evaluating model with alpha =", alpha)
    model = Lasso(alpha=alpha)
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with alpha = 0.1
Mean RMSE: 519.4441786337054
Mean NRMSE: 0.49086665626466247
Mean Adjusted R^2: 0.39178723082657996

Evaluating model with alpha = 1
Mean RMSE: 519.424883169093
Mean NRMSE: 0.4908477957086627
Mean Adjusted R^2: 0.39183681758405003

Evaluating model with alpha = 10
Mean RMSE: 523.8801770994944
Mean NRMSE: 0.49505923743201014
Mean Adjusted R^2: 0.38134864202269453

Evaluating model with alpha = 100
Mean RMSE: 575.2585347425963
Mean NRMSE: 0.5436718445900082
Mean Adjusted R^2: 0.25364397978234554

Evaluating model with alpha = 1000
Mean RMSE: 666.8020881572897
Mean NRMSE: 0.6302410686315301
Mean Adjusted R^2: -0.0036927883122761075



In [5]:
from sklearn.ensemble import RandomForestRegressor

# Fixed seed for the forest. Each tree is grown on a bootstrap sample of the
# training rows, so without random_state every re-run reports slightly
# different metrics and small gaps between configurations cannot be told
# apart from run-to-run noise.
SEED = 42

# Sweep forest size against a capped tree depth (6 configurations).
for n_estimators in [100, 250, 500]:
    for max_depth in [5, 10]:
        print("Evaluating model with n estimators =", n_estimators, "and max depth =", max_depth)
        model = RandomForestRegressor(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=SEED,
        )
        # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
        predictions = cross_validate(model, X, y)
        print()  # blank separator between configurations

Evaluating model with n estimators = 100 and max depth = 5
Mean RMSE: 513.4321666512012
Mean NRMSE: 0.4852101796416882
Mean Adjusted R^2: 0.4055213633943498

Evaluating model with n estimators = 100 and max depth = 10
Mean RMSE: 474.4652836312689
Mean NRMSE: 0.4483637162912134
Mean Adjusted R^2: 0.4923435240898799

Evaluating model with n estimators = 250 and max depth = 5
Mean RMSE: 513.5883846946176
Mean NRMSE: 0.4853605095295051
Mean Adjusted R^2: 0.4051530959080873

Evaluating model with n estimators = 250 and max depth = 10
Mean RMSE: 474.39991320295314
Mean NRMSE: 0.4482923209901471
Mean Adjusted R^2: 0.49250913513552164

Evaluating model with n estimators = 500 and max depth = 5
Mean RMSE: 513.6064844763553
Mean NRMSE: 0.4853783496833385
Mean Adjusted R^2: 0.4051142903852343

Evaluating model with n estimators = 500 and max depth = 10
Mean RMSE: 473.3875077854723
Mean NRMSE: 0.4473385550732334
Mean Adjusted R^2: 0.49466814201739656



In [7]:
from sklearn.ensemble import RandomForestRegressor

# Fixed seed for the forest. Each tree is grown on a bootstrap sample of the
# training rows, so without random_state the reported metrics change from
# run to run and the configurations cannot be compared reliably.
SEED = 42

# Sweep forest size with max_depth left at its default (no depth limit).
# NOTE(review): fully grown trees make this the slowest sweep in the notebook;
# if util.cross_validate does not already parallelise, n_jobs=-1 on the
# estimator is worth considering.
for n_estimators in [100, 250, 500]:
    print("Evaluating model with n estimators =", n_estimators, "and no max depth.")
    model = RandomForestRegressor(n_estimators=n_estimators, random_state=SEED)
    # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with n estimators = 100 and no max depth.
Mean RMSE: 452.90260722877656
Mean NRMSE: 0.4279659026697905
Mean Adjusted R^2: 0.5373938564864448

Evaluating model with n estimators = 250 and no max depth.
Mean RMSE: 451.17339650047245
Mean NRMSE: 0.42633124771171615
Mean Adjusted R^2: 0.5410033418042046

Evaluating model with n estimators = 500 and no max depth.
Fold: 6

KeyboardInterrupt: 

In [33]:
from lightgbm import LGBMRegressor

# Full-factorial grid: 3 forest sizes x 3 learning rates x 3 depth settings
# (27 configurations), visited in the same order as nested loops would.
# max_depth = -1 is LightGBM's "no depth limit" setting.
lgbm_grid = [
    (n_estimators, learning_rate, max_depth)
    for n_estimators in [100, 250, 500]
    for learning_rate in [1e-2, 1e-1, 1]
    for max_depth in [5, 10, -1]
]

for n_estimators, learning_rate, max_depth in lgbm_grid:
    print("Evaluating model with n estimators =", n_estimators, ", learning rate =", learning_rate, ", max depth = ", max_depth)
    model = LGBMRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        force_col_wise=True,
        verbose=-1,  # keep LightGBM's own logging out of the cell output
    )
    # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with n estimators = 100 , learning rate = 0.01 , max depth =  5
Mean RMSE: 533.0568941269072
Mean NRMSE: 0.5037600291593229
Mean Adjusted R^2: 0.3593582743610112

Evaluating model with n estimators = 100 , learning rate = 0.01 , max depth =  10
Mean RMSE: 526.9627973959795
Mean NRMSE: 0.49799218751120744
Mean Adjusted R^2: 0.3739330481758871

Evaluating model with n estimators = 100 , learning rate = 0.01 , max depth =  -1
Mean RMSE: 527.0482196933202
Mean NRMSE: 0.49807256747946926
Mean Adjusted R^2: 0.3737211737901768

Evaluating model with n estimators = 100 , learning rate = 0.1 , max depth =  5
Mean RMSE: 463.1925069155379
Mean NRMSE: 0.43771926665917055
Mean Adjusted R^2: 0.5161890096130414

Evaluating model with n estimators = 100 , learning rate = 0.1 , max depth =  10
Mean RMSE: 456.8237154448916
Mean NRMSE: 0.43168869444814195
Mean Adjusted R^2: 0.5293525094439764

Evaluating model with n estimators = 100 , learning rate = 0.1 , max depth =  -1
Mean RMSE: 456

In [38]:
from xgboost import XGBRegressor

# Full-factorial grid: 3 forest sizes x 3 learning rates x 2 depths
# (18 configurations).
for n_estimators in (100, 250, 500):
    for learning_rate in (1e-2, 1e-1, 1):
        for max_depth in (5, 10):
            print("Evaluating model with n estimators =", n_estimators, ", learning rate =", learning_rate, ", max depth = ", max_depth)
            # Collect the swept settings once, then hand them to the estimator.
            hyperparams = dict(
                n_estimators=n_estimators,
                learning_rate=learning_rate,
                max_depth=max_depth,
            )
            model = XGBRegressor(**hyperparams)
            # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
            predictions = cross_validate(model, X, y)
            print()  # blank separator between configurations

Evaluating model with n estimators = 100 , learning rate = 0.01 , max depth =  5
Mean RMSE: 533.5513335400916
Mean NRMSE: 0.5042281002563878
Mean Adjusted R^2: 0.3581504913337007

Evaluating model with n estimators = 100 , learning rate = 0.01 , max depth =  10
Mean RMSE: 505.0705884996939
Mean NRMSE: 0.4772804029637746
Mean Adjusted R^2: 0.42501345168019194

Evaluating model with n estimators = 100 , learning rate = 0.1 , max depth =  5
Mean RMSE: 465.54486575861273
Mean NRMSE: 0.43994580294519736
Mean Adjusted R^2: 0.5113389724952138

Evaluating model with n estimators = 100 , learning rate = 0.1 , max depth =  10
Mean RMSE: 451.0848837747424
Mean NRMSE: 0.426230932019146
Mean Adjusted R^2: 0.54112996605766

Evaluating model with n estimators = 100 , learning rate = 1 , max depth =  5
Mean RMSE: 559.3117669981004
Mean NRMSE: 0.528397743920753
Mean Adjusted R^2: 0.2932433258407404

Evaluating model with n estimators = 100 , learning rate = 1 , max depth =  10
Mean RMSE: 583.0109919295

In [3]:
from sklearn.svm import SVR

# Kernel x regularisation grid (9 configurations), kernels in the outer
# position so all C values for one kernel are reported together.
# NOTE(review): SVR is sensitive to feature scale - confirm the features are
# standardised upstream or inside util.cross_validate.
svr_grid = [
    (kernel, C)
    for kernel in ['rbf', 'linear', 'poly']
    for C in [1, 10, 100]
]

for kernel, C in svr_grid:
    print("Evaluating model with kernel =", kernel, ", C =", C)
    model = SVR(kernel=kernel, C=C)
    # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with kernel = rbf , C = 1
Mean RMSE: 613.1975335514837
Mean NRMSE: 0.5794708068806337
Mean Adjusted R^2: 0.151842402127845

Evaluating model with kernel = rbf , C = 10
Mean RMSE: 536.3809681633078
Mean NRMSE: 0.5068165058828301
Mean Adjusted R^2: 0.3514853022143085

Evaluating model with kernel = rbf , C = 100
Mean RMSE: 494.9361286941321
Mean NRMSE: 0.4676568107288356
Mean Adjusted R^2: 0.44790348557060355

Evaluating model with kernel = linear , C = 1
Mean RMSE: 525.3467721214191
Mean NRMSE: 0.4964229902878737
Mean Adjusted R^2: 0.37788753497027494

Evaluating model with kernel = linear , C = 10
Mean RMSE: 524.921496798095
Mean NRMSE: 0.4960175085403863
Mean Adjusted R^2: 0.37889057179224633

Evaluating model with kernel = linear , C = 100
Mean RMSE: 525.1319692143801
Mean NRMSE: 0.4962146512259113
Mean Adjusted R^2: 0.378390965700613

Evaluating model with kernel = polynomial , C = 1
Fold: 1

InvalidParameterError: The 'kernel' parameter of SVR must be a str among {'linear', 'precomputed', 'rbf', 'poly', 'sigmoid'} or a callable. Got 'polynomial' instead.

In [4]:
# Imported here so this cell runs on its own, like every other model cell.
from sklearn.svm import SVR

# Polynomial-kernel SVR only, over the same C values as the full SVR sweep.
# NOTE(review): the SVR sweep cell above already lists 'poly', so on a full
# re-run these three configurations are evaluated twice; consider dropping
# one of the two once the recorded outputs have been regenerated.
kernel = 'poly'

for C in [1, 10, 100]:
    print("Evaluating model with kernel =", kernel, ", C =", C)
    model = SVR(kernel=kernel, C=C)
    # cross_validate (from util) prints the mean RMSE / NRMSE / adjusted R^2.
    predictions = cross_validate(model, X, y)
    print()  # blank separator between configurations

Evaluating model with kernel = poly , C = 1
Mean RMSE: 630.2832082598261
Mean NRMSE: 0.595650642446912
Mean Adjusted R^2: 0.10358961072947778

Evaluating model with kernel = poly , C = 10
Mean RMSE: 562.2627879879828
Mean NRMSE: 0.5313265129804331
Mean Adjusted R^2: 0.2870583864080281

Evaluating model with kernel = poly , C = 100
Mean RMSE: 531.6520094197416
Mean NRMSE: 0.502339918342152
Mean Adjusted R^2: 0.35894784809051505

