# Settings

In [50]:
import pandas as pd
import numpy as np
import math

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Ridge
import xgboost as xgb

from matplotlib import pyplot
%matplotlib inline

# Read dataset

In [10]:
df = pd.read_csv('../TempForm_data_version1.csv')
df = df.drop(df.columns[0], axis=1)

In [11]:
df.head(10)

Unnamed: 0,id,time,target,weekday,month,monthday,slope,intersect,social,travel,...,screen,sms,activity,builtin,communication,entertainment,finance,game,office,other
0,AS14.01,2014-03-26,6.6,3,3,26,0.07,6.22,2198.4136,274.511,...,13483.5212,0.4,0.138201,1411.3234,7205.7014,448.0606,38.2366,0.0,35.0432,131.2188
1,AS14.01,2014-03-27,7.0,4,3,27,0.035,6.405,2215.1254,91.422,...,13172.5,0.4,0.131653,1016.9792,7747.274,428.6652,38.794,0.0,0.602,102.7682
2,AS14.01,2014-03-28,6.4,5,3,28,0.1,6.33,2314.2752,93.4238,...,15432.4978,0.4,0.116179,1116.5588,8141.9928,537.8682,43.0226,0.0,37.0922,94.846
3,AS14.01,2014-03-29,8.0,6,3,29,0.105,6.235,2236.6556,320.0256,...,15062.4292,0.4,0.10677,2871.8286,7267.8956,720.0796,52.1752,46.6072,37.0922,125.4716
4,AS14.01,2014-03-30,7.5,0,3,30,0.23,6.26,1686.508,246.5516,...,13717.4834,0.6,0.104619,3088.9634,5957.9082,850.1712,45.354,46.6072,36.4902,146.0788
5,AS14.01,2014-03-31,7.4,1,3,31,0.28,6.26,1336.1508,246.5516,...,10327.0552,0.4,0.094763,2965.2828,4162.259,1143.6654,36.7432,46.6072,36.4902,125.1156
6,AS14.01,2014-04-01,6.0,2,4,1,0.19,6.69,687.87,283.2278,...,9409.6618,0.4,0.088602,3324.0644,3431.0358,1313.3536,34.528,56.8424,36.4902,159.3268
7,AS14.01,2014-04-02,6.5,3,4,2,-0.14,7.48,1508.8072,596.6892,...,9356.013001,0.2,0.080389,3355.3024,3262.8386,1391.6132,135.6494,103.7906,18.4776,171.8584
8,AS14.01,2014-04-03,6.4,4,4,3,-0.45,8.43,2092.0174,370.0874,...,12643.017401,0.2,0.081632,2065.6532,4473.5032,1459.2078,167.3716,57.1834,36.5738,234.9764
9,AS14.01,2014-04-04,6.2,5,4,4,-0.31,7.69,2628.288,363.4168,...,15258.860001,0.0,0.077839,2139.42,6816.9812,1392.4482,182.6184,57.1834,36.5738,243.838


Remove first two columns, because do not need these for prediction

In [12]:
df = df.drop(df.columns[0:2], axis=1)

In [13]:
df.head(10)

Unnamed: 0,target,weekday,month,monthday,slope,intersect,social,travel,unknown,utilities,...,screen,sms,activity,builtin,communication,entertainment,finance,game,office,other
0,6.6,3,3,26,0.07,6.22,2198.4136,274.511,47.0446,229.6172,...,13483.5212,0.4,0.138201,1411.3234,7205.7014,448.0606,38.2366,0.0,35.0432,131.2188
1,7.0,4,3,27,0.035,6.405,2215.1254,91.422,47.0446,116.5394,...,13172.5,0.4,0.131653,1016.9792,7747.274,428.6652,38.794,0.0,0.602,102.7682
2,6.4,5,3,28,0.1,6.33,2314.2752,93.4238,47.0446,128.821,...,15432.4978,0.4,0.116179,1116.5588,8141.9928,537.8682,43.0226,0.0,37.0922,94.846
3,8.0,6,3,29,0.105,6.235,2236.6556,320.0256,47.0446,183.1472,...,15062.4292,0.4,0.10677,2871.8286,7267.8956,720.0796,52.1752,46.6072,37.0922,125.4716
4,7.5,0,3,30,0.23,6.26,1686.508,246.5516,47.0446,267.5282,...,13717.4834,0.6,0.104619,3088.9634,5957.9082,850.1712,45.354,46.6072,36.4902,146.0788
5,7.4,1,3,31,0.28,6.26,1336.1508,246.5516,13.2954,230.6088,...,10327.0552,0.4,0.094763,2965.2828,4162.259,1143.6654,36.7432,46.6072,36.4902,125.1156
6,6.0,2,4,1,0.19,6.69,687.87,283.2278,19.1534,265.9968,...,9409.6618,0.4,0.088602,3324.0644,3431.0358,1313.3536,34.528,56.8424,36.4902,159.3268
7,6.5,3,4,2,-0.14,7.48,1508.8072,596.6892,19.1534,333.7182,...,9356.013001,0.2,0.080389,3355.3024,3262.8386,1391.6132,135.6494,103.7906,18.4776,171.8584
8,6.4,4,4,3,-0.45,8.43,2092.0174,370.0874,19.1534,341.649,...,12643.017401,0.2,0.081632,2065.6532,4473.5032,1459.2078,167.3716,57.1834,36.5738,234.9764
9,6.2,5,4,4,-0.31,7.69,2628.288,363.4168,19.1534,258.076,...,15258.860001,0.0,0.077839,2139.42,6816.9812,1392.4482,182.6184,57.1834,36.5738,243.838


# Make data ready for input

In [15]:
# split into input and output
train_X, train_y = df.iloc[:,1:], df.iloc[:,0]

In [58]:
trainData = xgb.DMatrix(df.iloc[:,1:], label = df.iloc[:,:0])

# Train model

In [62]:
param = {
    'max_depth': 5,
    'eta': 0.3,
    'silent': 1,
    'objective': 'reg:linear'
}
param['nthread'] = 4
num_round = 100

In [63]:
model = xgb.train(param, trainData, num_round)

In [37]:
regr = Ridge(max_iter=None, tol=0.001)

tol = [0.1, 0.01, 0.001, 0.0001]
solver = ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
alpha = [0, 0.1, 1, 10, 100]
normalize = ["True", "False"]
param_grid = dict(alpha=alpha, normalize=normalize, solver=solver, tol=tol)
grid = GridSearchCV(estimator=regr, param_grid=param_grid, n_jobs=1, verbose=2)
grid_result = cross_val_score(grid, train_X, train_y, cv=3, verbose=2)

[CV]  ................................................................
Fitting 3 folds for each of 84 candidates, totalling 252 fits
[CV] alpha=0, normalize=True, solver=auto ............................
[CV] ............. alpha=0, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0, normalize=True, solver=auto ............................
[CV] ............. alpha=0, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0, normalize=True, solver=auto ............................
[CV] ............. alpha=0, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0, normalize=True, solver=svd .............................
[CV] .............. alpha=0, normalize=True, solver=svd, total=   0.0s
[CV] alpha=0, normalize=True, solver=svd .............................
[CV] .............. alpha=0, normalize=True, solver=svd, total=   0.0s
[CV] alpha=0, normalize=True, solver=svd .............................
[CV] .............. alpha=0, normalize=True, solver=svd, total=   0.0s
[CV] alpha=0, n

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.3691468595811038e-17
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 4.984402967158582e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 9.698838837058141e-18
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.3691468595811038e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 4.984402967158582e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 9.698838837058141e-18
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.3691468595811038e-17
Ill-conditioned matrix detected. Result is

[CV] ............ alpha=0, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0, normalize=False, solver=saga ...........................
[CV] ............ alpha=0, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=auto ..........................
[CV] ........... alpha=0.1, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=auto ..........................
[CV] ........... alpha=0.1, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=auto ..........................
[CV] ........... alpha=0.1, normalize=True, solver=auto, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=svd ...........................
[CV] ............ alpha=0.1, normalize=True, solver=svd, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=svd ...........................
[CV] ............ alpha=0.1, normalize=True, solver=svd, total=   0.0s
[CV] alpha=0.1, normalize=True, solver=svd ...........................
[CV] .

[CV] alpha=0.3, normalize=False, solver=cholesky .....................
[CV] ...... alpha=0.3, normalize=False, solver=cholesky, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=cholesky .....................
[CV] ...... alpha=0.3, normalize=False, solver=cholesky, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.3, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.3, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.3, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=sparse_cg ....................
[CV] ..... alpha=0.3, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0.3, normalize=False, solver=sparse_cg ....................
[CV] ..... alpha=0.3, normalize=False, solver=sparse_cg, total=   0.0s
[CV] a

[Parallel(n_jobs=1)]: Done 252 out of 252 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s remaining:    0.0s
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 5.33399221201984e-17
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.917775239540206e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.690362697567043e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 5.33399221201984e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.917775239540206e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.690362697567043e-17
Ill-conditioned matrix

[CV] ............. alpha=0, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0, normalize=True, solver=sparse_cg .......................
[CV] ........ alpha=0, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0, normalize=True, solver=sparse_cg .......................
[CV] ........ alpha=0, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0, normalize=True, solver=sparse_cg .......................
[CV] ........ alpha=0, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0, normalize=True, solver=sag .............................
[CV] .............. alpha=0, normalize=True, solver=sag, total=   0.0s
[CV] alpha=0, normalize=True, solver=sag .............................
[CV] .............. alpha=0, normalize=True, solver=sag, total=   0.0s
[CV] alpha=0, normalize=True, solver=sag .............................
[CV] .............. alpha=0, normalize=True, solver=sag, total=   0.0s
[CV] alpha=0, normalize=True, solver=saga ............................
[CV] .

[CV] ..... alpha=0.2, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=sparse_cg ....................
[CV] ..... alpha=0.2, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=sparse_cg ....................
[CV] ..... alpha=0.2, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=sag ..........................
[CV] ........... alpha=0.2, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=sag ..........................
[CV] ........... alpha=0.2, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=sag ..........................
[CV] ........... alpha=0.2, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=saga .........................
[CV] .......... alpha=0.2, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0.2, normalize=False, solver=saga .........................
[CV] .

[CV] alpha=0.4, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.4, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.4, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.4, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.4, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.4, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.4, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0.4, normalize=True, solver=sag ...........................
[CV] ............ alpha=0.4, normalize=True, solver=sag, total=   0.0s
[CV] a

[CV] ........... alpha=0.5, normalize=False, solver=svd, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=cholesky .....................
[CV] ...... alpha=0.5, normalize=False, solver=cholesky, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=cholesky .....................
[CV] ...... alpha=0.5, normalize=False, solver=cholesky, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=cholesky .....................
[CV] ...... alpha=0.5, normalize=False, solver=cholesky, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.5, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.5, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=lsqr .........................
[CV] .......... alpha=0.5, normalize=False, solver=lsqr, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=sparse_cg ....................
[CV] .

[Parallel(n_jobs=1)]: Done 252 out of 252 | elapsed:    0.9s finished
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.3923660348917957e-17
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 5.332129518637505e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.1201904788289266e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 2.3923660348917957e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 5.332129518637505e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 3.1201904788289266e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition n

[CV] ....... alpha=0, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0, normalize=False, solver=sparse_cg ......................
[CV] ....... alpha=0, normalize=False, solver=sparse_cg, total=   0.0s
[CV] alpha=0, normalize=False, solver=sag ............................
[CV] ............. alpha=0, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0, normalize=False, solver=sag ............................
[CV] ............. alpha=0, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0, normalize=False, solver=sag ............................
[CV] ............. alpha=0, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0, normalize=False, solver=saga ...........................
[CV] ............ alpha=0, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0, normalize=False, solver=saga ...........................
[CV] ............ alpha=0, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0, normalize=False, solver=saga ...........................
[CV] .

[CV] alpha=0.3, normalize=True, solver=cholesky ......................
[CV] ....... alpha=0.3, normalize=True, solver=cholesky, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.3, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.3, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=lsqr ..........................
[CV] ........... alpha=0.3, normalize=True, solver=lsqr, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.3, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.3, normalize=True, solver=sparse_cg, total=   0.0s
[CV] alpha=0.3, normalize=True, solver=sparse_cg .....................
[CV] ...... alpha=0.3, normalize=True, solver=sparse_cg, total=   0.0s
[CV] a

[CV] ........... alpha=0.5, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=sag ..........................
[CV] ........... alpha=0.5, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=sag ..........................
[CV] ........... alpha=0.5, normalize=False, solver=sag, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=saga .........................
[CV] .......... alpha=0.5, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=saga .........................
[CV] .......... alpha=0.5, normalize=False, solver=saga, total=   0.0s
[CV] alpha=0.5, normalize=False, solver=saga .........................
[CV] .......... alpha=0.5, normalize=False, solver=saga, total=   0.0s
[CV] ................................................. , total=   1.0s


[Parallel(n_jobs=1)]: Done 252 out of 252 | elapsed:    1.0s finished
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    2.9s finished


In [38]:
grid_result

array([ 0.2203022 ,  0.25603354,  0.04076349])

In [44]:
regr = Ridge(max_iter=None, tol=0.001)

tol = [0.1, 0.01, 0.001, 0.0001]
solver = ["auto", "svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"]
alpha = [0, 0.1, 1, 10, 100]
normalize = ["True", "False"]
param_grid = dict(alpha=alpha, normalize=normalize, solver=solver, tol=tol)
grid = GridSearchCV(estimator=regr, param_grid=param_grid, n_jobs=1, verbose=0)
results = grid.fit(train_X, train_y)

Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 4.0549100875303e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.7316345362412704e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 6.080578761632325e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 4.0549100875303e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.7316345362412704e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 6.080578761632325e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 4.0549100875303e-17
Ill-conditioned matrix detected. Result is not guaranteed to be accurate.
Reciprocal condition number: 1.7316345362412704e-17
Ill

In [45]:
print("Best: %f using %s" % (results.best_score_, results.best_params_))
means = results.cv_results_['mean_test_score']
stds = results.cv_results_['std_test_score']
params = results.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
	print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.184811 using {'alpha': 0.1, 'normalize': 'False', 'solver': 'saga', 'tol': 0.1}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'auto', 'tol': 0.1}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'auto', 'tol': 0.01}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'auto', 'tol': 0.001}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'auto', 'tol': 0.0001}
0.118576 (0.155997) with: {'alpha': 0, 'normalize': 'True', 'solver': 'svd', 'tol': 0.1}
0.119227 (0.156741) with: {'alpha': 0, 'normalize': 'True', 'solver': 'svd', 'tol': 0.01}
0.118401 (0.155928) with: {'alpha': 0, 'normalize': 'True', 'solver': 'svd', 'tol': 0.001}
0.118698 (0.155829) with: {'alpha': 0, 'normalize': 'True', 'solver': 'svd', 'tol': 0.0001}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'cholesky', 'tol': 0.1}
0.121486 (0.153427) with: {'alpha': 0, 'normalize': 'True', 'solver': 'cholesky', 'tol': 0