### Memo
- small data sample with many features 
- approach with traditional machine learning 

### Key Element 
- BaseEstimator, TransformerMixin, ClassifierMixin
- ElasticNetCV, LassoLarsCV
- 

In [21]:

import numpy as np
from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.linear_model import ElasticNetCV, LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline, make_union
from sklearn.utils import check_array
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor

from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection

from sklearn.decomposition import PCA, FastICA
from sklearn.decomposition import TruncatedSVD

from sklearn.metrics import r2_score

X1~X8 is categorical data

In [22]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
id_test = test['ID']
train.pop('ID')
test.pop('ID')

for c in train.columns:
    if train[c].dtype == 'object':
        lbl = LabelEncoder()
        lbl.fit(list(train[c].values) + list(test[c].values))
        train[c] = lbl.transform(list(train[c].values))
        test[c] = lbl.transform(list(test[c].values))

In [23]:
n_comp = 12

# tSVD
tsvd = TruncatedSVD(n_components=n_comp, random_state=420)
tsvd_results_train = tsvd.fit_transform(train.drop(["y"], axis=1))
tsvd_results_test = tsvd.transform(test)

# PCA
pca = PCA(n_components=n_comp, random_state=420)
pca2_results_train = pca.fit_transform(train.drop(["y"], axis=1))
pca2_results_test = pca.transform(test)

# ICA
ica = FastICA(n_components=n_comp, random_state=420)
ica2_results_train = ica.fit_transform(train.drop(["y"], axis=1))
ica2_results_test = ica.transform(test)

# GRP
grp = GaussianRandomProjection(n_components=n_comp, eps=0.1, random_state=420)
grp_results_train = grp.fit_transform(train.drop(["y"], axis=1))
grp_results_test = grp.transform(test)

# SRP [Might cause -inf]
# srp = SparseRandomProjection(n_components=n_comp, dense_output=True, random_state=420)
# srp_results_train = srp.fit_transform(train.drop(["y"], axis=1))
# srp_results_test = srp.transform(test)

#save columns list before adding the decomposition components

usable_columns = list(set(train.columns) - set(['y']))

# Append decomposition components to datasets
for i in range(1, n_comp + 1):
    train['pca_' + str(i)] = pca2_results_train[:, i - 1]
    test['pca_' + str(i)] = pca2_results_test[:, i - 1]

    train['ica_' + str(i)] = ica2_results_train[:, i - 1]
    test['ica_' + str(i)] = ica2_results_test[:, i - 1]

    train['tsvd_' + str(i)] = tsvd_results_train[:, i - 1]
    test['tsvd_' + str(i)] = tsvd_results_test[:, i - 1]

    train['grp_' + str(i)] = grp_results_train[:, i - 1]
    test['grp_' + str(i)] = grp_results_test[:, i - 1]

#    train['srp_' + str(i)] = srp_results_train[:, i - 1]
#    test['srp_' + str(i)] = srp_results_test[:, i - 1]




In [379]:

train_y = train['y'].values
usable_columns = list(set(train.columns) - set(['y']))


feature_cols =[]
for i in range(1, n_comp + 1):
    feature_cols+=['pca_' + str(i), 'ica_' + str(i), 'tsvd_' + str(i), 
                        'grp_' + str(i)]

#feature_cols = usable_columns 
# use only projected features
train_x = train[feature_cols].copy()
test   = test[feature_cols].copy()

# zero-center and normalized with train_x and dtest


A = train_x.mean()
B = 1/train_x.max()
train_x -= A
train_x *= B

test -= A 
test *= B


# normalized y (aX+b)
b = train_y.mean()
a = 1/train_y.max()
train_y -= b
train_y *= a

def recover_y(value, a, b):
    value*=(1/a)
    value+= b
    return value 



In [380]:

'''Train the xgb model then predict the test data'''

xgb_params = {'n_trees': 400, 
    'eta': 0.0045,
    'max_depth': 4,
    'subsample': 0.9,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1}
# NOTE: Make sure that the class is labeled 'class' in the data file

dtrain = xgb.DMatrix(train_x, train_y)
dtest = xgb.DMatrix(test)

num_boost_rounds = 1250

# train model
model = xgb.train(dict(xgb_params, silent=0), dtrain, num_boost_round=num_boost_rounds)
y_pred = model.predict(dtest)
y_pred = list(recover_y(y_pred, a, b))



In [381]:
#### Value Replacing with ill-conditioned in test data
# https://www.kaggle.com/c/mercedes-benz-greener-manufacturing/discussion/35271
# https://crowdstats.eu/topics/kaggle-mercedes-benz-greener-manufacturing-leaderboard-probing
memo = { 1 : 71.34112, 12 : 109.30903, 23 : 115.21953, 28 : 92.00675, 42 : 87.73572, 43 : 129.79876, 
        45 : 99.55671, 57 : 116.02167, 3977 : 132.08556}
for i in range(len(id_test)):
    if id_test[i] in memo.keys():
        y_pred[i] = memo[id_test[i]]
        print (i, id_test[i], memo[id_test[i]])

0 1 71.34112
8 12 109.30903
17 23 115.21953
19 28 92.00675
24 42 87.73572
25 43 129.79876
26 45 99.55671
32 57 116.02167
1985 3977 132.08556


In [382]:
sub = pd.DataFrame()
sub['ID'] = id_test
sub['y'] = y_pred

# sub['y'] = y_pred*0.75 + results*0.25
sub.to_csv('model_15.csv', index=False)
# stacked-models_04.csv = with outlier
# stacked-models_05.csv = without outlier second StackingEstimator max_depth=5
# stacked-models_06.csv = second StackingEstimator max_depth=3 
# stacked-models_07.csv = sub['y'] = y_pred*0.75 + results*0.25
# stacked-models_08.csv = n_component from 12 -> 20, second StackingEstimator max_depth=4
# stacked-models_09.csv = sub['y'] = y_pred*0.7145 + results*0.2855
'''
model_11.csv 
xgb_params = {
    'n_trees': 320, 
    'eta': 0.0045,
    'max_depth': 5,
    'subsample': 0.9,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1}
    
model_12.csv 
sub feature cols to unsample_col 

model_13.csv
use feature cols
xgb_params = {'n_trees': 400, 
    'eta': 0.0045,
    'max_depth': 4,
    'subsample': 0.9,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1}
    
LBacc = 0.53

model_14.csv
xgb_params = {'n_trees': 400, 
    'eta': 0.0045,
    'max_depth': 5,
    'subsample': 0.9,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1}

model_15.csv
replce prediction outlier, ill-condition
xgb_params = {'n_trees': 400, 
    'eta': 0.0045,
    'max_depth': 4,
    'subsample': 0.9,
    'objective': 'reg:linear',
    'eval_metric': 'rmse',
    'base_score': y_mean, # base prediction = mean(target)
    'silent': 1}
    
'''

"\nmodel_11.csv \nxgb_params = {\n    'n_trees': 320, \n    'eta': 0.0045,\n    'max_depth': 5,\n    'subsample': 0.9,\n    'objective': 'reg:linear',\n    'eval_metric': 'rmse',\n    'base_score': y_mean, # base prediction = mean(target)\n    'silent': 1}\n    \nmodel_12.csv \nsub feature cols to unsample_col \n\nmodel_13.csv\nuse feature cols\nxgb_params = {'n_trees': 400, \n    'eta': 0.0045,\n    'max_depth': 4,\n    'subsample': 0.9,\n    'objective': 'reg:linear',\n    'eval_metric': 'rmse',\n    'base_score': y_mean, # base prediction = mean(target)\n    'silent': 1}\n    \nLBacc = 0.53\n\nmodel_14.csv\nxgb_params = {'n_trees': 400, \n    'eta': 0.0045,\n    'max_depth': 5,\n    'subsample': 0.9,\n    'objective': 'reg:linear',\n    'eval_metric': 'rmse',\n    'base_score': y_mean, # base prediction = mean(target)\n    'silent': 1}\n\nmodel_15.csv\nreplce prediction outlier, ill-condition\nxgb_params = {'n_trees': 400, \n    'eta': 0.0045,\n    'max_depth': 4,\n    'subsample': 

In [4]:
from train_dp import AgentRegressor

In [5]:
train_y = train['y'].values
print (train_y)
usable_columns = list(set(train.columns) - set(['y']))

feature_cols =[]
for i in range(1, n_comp + 1):
    feature_cols+=['pca_' + str(i), 'ica_' + str(i), 'tsvd_' + str(i), 
                        'grp_' + str(i), 'srp_' + str(i) ]

train_x = train[feature_cols] 
test   = test[feature_cols]

agent = AgentRegressor(lr=1e-4, batch_size=2, train_x=train_x, train_y= train_y, test = test)


[ 130.81   88.53   76.26 ...,  109.22   87.48  110.85]
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_2 (Dense)              (None, 30)                1530      
_________________________________________________________________
dropout_1 (Dropout)          (None, 30)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                310       
_________________________________________________________________
activation_1 (Activation)    (None, 10)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 11        
Total params: 6,901.0
Trainable params: 6,901.0
Non-trainable params: 0.0
_____________

In [7]:
# add init  # add constrain 
# not use momentum https://stackoverflow.com/questions/37232782/nan-loss-when-training-regression-network
# still meas nan => gradient explod 
# fix net  => no use
# avoid neg-value in train => no use 
# Truncnla init => with lower std
# simplize the model
# in-regression problem, it is easy to explode the gradient ...
# use adam optimizer 
# before final layer => use softmax => give up this way
# give up normalize output => no use
# extremely low lr with patient 

## Keras Way 
# agent.train(lr=1e-8, callbacks=[prediction_history])
agent.tf_train(lr= 1e-3)

start training 
EP [5] Loss nan Predict [[ nan]
 [ nan]]
EP [10] Loss nan Predict [[ nan]
 [ nan]]
EP [15] Loss nan Predict [[ nan]
 [ nan]]
EP [20] Loss nan Predict [[ nan]
 [ nan]]
EP [25] Loss nan Predict [[ nan]
 [ nan]]
EP [30] Loss nan Predict [[ nan]
 [ nan]]
EP [35] Loss nan Predict [[ nan]
 [ nan]]
EP [40] Loss nan Predict [[ nan]
 [ nan]]
EP [45] Loss nan Predict [[ nan]
 [ nan]]
EP [50] Loss nan Predict [[ nan]
 [ nan]]
EP [55] Loss nan Predict [[ nan]
 [ nan]]
EP [60] Loss nan Predict [[ nan]
 [ nan]]
EP [65] Loss nan Predict [[ nan]
 [ nan]]
EP [70] Loss nan Predict [[ nan]
 [ nan]]
EP [75] Loss nan Predict [[ nan]
 [ nan]]
EP [80] Loss nan Predict [[ nan]
 [ nan]]
EP [85] Loss nan Predict [[ nan]
 [ nan]]
EP [90] Loss nan Predict [[ nan]
 [ nan]]
EP [95] Loss nan Predict [[ nan]
 [ nan]]
EP [100] Loss nan Predict [[ nan]
 [ nan]]
EP [105] Loss nan Predict [[ nan]
 [ nan]]
EP [110] Loss nan Predict [[ nan]
 [ nan]]
EP [115] Loss nan Predict [[ nan]
 [ nan]]
EP [120] Loss n

In [446]:
res  = agent.predict()

(4209, 100)


In [328]:
print (res[0])
print (train_x.shape, test.shape)

nan
(4209, 100) (4209, 100)


In [34]:
for i,j in enumerate(train_x.min()): 
    if i == 54:print (i,'---',j) 
train_x.keys()[54]

54 --- -inf


'srp_11'