In [1]:
import xgboost as xgb
# Imports
import numpy as np
import os
import pandas as pd
from utils.utilities import *
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Preparo i dati
feat_names = pd.read_csv("./UCIHAR/features.txt", sep=" ", names=["code","feature"])

# Converto i codici (0,1,2,3,4...) in (f0,f1,f2,f3,f4,...)
feat_names["code"] = ["f"+str(code) for _,code in enumerate(feat_names["code"])]

# Numero di features (561)
n_feats = feat_names["code"].size

#feat_names

# Load train data
X_train = pd.read_table('./UCIHAR/train/X_train.txt', delim_whitespace=True, names=["f"+str(code_number+1) for code_number in range(n_feats)])
y_train = pd.read_table('./UCIHAR/train/y_train.txt', delim_whitespace=True, names=["label"])

# Load test data
X_test = pd.read_table('./UCIHAR/test/X_test.txt', delim_whitespace=True, names=["f"+str(code_number+1) for code_number in range(n_feats)])
y_test = pd.read_table('./UCIHAR/test/y_test.txt', delim_whitespace=True, names=["label"])

X_train["is.train"] = 1
X_test["is.train"] = 0

# Concateno i due dataset
X_full = pd.concat([X_train, X_test], keys=['train', 'test'])


# Standardizzo i valori
from sklearn.preprocessing import StandardScaler

#definisco una maschera per selezionare solo le "features" (f1 -> f561)
feat_mask = ["f"+str(code_number+1) for code_number in range(n_feats)]
X_full[feat_mask] = StandardScaler().fit_transform(X_full[feat_mask])


n_feats = n_feats+1

X_tr = X_full.loc['train']
X_test = X_full.loc['test']
# XGboost wants levels to start with 0
y_train["label"] = [int(y_train["label"].iloc[row]-1) for row in range(y_train.size)]
y_test["label"] = [int(y_test["label"].iloc[row]-1) for row in range(y_test.size)]


  del sys.path[0]
  


In [3]:
#y_train = [val-1 for _,val in enumerate(y_train)]
#y_test = [val-1 for _,val in enumerate(y_test)]


y_train.label = y_train.label.astype(int)
y_test.label = y_test.label.astype(int)



# XGB style matrices
dtrain = xgb.DMatrix(X_tr, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
watchlist = {'train': dtrain, 'test': dtest}

In [4]:
from pandas import DataFrame

grid = {'eta': [0.3,0.5,0.7,0.3,0.5,0.7,0.3,0.5,0.7,0.3,0.5,0.7],
        'max_depth': [1,1,1,2,2,2,6,6,6,8,8,8,]
        }

cv_result = DataFrame(grid, columns= ['eta', 'max_depth'])
cv_result['nrounds'] = 0
cv_result['mll_train'] = 0
cv_result['mll_test'] = 0
cv_result


Unnamed: 0,eta,max_depth,nrounds,mll_train,mll_test
0,0.3,1,0,0,0
1,0.5,1,0,0,0
2,0.7,1,0,0,0
3,0.3,2,0,0,0
4,0.5,2,0,0,0
5,0.7,2,0,0,0
6,0.3,6,0,0,0
7,0.5,6,0,0,0
8,0.7,6,0,0,0
9,0.3,8,0,0,0


In [5]:
# Loop over hyperparameters
#for ind in range(len(cv_result)):
#    print(ind+1)

In [116]:
# Best Model Params
params = {}
params['booster'] = 'gbtree'
params['eval_metric'] = 'mlogloss'
params['objective'] =  'multi:softprob'
params['eta'] =  0.50
params['max_depth'] =  2
params['gamma'] =  0.0
params['min_child_weight'] =  0
params['colsample_bytree'] =  0.2
params['subsample'] =  1
params['num_class'] = 6
evallist = [(dtest, 'eval'), (dtrain, 'train')]


num_round = 499
bst = xgb.train(params, dtrain, num_round, evallist)


[0]	eval-mlogloss:1.06839	train-mlogloss:0.970493
[1]	eval-mlogloss:0.808269	train-mlogloss:0.676923
[2]	eval-mlogloss:0.64673	train-mlogloss:0.506609
[3]	eval-mlogloss:0.538737	train-mlogloss:0.401013
[4]	eval-mlogloss:0.466092	train-mlogloss:0.328126
[5]	eval-mlogloss:0.417114	train-mlogloss:0.279524
[6]	eval-mlogloss:0.371277	train-mlogloss:0.238161
[7]	eval-mlogloss:0.347702	train-mlogloss:0.207056
[8]	eval-mlogloss:0.315556	train-mlogloss:0.179926
[9]	eval-mlogloss:0.292729	train-mlogloss:0.158955
[10]	eval-mlogloss:0.274903	train-mlogloss:0.139692
[11]	eval-mlogloss:0.256199	train-mlogloss:0.124811
[12]	eval-mlogloss:0.242569	train-mlogloss:0.111666
[13]	eval-mlogloss:0.226805	train-mlogloss:0.101064
[14]	eval-mlogloss:0.219187	train-mlogloss:0.092486
[15]	eval-mlogloss:0.208108	train-mlogloss:0.084112
[16]	eval-mlogloss:0.202357	train-mlogloss:0.076638
[17]	eval-mlogloss:0.194766	train-mlogloss:0.071378
[18]	eval-mlogloss:0.187015	train-mlogloss:0.065845
[19]	eval-mlogloss:0.180

[158]	eval-mlogloss:0.12356	train-mlogloss:0.00053
[159]	eval-mlogloss:0.123945	train-mlogloss:0.000522
[160]	eval-mlogloss:0.123631	train-mlogloss:0.000514
[161]	eval-mlogloss:0.123436	train-mlogloss:0.000507
[162]	eval-mlogloss:0.123841	train-mlogloss:0.000498
[163]	eval-mlogloss:0.123831	train-mlogloss:0.000491
[164]	eval-mlogloss:0.123794	train-mlogloss:0.000482
[165]	eval-mlogloss:0.12413	train-mlogloss:0.000475
[166]	eval-mlogloss:0.124248	train-mlogloss:0.000468
[167]	eval-mlogloss:0.124502	train-mlogloss:0.000459
[168]	eval-mlogloss:0.124371	train-mlogloss:0.00045
[169]	eval-mlogloss:0.125201	train-mlogloss:0.000439
[170]	eval-mlogloss:0.125129	train-mlogloss:0.000434
[171]	eval-mlogloss:0.125066	train-mlogloss:0.000426
[172]	eval-mlogloss:0.124979	train-mlogloss:0.000419
[173]	eval-mlogloss:0.125074	train-mlogloss:0.000413
[174]	eval-mlogloss:0.125083	train-mlogloss:0.000406
[175]	eval-mlogloss:0.125164	train-mlogloss:0.0004
[176]	eval-mlogloss:0.125388	train-mlogloss:0.000393

[314]	eval-mlogloss:0.132086	train-mlogloss:0.000106
[315]	eval-mlogloss:0.132232	train-mlogloss:0.000106
[316]	eval-mlogloss:0.132224	train-mlogloss:0.000105
[317]	eval-mlogloss:0.132251	train-mlogloss:0.000104
[318]	eval-mlogloss:0.132414	train-mlogloss:0.000104
[319]	eval-mlogloss:0.132325	train-mlogloss:0.000103
[320]	eval-mlogloss:0.132473	train-mlogloss:0.000103
[321]	eval-mlogloss:0.132437	train-mlogloss:0.000102
[322]	eval-mlogloss:0.132532	train-mlogloss:0.000101
[323]	eval-mlogloss:0.132601	train-mlogloss:0.000101
[324]	eval-mlogloss:0.132373	train-mlogloss:0.0001
[325]	eval-mlogloss:0.13243	train-mlogloss:0.0001
[326]	eval-mlogloss:0.132468	train-mlogloss:9.9e-05
[327]	eval-mlogloss:0.132657	train-mlogloss:9.9e-05
[328]	eval-mlogloss:0.132625	train-mlogloss:9.8e-05
[329]	eval-mlogloss:0.132625	train-mlogloss:9.7e-05
[330]	eval-mlogloss:0.132611	train-mlogloss:9.7e-05
[331]	eval-mlogloss:0.132793	train-mlogloss:9.6e-05
[332]	eval-mlogloss:0.132711	train-mlogloss:9.6e-05
[333]

[473]	eval-mlogloss:0.137128	train-mlogloss:5.3e-05
[474]	eval-mlogloss:0.137126	train-mlogloss:5.3e-05
[475]	eval-mlogloss:0.137186	train-mlogloss:5.2e-05
[476]	eval-mlogloss:0.137259	train-mlogloss:5.2e-05
[477]	eval-mlogloss:0.137428	train-mlogloss:5.2e-05
[478]	eval-mlogloss:0.137397	train-mlogloss:5.2e-05
[479]	eval-mlogloss:0.137354	train-mlogloss:5.2e-05
[480]	eval-mlogloss:0.137493	train-mlogloss:5.2e-05
[481]	eval-mlogloss:0.137447	train-mlogloss:5.1e-05
[482]	eval-mlogloss:0.13756	train-mlogloss:5.1e-05
[483]	eval-mlogloss:0.137571	train-mlogloss:5.1e-05
[484]	eval-mlogloss:0.137461	train-mlogloss:5.1e-05
[485]	eval-mlogloss:0.137621	train-mlogloss:5.1e-05
[486]	eval-mlogloss:0.137584	train-mlogloss:5.1e-05
[487]	eval-mlogloss:0.137661	train-mlogloss:5.1e-05
[488]	eval-mlogloss:0.137709	train-mlogloss:5e-05
[489]	eval-mlogloss:0.137744	train-mlogloss:5e-05
[490]	eval-mlogloss:0.137994	train-mlogloss:5e-05
[491]	eval-mlogloss:0.13809	train-mlogloss:5e-05
[492]	eval-mlogloss:0.

In [7]:
# Predict
ypred = bst.predict(dtest)
y_pred = np.argmax(ypred, axis=1)
predicted_df = DataFrame(y_pred, columns= ['label'])

# Accuracy
acc = 100. * np.sum(predicted_df == y_test) / len(y_test)
print("Accuracy: {0:.2f}%".format(acc["label"]))

NameError: name 'bst' is not defined