**Model:**
- XGBoost

In [1]:
import os
import argparse
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt

In [5]:
from sklearn.metrics import make_scorer
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold, GridSearchCV, cross_val_score

from sklearn import linear_model
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor, XGBClassifier
from xgboost import plot_importance

In [6]:
# Dataset identifier; it selects both the input and the output sub-directory.
name = 'helpdesk'

# Attribute-style container for the notebook's paths (mimics parsed CLI args).
args = argparse.Namespace(
    inputdir='../input/{}/'.format(name),
    outputdir='./output_files/{0}/'.format(name),
)

In [7]:
# Create the output directory (and any missing parents). `exist_ok=True`
# makes the call idempotent and avoids the check-then-create race of a
# separate `os.path.isdir` test.
os.makedirs(args.outputdir, exist_ok=True)

In [8]:
# Restore the preprocessing parameters. Six objects were pickled back to back
# into one file, so they are read sequentially in the same order.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(args.inputdir + 'parameters.pkl', "rb") as f:
    (maxlen, num_features, chartoindice,
     targetchartoindice, divisor, divisor2) = (pickle.load(f) for _ in range(6))

In [9]:
# Restore the train/test arrays. Six objects were pickled back to back into
# one file: training inputs and two targets, then the matching test triple.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
with open(args.inputdir + 'preprocessed_data.pkl', "rb") as f:
    (X, y_a, y_t,
     X_test, y_a_test, y_t_test) = (pickle.load(f) for _ in range(6))

# Regression for time

In [None]:
def modelTest(clf, train, labels):
    """Cross-validate ``clf`` and report its mean absolute error.

    The estimator is scored on 5 shuffled folds (fixed seed 45, so the
    splits are reproducible) using mean absolute error as the metric.

    Parameters
    ----------
    clf : estimator
        Unfitted scikit-learn compatible regressor.
    train : array-like
        Feature matrix that is split into folds.
    labels : array-like
        Regression targets aligned with ``train``.

    Returns
    -------
    list
        One-element list holding the MAE averaged over the folds.
    """
    mae_scorer = make_scorer(mean_absolute_error)
    folds = KFold(n_splits=5, shuffle=True, random_state=45).split(train)
    fold_errors = cross_val_score(clf, train, labels, cv=folds, scoring=mae_scorer)
    return [fold_errors.mean()]

In [None]:
def modelPlot(result_dict):
    """Plot each model's cross-validated error as a bar chart.

    Parameters
    ----------
    result_dict : dict
        Maps a model name to a one-element sequence holding its mean
        absolute error (the shape returned by ``modelTest``).

    Returns
    -------
    pandas.DataFrame
        One row per model with a single "Mean Absolute Error" column,
        sorted from the largest error to the smallest.
    """
    result = pd.DataFrame.from_dict(result_dict, orient='index')
    result.columns = ["Mean Absolute Error"]
    # DataFrame.sort(columns=...) was removed from pandas; sort_values(by=...)
    # is its replacement and keeps the same descending order.
    result = result.sort_values(by="Mean Absolute Error", ascending=False)
    # DataFrame.plot returns the Axes it drew on, so no plt.gca() lookup is needed.
    axes = result.plot(kind="bar", title="Model Scores")
    # NOTE(review): fixed y-range kept from the original; bars whose error lies
    # outside [0.5, 1] will be clipped -- confirm this range suits the data.
    axes.set_ylim([0.5, 1])
    return result

In [None]:
def modelFit(train, labels):
    """Cross-validate the enabled regressors and plot their scores.

    Each candidate is built with default hyper-parameters, evaluated with
    ``modelTest`` and the collected scores are passed to ``modelPlot``.

    Parameters
    ----------
    train : array-like
        Feature matrix.
    labels : array-like
        Regression targets aligned with ``train``.

    Returns
    -------
    The summary produced by ``modelPlot`` for the evaluated models.
    """
    # Currently disabled candidates (re-enable by adding them to the table):
    #   "Linear"         -> linear_model.LinearRegression()
    #   "Lasso"          -> linear_model.Lasso(alpha=1e-4)
    #   "Ridge"          -> linear_model.Ridge()
    #   "Bayesian Ridge" -> linear_model.BayesianRidge()
    #   "Huber"          -> linear_model.HuberRegressor()
    #   "SVM RBF"        -> svm.SVR()
    #   "SVM Linear"     -> svm.SVR(kernel="linear")
    #   "Bagging"        -> BaggingRegressor()
    #   "RandomForest"   -> RandomForestRegressor()
    candidates = (
        ("AdaBoost", AdaBoostRegressor),
        ("XGBoost", XGBRegressor),
    )

    result_dict = {}
    for label, make_model in candidates:
        result_dict[label] = modelTest(make_model(), train, labels)

    return modelPlot(result_dict)

In [10]:
# Flatten every sample from 2-D to one feature vector. The sizes come from
# X itself (instead of hard-coded 9181 / 15*14) so the cell works for any
# dataset, mirroring how the test set is flattened.
data = X.reshape((X.shape[0], X.shape[1] * X.shape[2]))

In [11]:
# Sanity check: shapes of the raw inputs, the flattened inputs and the time targets.
X.shape, data.shape, y_t.shape

((9181, 15, 14), (9181, 210), (9181,))

In [None]:
# Cross-validate the enabled regressors on the flattened training data and plot their scores.
modelFit(data, y_t)

In [12]:
# Flatten each test sample into a single feature vector (rows stay samples).
data_test = np.reshape(
    X_test,
    (X_test.shape[0], X_test.shape[1] * X_test.shape[2]),
)

# Predict

In [None]:
# Train a default-configured XGBoost regressor on the flattened training set
# (fit() returns the fitted estimator), then predict the time target for the
# flattened test set.
reg = XGBRegressor().fit(data, y_t)
y_pred = reg.predict(data_test)

In [None]:
# Inspect the held-out time targets loaded from preprocessed_data.pkl.
y_t_test

In [None]:
# Inspect the regressor's predictions for the test set.
y_pred

In [None]:
# Scale targets and predictions by `divisor`
# (presumably undoing the preprocessing normalisation -- TODO confirm).
real, pred = y_t_test * divisor, y_pred * divisor

In [None]:
# Mean absolute error divided by the number of seconds in a day
# (assumes the rescaled values are in seconds -- TODO confirm).
mean_absolute_error(pred, real) / (24 * 60 * 60)

# Classification for activity

In [13]:
# Collapse each row of the activity targets to the index of its largest entry
# (presumably one-hot encoded -- TODO confirm) to obtain integer class labels.
labels = np.asarray(y_a).argmax(axis=1)

In [None]:
# Train a default-configured XGBoost classifier on the flattened training set
# (fit() returns the fitted estimator), then predict a class for each test sample.
clf = XGBClassifier().fit(data, labels)
pred_label = clf.predict(data_test)

In [None]:
# Local import: the notebook never brings `sklearn.metrics` into scope as a module.
from sklearn import metrics

# Ground-truth class indices for the test set, decoded with the same argmax
# used for the training labels (assumes y_a_test has the same layout as
# y_a -- TODO confirm).
true_label = np.argmax(y_a_test, axis=1)

acc = metrics.accuracy_score(true_label, pred_label)

# Log loss is defined on predicted class probabilities, not on hard labels.
# Passing `labels` pins the probability columns to the classifier's classes,
# which matters when a class is missing from the test set.
pred_proba = clf.predict_proba(data_test)
log_loss = metrics.log_loss(true_label, pred_proba, labels=clf.classes_)

In [None]:
# Show the classification accuracy on the test set.
acc

In [None]:
# Show the log loss on the test set.
log_loss