In [None]:
# Install the Orange3 packages through the shell.
# NOTE(review): no visible cell imports Orange; presumably it is needed to unpickle Model.pkcls — confirm.
# NOTE(review): xgboost, which the import cell requires, is not installed here.
!pip install Orange3
!pip install Orange3-spark
!pip install Orange3-Timeseries

In [None]:
import xgboost as xgb
from matplotlib import pyplot
from numpy import log
from numpy import sqrt
from pandas import DataFrame,to_numeric
from pandas import concat
from pandas import read_csv
from pandas import set_option
from scipy.stats import boxcox

In [None]:
def series2dataframe(data, n_in=1, n_out=1, dropnan=True, lags=(10, 100)):
    """Build a frame of current and lagged copies of every column.

    The result holds the original columns followed, for each lag, by a copy
    of every column shifted down by that many rows and named
    ``<column>(t-<lag>)``.

    Args:
        data: Anything ``DataFrame`` accepts (DataFrame, Series, list, array).
        n_in: Unused; kept so existing callers keep working.
        n_out: Unused; kept so existing callers keep working.
        dropnan: When true, drop every row that contains a NaN, which
            includes the first ``max(lags)`` rows.
        lags: Row offsets to lag by; the default reproduces the original
            fixed lags of 10 and 100.

    Returns:
        DataFrame with ``len(lags) + 1`` times as many columns as ``data``.
    """
    df = DataFrame(data)
    base_names = list(df)
    # shift(0) returns a new frame, so the input is never aliased.
    cols = [df.shift(0)]
    names = list(base_names)
    for lag in lags:
        cols.append(df.shift(lag))
        # format() also copes with non-string labels (e.g. the integer
        # column labels produced for list / ndarray input).
        names += ['{}(t-{})'.format(name, lag) for name in base_names]
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg


In [None]:
# Load the feature-extracted training samples; the first CSV column becomes the index.
# DataFrame.from_csv was removed in pandas 1.0; read_csv with index_col=0 and
# parse_dates=True is the documented drop-in equivalent.
data = read_csv('Train_FeatureExtracted_FullSamples.csv', index_col=0, parse_dates=True)
# Allow up to 2000 rows to be shown before pandas truncates the display.
set_option('display.max_rows', 2000)

In [None]:
# Preview the first five rows (head() defaults to 5).
data.head()

In [None]:
# Box-Cox transformation: overwrite V11 and P7 with their power-transformed
# values and keep the fitted lambda of each column.
v11_boxcox, V_lam = boxcox(data['V11'])
data['V11'] = v11_boxcox
p7_boxcox, P_lam = boxcox(data['P7'])
data['P7'] = p7_boxcox
print('V Lambda: %f' % V_lam)
print('P Lambda: %f' % P_lam)
# Two stacked histograms on figure 1: transformed V11 on top, P7 below.
pyplot.figure(1)
for position, column in ((211, 'V11'), (212, 'P7')):
    pyplot.subplot(position)
    pyplot.hist(data[column])
pyplot.show()

In [None]:
# Expand `data` into current plus lagged (t-10, t-100) copies of every column;
# rows containing NaN are dropped because dropnan defaults to True.
series = series2dataframe(data)

In [None]:
# Remove the TrialID column together with its two lagged copies, then
# renumber the rows from 0.
series = (
    series
    .drop(['TrialID','TrialID(t-10)','TrialID(t-100)'], axis=1)
    .reset_index(drop=True)
)

In [None]:
# Start the list of feature frames with the lagged frame, and remember its
# column labels.
merged = [series]
names = list(series.columns)

In [None]:
#rolling window statistics
# temps has positional columns 0 (V11) and 1 (P7).
temps = DataFrame(series[['V11','P7']].values)
# shift(50) followed by a 51-row window averages rows t-100 .. t-50.
shifted = temps.shift(50)
window = shifted.rolling(window=51)
means = window.mean()
dataframe = concat([means, temps], axis=1)
# concat keeps each input frame's columns together, so the order is
# [V11 mean, P7 mean, V11, P7]; the labels must follow that order.
# NOTE(review): the "(t-10,..)" text in the label names does not describe the
# actual window; the names are kept as-is because later cells refer to them.
dataframe.columns = ['V11_mean(t-10,..,t-2,t)', 'P7_mean(t-10,..,t-2,t-1)', 'V11_t', 'P7_t']
# The leading NaN rows are deliberately kept: dropping them and resetting the
# index would shift every row by 100 relative to `series`, and the later
# column-wise concat pairs rows by index.
print(dataframe.dropna().head(10))

In [None]:
# Queue the rolling-window frame for the column-wise concat.
merged.append(dataframe)
# extend (not append) keeps `names` a flat list of column labels.
names.extend(list(dataframe))

In [None]:
# Join every queued frame side by side (rows are paired by index) and preview.
formed = concat(merged, axis='columns')
formed.head()

In [None]:
#expanding window statistics
# `temps` (built in the rolling-window cell) has column 0 = V11 and column 1 = P7.
window = temps.expanding()
dataframe = concat([window.min(), window.mean(), window.max(), temps.shift(-1)], axis=1)
# Each concatenated piece contributes its V column and then its P column, so
# the labels alternate V/P within min, mean, max and the shifted values.
# 'V(t)' / 'P(t)' hold the shift(-1) values, i.e. the following row.
dataframe.columns = ['Vmin', 'Pmin', 'Vmean', 'Pmean', 'Vmax', 'Pmax', 'V(t)', 'P(t)']
dataframe = dataframe.reset_index(drop=True)
print(dataframe.head(10))

In [None]:
# Queue the expanding-window frame and rebuild the combined table.
merged.append(dataframe)
# extend (not append) keeps `names` a flat list of column labels.
names.extend(list(dataframe))
formed = concat(merged,axis =1)

In [None]:
# Preview the first five rows of the combined table (head() defaults to 5).
formed.head()

In [None]:
#absolute differences
# |x(t) - x(t-10)| and |x(t) - x(t-100)| for V11 and P7, computed on `data`.
diff_10 = data[['V11','P7']].diff(periods = 10)
diff_100 = data[['V11','P7']].diff(periods = 100)
result = concat([diff_10, diff_100], axis=1).abs()
# Rows without a full 100-row history contain NaN and are removed.
result.dropna(inplace=True)
# concat keeps each frame's columns together: V11/P7 at lag 10, then V11/P7
# at lag 100; the labels must follow that order.
result.columns = ['V11_diff(t-10)','P7_diff(t-10)','V11_diff(t-100)','P7_diff(t-100)']
# No drop('ObsNum') here: `result` has only the V11/P7 columns and no row
# carries that label, so the call could only raise; resetting the index
# already discards the original index.
result.reset_index(drop=True,inplace= True)
print(result.head(10))


In [None]:
# Queue the absolute-difference frame and rebuild the combined table.
merged.append(result)
# extend (not append) keeps `names` a flat list of column labels.
names.extend(list(result))
formed = concat(merged,axis =1)

In [None]:
# Discard the four helper columns labelled P7_t, P(t), V(t) and V11_t.
formed = formed.drop(['P7_t','P(t)','V(t)','V11_t'], axis=1)

In [None]:
# Summarise the combined table: column dtypes, non-null counts and memory use.
formed.info()

In [None]:
# Persist the combined feature table; with to_csv defaults the row index is
# written as the first column.
formed.to_csv('Ford_XGB_Train.csv')

In [None]:
# Unpickle the object stored in Model.pkcls into `model`.
# NOTE(review): pickle.load can execute arbitrary code from the file — only
# open a Model.pkcls from a trusted source.
# NOTE(review): `model` is not referenced by any later cell visible here.
import pickle
with open("Model.pkcls", "rb") as f:
    model = pickle.load(f)

In [None]:
# Coerce every column of `formed` to a numeric dtype.
# NOTE(review): this is built from the same `formed` frame as train_numeric,
# so as written the "test" set is identical to the training set — presumably
# `formed` was meant to be rebuilt from a test CSV first; confirm.
test_numeric = formed.apply(to_numeric)

In [None]:
# Coerce every column of `formed` to a numeric dtype for training.
train_numeric = formed.apply(to_numeric)

In [None]:
# Order the training columns alphabetically by label.
train_numeric = train_numeric.sort_index(axis=1)

In [None]:
# Order the test columns alphabetically by label.
test_numeric = test_numeric.sort_index(axis=1)

In [None]:
# Training features: every column except the IsAlert label.
x_train = train_numeric.drop(labels='IsAlert', axis='columns')

In [None]:
# Test features: every column except the IsAlert label.
x_test = test_numeric.drop(labels='IsAlert', axis='columns')

In [None]:
# Training label: the IsAlert column.
y_train = train_numeric['IsAlert']

In [None]:
# Test label: the IsAlert column.
y_test = test_numeric['IsAlert']

In [None]:
# Build the training matrix from x_train (features only). Passing
# train_numeric would hand the IsAlert label to the model as a feature.
dtrain = xgb.DMatrix(x_train,label = y_train)

In [None]:
# Build the evaluation matrix from x_test (features only). Passing
# test_numeric would hand the IsAlert label to the model as a feature.
dtest = xgb.DMatrix(x_test,label = y_test)

In [None]:
# Booster settings: depth-2 trees, eta (learning rate) 1, logistic objective
# for the binary label, 4 threads, classification error as the metric.
# NOTE(review): 'silent' is reported as superseded by 'verbosity' in recent
# XGBoost releases — confirm against the installed version.
param = {
    'max_depth': 2,
    'eta': 1,
    'silent': 1,
    'objective': 'binary:logistic',
    'nthread': 4,
    'eval_metric': 'error',
}
# Materialise the pairs as a list: a bare dict view has no .copy(), which
# newer XGBoost releases call on the params object.
plst = list(param.items())
# Datasets whose metric is reported after each boosting round.
evallist = [(dtest, 'eval'), (dtrain, 'train')]

In [None]:
# Fit the booster for ten rounds, reporting the metric on each watched dataset.
num_round = 10
bst = xgb.train(
    params=plst,
    dtrain=dtrain,
    num_boost_round=num_round,
    evals=evallist,
)

In [None]:
# Score the evaluation matrix with the trained booster.
ypred = bst.predict(dtest)

In [None]:
# Display the predictions (presumably probabilities, given the
# 'binary:logistic' objective — confirm).
ypred

In [None]:
# Plot the feature importances of the trained booster.
xgb.plot_importance(bst)