In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [None]:
# Load the prepared training data and index it by the 'datetime' column.
df = pd.read_csv("../data/electricistan/train_prepped.csv").set_index('datetime')

# Parse the index values into datetimes so the date comparisons below work.
df.index = pd.to_datetime(df.index)
df.info()

# Preprocessing

In [None]:
# Chronological hold-out split: rows before the split date train the model,
# rows on or after it are held out for evaluation.
# TODO: replace the single split with time-series cross-validation.
SPLIT_DATE = '2020-01-01'  # ISO 8601; '01-01-2020' is ambiguous (day/month order)

# .copy() makes each split an independent frame, so columns can be added to
# them later without pandas raising SettingWithCopyWarning.
train = df.loc[df.index < SPLIT_DATE].copy()
test = df.loc[df.index >= SPLIT_DATE].copy()

In [None]:
# Scaling Dataset - TODO: not implemented (tree-based models such as XGBoost do not require feature scaling)

# Training

In [None]:
# Model inputs are the calendar columns; the value to predict is 'power'.
TARGET = 'power'
FEATURES = ['dayofyear', 'hour', 'dayofweek', 'quarter', 'month', 'year']

# Feature matrix and target vector for each split.
X_train, y_train = train[FEATURES], train[TARGET]
X_test, y_test = test[FEATURES], test[TARGET]

In [None]:
# Gradient-boosted tree regressor trained with a squared-error objective.
# Early stopping halts training once the metric on the LAST eval_set entry
# (the hold-out split) has not improved for 50 consecutive rounds.
# NOTE(review): the hold-out split both drives early stopping and is used for
# the reported RMSE, so that score is optimistic - consider a separate
# validation split.
reg = xgb.XGBRegressor(base_score=0.5, booster='gbtree',
                       n_estimators=1500,
                       early_stopping_rounds=50,
                       # 'reg:linear' is the deprecated alias of this objective
                       objective='reg:squarederror',
                       #max_depth=3,
                       learning_rate=0.01)

# verbose=100 prints the evaluation metrics every 100 boosting rounds.
reg.fit(X_train, y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=100)

In [None]:
# Predict on the hold-out split. assign() returns a new frame rather than
# writing a column into a frame that may be a slice of df, which avoids
# pandas' SettingWithCopyWarning and keeps this cell safe to re-run.
test = test.assign(prediction=reg.predict(X_test))

In [None]:
# Root-mean-squared error between the true and predicted power on the hold-out split.
mse = mean_squared_error(test['power'], test['prediction'])
score = np.sqrt(mse)
print(f'RMSE Score on Test set: {score:0.2f}')

# Predict

In [None]:
# Read the test data (indexed by its 'datetime' column) and the sample submission file.
testSet = pd.read_csv("../data/electricistan/test.csv").set_index('datetime')
submission = pd.read_csv("../data/electricistan/sample_submission_csv.csv")

# Parse the test-set index into datetimes.
testSet.index = pd.to_datetime(testSet.index)

In [None]:
# Feature creation: derive calendar columns from the datetime index.
stamps = testSet.index

# Output column name -> attribute of the datetime index that supplies it.
calendar_attrs = {
    'hour': 'hour',
    'dayofweek': 'dayofweek',
    'quarter': 'quarter',
    'month': 'month',
    'year': 'year',
    'dayofyear': 'dayofyear',
    'dayofmonth': 'day',
}
for column, attr in calendar_attrs.items():
    testSet[column] = getattr(stamps, attr)

# The ISO week number comes from isocalendar() rather than a plain attribute.
testSet['weekofyear'] = stamps.isocalendar().week

In [None]:
# Keep only the model's input columns, in the order given by FEATURES.
testSet = testSet.loc[:, FEATURES]

In [None]:
# Sanity check: print (rows, columns) for the training features and the test features.
for frame in (X_train, testSet):
    print(frame.shape)

In [None]:
# Run the fitted XGBoost model on the features built from test.csv.
pred = reg.predict(X=testSet)

# Write submission

In [None]:
# Put the predictions into the 'power' column of the sample-submission frame,
# show the result, and save it as CSV without the row index.
submission_path = '../data/electricistan/submission.csv'
submission['power'] = pred

print(submission)

submission.to_csv(submission_path, index=False)