In [167]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import r2_score

In [168]:
# Load the raw training and test tables from CSV files in the working directory.
train_data = pd.read_csv(filepath_or_buffer='train.csv')
test_data = pd.read_csv(filepath_or_buffer='test.csv')

In [169]:
# Integer-encode the categorical column `var2` (labels 'A', 'B', 'C') in both
# frames with one shared mapping, so train and test use the same codes.
# Like the per-item dict lookup it replaces, this expects the raw string labels:
# any value that is not a key of `z` raises KeyError.
z = {'A': 0, 'B': 1, 'C': 2}

for _frame in (train_data, test_data):
    # Vectorised lookup; values that are not keys of `z` come back as NaN.
    _codes = _frame['var2'].map(z)
    _unmapped = _frame.loc[_codes.isna(), 'var2'].unique()
    if len(_unmapped):
        # Fail loudly and report every offending label at once.
        raise KeyError(f'var2 contains values missing from the mapping: {list(_unmapped)}')
    # Item assignment is the documented way to set a column; attribute
    # assignment only works when the column already exists.
    _frame['var2'] = _codes.astype('int64')

In [170]:
# How often each encoded var2 value occurs in the training frame.
train_data.loc[:, 'var2'].value_counts()

0    25239
2     1040
1      217
Name: var2, dtype: int64

In [171]:
# How often each encoded var2 value occurs in the test frame.
test_data.loc[:, 'var2'].value_counts()

0    8177
2     324
1      67
Name: var2, dtype: int64

In [172]:
def _index_by_datetime(frame):
    """Return `frame` indexed by its parsed 'datetime' column.

    The 'datetime' column is converted with `pd.to_datetime` and moved into
    the index. A frame whose index is already named 'datetime' is returned
    unchanged, so re-running this cell is a no-op instead of a KeyError on
    the (already consumed) column. A frame that has neither the column nor
    the index still raises KeyError.
    """
    if frame.index.name == 'datetime':
        return frame
    parsed = frame.assign(datetime=pd.to_datetime(frame['datetime']))
    return parsed.set_index('datetime')


train_data = _index_by_datetime(train_data)
test_data = _index_by_datetime(test_data)


In [173]:
# Features: every training column except 'ID' and the target column.
X = train_data.drop(columns=['ID', 'electricity_consumption'])
# Target: the column the model is trained to predict.
y = train_data['electricity_consumption']
# Remove 'ID' from the test frame as well. errors='ignore' keeps this cell
# re-runnable: test_data is rebound here, so on a second run the column is
# already gone and a plain drop would raise KeyError.
test_data = test_data.drop(columns='ID', errors='ignore')

In [174]:
# Objects shared by the cells below: the regressor, the feature scaler and
# the time-ordered cross-validation splitter.
N_CV_SPLITS = 100

linear = LinearRegression()
scaler = StandardScaler()
tscv = TimeSeriesSplit(n_splits=N_CV_SPLITS)

In [175]:
# Expand the features into all polynomial terms up to degree 2, without the
# constant (bias) column.
poly_converter = PolynomialFeatures(degree=2, include_bias=False)
poly_features = poly_converter.fit_transform(X)
# Feed the test frame through with exactly X's columns, in X's order. This
# keeps the transform valid even if test_data has gained extra columns since
# (for example the predictions column added further down) and the cell is
# re-run.
poly_test = poly_converter.transform(test_data[X.columns])

In [176]:
# Walk through the time-ordered splits. For each split the scaler is fitted on
# the training rows only and then applied to the held-out rows, the model is
# refitted, and the held-out R² is recorded. Previously the folds were fitted
# but never scored, so only the last one was ever evaluated.
# After the loop, `scaler`, `linear`, `X_scaled_test` and `y_test` hold the
# state of the final split, exactly as before.
cv_r2_scores = []
for train_index, test_index in tscv.split(poly_features):
    X_train, X_test = poly_features[train_index, :], poly_features[test_index, :]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    X_scaled_train = scaler.fit_transform(X_train)
    X_scaled_test = scaler.transform(X_test)
    linear.fit(X_scaled_train, y_train)
    # predict() does not alter the fitted model, so scoring here is side-effect free.
    cv_r2_scores.append(r2_score(y_test, linear.predict(X_scaled_test)))

print(
    f'CV R2 over {len(cv_r2_scores)} folds: '
    f'mean={np.mean(cv_r2_scores):.4f}, std={np.std(cv_r2_scores):.4f}'
)

In [177]:
# Score the model on the held-out rows left over from the final iteration of
# the cross-validation loop (X_scaled_test / y_test are that loop's last values).
y_pred = linear.predict(X_scaled_test)
R2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f'R2 Score: {R2}')

R2 Score: 0.5938099442559046


In [178]:
# Scale the polynomial test features with the scaler as last fitted, then
# predict with the model as last fitted.
poly_test_scaled = scaler.transform(poly_test)
test_pred = linear.predict(poly_test_scaled)

In [179]:
# Store the predictions next to the test features as a new column.
test_data.loc[:, 'consumption_predictions'] = test_pred

In [180]:
# Display the test frame, which now includes the consumption_predictions column.
test_data

Unnamed: 0_level_0,temperature,var1,pressure,windspeed,var2,consumption_predictions
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-07-24 00:00:00,-10.0,-16.4,1011.0,263.280,0,188.261363
2013-07-24 01:00:00,-10.0,-20.7,1011.0,267.175,0,160.363935
2013-07-24 02:00:00,-10.7,-17.1,1003.0,269.555,0,181.249577
2013-07-24 03:00:00,-13.6,-20.7,1008.0,273.060,0,160.030587
2013-07-24 04:00:00,-10.7,-17.1,1006.0,1.765,0,326.566008
...,...,...,...,...,...,...
2017-06-30 19:00:00,-5.7,-18.6,998.0,233.595,0,176.001496
2017-06-30 20:00:00,-5.7,-17.1,995.0,238.780,0,181.481247
2017-06-30 21:00:00,-7.1,-19.3,1004.0,244.325,0,171.387984
2017-06-30 22:00:00,-6.4,-19.3,1008.0,247.470,0,173.056602
