# Loading libraries

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import acf, adfuller, pacf

  from pandas.core import datetools


# Predicting

## Demand

In [2]:
# Load the demand data. header=None tells pandas the files have no header row,
# so the columns get integer labels 0..N-1.
# NOTE(review): rows are presumably days and columns hours of the day -- confirm
# against the data files.
demand_train_actual = pd.read_csv('./Data/Demand_Train.csv', header=None)
demand_train_oracle = pd.read_csv('./Data/Demand_Train_pred.csv', header=None)
demand_plb_oracle = pd.read_csv('./Data/Demand_LB_pred.csv', header=None)

While predicting demand, we observed that we could not do better than the oracle. Therefore, our prediction for energy demand is simply the oracle's prediction.

In [3]:
# Use the oracle's demand forecast unchanged. This binds a second name to the
# same DataFrame object; no copy is made.
demand_plb_pred = demand_plb_oracle

## Solar output

In [4]:
# Load the solar output data. header=None: the files have no header row, so
# the columns get integer labels 0..N-1.
# NOTE(review): solar_train_oracle is not referenced again in the cells shown
# in this notebook -- confirm whether it is still needed.
solar_train_actual = pd.read_csv('./Data/Solar_Train.csv', header=None)
solar_train_oracle = pd.read_csv('./Data/Solar_Train_pred.csv', header=None)

In [5]:
# Flatten the training table row by row into one long series, then apply a
# seasonal difference with a lag of 24*365 samples. The leading rows, which
# have no counterpart 24*365 samples earlier, come out as NaN and are dropped.
series = (
    pd.Series(solar_train_actual.values.flatten())
    .diff(24*365)
    .dropna()
)

In [6]:
# Fit an ARMA(2, 5) model to the seasonally differenced series: order=(p, d, q)
# with d=0, so no further differencing is applied here.
# NOTE(review): `statsmodels.tsa.arima_model.ARIMA` appears to be deprecated in
# later statsmodels releases in favour of `statsmodels.tsa.arima.model.ARIMA`
# -- confirm against the installed version before re-running.
model = ARIMA(series.values, order=(2, 0, 5))
# trend='c' includes a constant term in the fitted model.
model_fit = model.fit(trend='c')
print(model_fit.summary())



                              ARMA Model Results                              
Dep. Variable:                      y   No. Observations:                12840
Model:                     ARMA(2, 5)   Log Likelihood               -6061.807
Method:                       css-mle   S.D. of innovations              0.388
Date:                Thu, 11 Jan 2018   AIC                          12141.613
Time:                        19:13:17   BIC                          12208.756
Sample:                             0   HQIC                         12164.061
                                                                              
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.3524      0.005    -73.573      0.000      -0.362      -0.343
ar.L1.y        1.9318   2.99e-05   6.45e+04      0.000       1.932       1.932
ar.L2.y       -1.0000   2.35e-05  -4.26e+04      0.0

In [7]:
# Forecast the next 24*50 steps of the differenced series (out of sample,
# starting right after the last fitted observation).
forecast_start = len(series.values)
forecast_diff = model_fit.predict(start=forecast_start, end=forecast_start + 24*50 - 1)

# Undo the seasonal differencing: add back the actual values observed 365*24
# samples before each forecast step.
year_ago_actual = solar_train_actual.values.flatten()[-365*24 : -365*24 + 50*24]

# One row per block of 24 consecutive forecast steps.
solar_val_pred = (forecast_diff + year_ago_actual).reshape((50, 24))

## Market price

In [8]:
# Load the actual and oracle-predicted market prices for the training period.
# header=None: the files have no header row, so the columns get integer labels
# 0..N-1 (column 0 is what the text below calls HOUR 1).
price_train_actual = pd.read_csv('./Data/Price_Train.csv', header=None)
price_train_oracle = pd.read_csv('./Data/Price_Train_pred.csv', header=None)

* In the training set's actual market price data, we first find the days with minimum market price for **HOUR 1** and denote them by **DAY 1**.
* Now for **DAY 1** to **DAY 31** we find the average market price for each hour.



* In the oracle's predicted market price data for the test set, we find the corresponding **DAY 1** by finding the days with minimum market price for **HOUR 1**.
* The predicted market price for each day of the test set is then the average market price computed above for the matching day of the cycle.

### Determining DAY 1 in actual data

In [9]:
# Row labels of every training day whose column-0 (HOUR 1) price is below 0.8;
# these rows are treated as DAY 1 of a price cycle.
day1_train_actual = list(
    price_train_actual[0].loc[lambda hour1_price: hour1_price < 0.8].index
)

In [10]:
# Show the detected DAY 1 row labels for a visual sanity check.
print(day1_train_actual)

[15, 46, 74, 105, 135, 166, 196, 227, 258, 288, 318, 348, 379, 410, 438, 469, 499, 530, 560, 591, 622, 652, 682, 712, 743, 774, 802, 833, 863, 894]


In [11]:
# Gaps (in rows) between consecutive DAY 1 occurrences, i.e. the cycle lengths.
pd.Series(day1_train_actual).diff().dropna().values

array([ 31.,  28.,  31.,  30.,  31.,  30.,  31.,  31.,  30.,  30.,  30.,
        31.,  31.,  28.,  31.,  30.,  31.,  30.,  31.,  31.,  30.,  30.,
        30.,  31.,  31.,  28.,  31.,  30.,  31.])

### Calculating average prices for DAY 1-31

In [12]:
# Each pair of consecutive DAY 1 rows delimits one cycle [start, stop).
cycle_bounds = np.array(day1_train_actual)
cycles = list(zip(cycle_bounds[:-1], cycle_bounds[1:]))

# For every hour column, average the price per day-of-cycle over all cycles.
# Cycles have different lengths, so each of the 31 day slots keeps its own
# count of how many cycles contributed to it.
average_price = []
for hour in range(24):
    hourly_prices = price_train_actual[hour]
    price_total = np.zeros(31)
    cycle_count = np.zeros(31)
    for start, stop in cycles:
        cycle_length = stop - start
        price_total[:cycle_length] += hourly_prices[start:stop].values
        cycle_count[:cycle_length] += 1
    average_price.append(price_total / cycle_count)

In [13]:
# Turn the list of 24 per-hour arrays (each of length 31) into a DataFrame:
# one row per hour, one column per day-of-cycle slot (0..30).
# Note that this rebinds `average_price` from a list to a DataFrame.
average_price = pd.DataFrame(np.array(average_price))
average_price.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,21,22,23,24,25,26,27,28,29,30
0,0.747586,1.712759,1.311379,1.313793,1.362414,1.288621,1.205862,1.797241,1.354483,1.743103,...,2.178276,2.108966,2.168276,2.06931,2.021724,1.99,1.912414,1.819615,1.781154,1.705333
1,0.529655,1.638621,1.314828,1.309655,1.199655,1.171034,1.182759,1.736897,1.356552,1.692069,...,2.191034,2.111379,2.240345,2.063793,1.96,1.982069,1.958966,1.845769,1.728462,1.711333
2,0.502759,1.208621,1.322759,1.316207,1.201724,1.088966,1.095172,1.684138,1.355172,1.571724,...,2.126552,2.033448,2.214138,2.008276,1.905517,1.981724,1.92931,1.801154,1.661923,1.661333
3,0.527586,1.010345,1.308966,1.312759,1.366897,1.153448,0.982414,1.717931,1.353103,1.358966,...,2.121379,2.031034,2.209655,1.964828,1.847586,1.975862,1.898276,1.733846,1.621923,1.599333
4,0.646897,1.667241,1.432414,1.315172,1.508621,1.472414,1.472069,1.774828,1.384483,1.757241,...,2.000345,1.868966,2.21,2.072414,1.906552,2.225862,2.223448,1.751538,1.680769,1.662667


### Determining DAY 1 in oracle's prediction

In [14]:
# Load the oracle's predicted market prices for the period to be predicted.
# header=None: no header row, so the columns get integer labels 0..N-1.
price_plb_oracle = pd.read_csv('./Data/Price_LB_pred.csv', header=None)

In [15]:
# Row labels of the days in the oracle's prediction whose column-0 (HOUR 1)
# price is below 0.9; these mark DAY 1 of each cycle in the prediction window.
# Note the threshold here (0.9) differs from the 0.8 used on the actual data.
day1_plb_oracle = list(
    price_plb_oracle[0].loc[lambda hour1_price: hour1_price < 0.9].index
)
print(day1_plb_oracle)

[16, 46]


### Manually formulating prediction

In [16]:
# Stitch the 50-day prediction together from the per-day-of-cycle averages.
cycle_days = list(range(31))
first_day1 = day1_plb_oracle[0]
second_day1 = day1_plb_oracle[1]

# Days before the first DAY 1: the tail of a cycle.
leading_days = cycle_days[-1 * first_day1:]
# Days from the first DAY 1 up to (not including) the second DAY 1.
middle_days = cycle_days[:second_day1 - first_day1]
# Days from the second DAY 1 to the end of the 50-day window.
trailing_days = cycle_days[:50 - second_day1]

price_plb_pred = pd.concat(
    [
        average_price[leading_days],
        average_price[middle_days],
        average_price[trailing_days],
    ],
    axis=1,
)
# Relabel the stitched columns as day numbers 0..49, then transpose so that
# each row is a day and each column an hour.
price_plb_pred.columns = np.arange(50)
price_plb_pred = price_plb_pred.T

# Generating Bid quantity and price

In [17]:
# Re-read the same two price files that were loaded in the Market price
# section, rebinding price_train_actual and price_train_oracle.
price_train_actual = pd.read_csv('./Data/Price_Train.csv', header=None)
price_train_oracle = pd.read_csv('./Data/Price_Train_pred.csv', header=None)

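We compute the error in the oracle's market price prediction (actual minus predicted) for the 50 corresponding days of each of the last two years, keeping only the cases where the actual price exceeded the prediction, i.e. where a bid at the predicted price would have been lost. We then add a statistic of this error (the 75th percentile, averaged over the two years) to our predicted price, so that we are less likely to lose a bid.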

In [18]:
# Row windows covering the 50 rows that start 365 and 2*365 rows before the
# end of the training data.
one_year_back = slice(-365, -365 + 50)
two_years_back = slice(-365*2, -365*2 + 50)

# Oracle error (actual - predicted) inside each window.
error_1_back = price_train_actual[one_year_back] - price_train_oracle[one_year_back]
error_2_back = price_train_actual[two_years_back] - price_train_oracle[two_years_back]

# Per column, the 75th percentile of the positive errors only (non-positive
# entries are masked to NaN and ignored by describe()), kept as one-row frames.
q3_1_back = error_1_back[error_1_back > 0].describe().loc[['75%']]
q3_2_back = error_2_back[error_2_back > 0].describe().loc[['75%']]

# Average the two years' percentiles.
error_statistic = (q3_1_back + q3_2_back) / 2
error_statistic.values

array([[ 0.19875,  0.21   ,  0.16625,  0.18625,  0.19125,  0.2525 ,
         0.37625,  0.48625,  0.5025 ,  0.5525 ,  0.38125,  0.3975 ,
         0.37875,  0.33   ,  0.355  ,  0.315  ,  0.34625,  0.37   ,
         0.35   ,  0.355  ,  0.34   ,  0.31875,  0.2425 ,  0.1325 ]])

In [19]:
# Quantity to bid: predicted demand minus predicted solar output, flattened
# row by row into a single vector.
net_demand = demand_plb_pred - solar_val_pred
bid_quantity = net_demand.values.flatten()

# Price to bid: predicted market price plus the per-column error margin
# (the one-row margin broadcasts across all rows), flattened the same way.
bid_price = (price_plb_pred.values + error_statistic.values).flatten()

# Two-row frame (prices, quantities); written transposed so that each CSV
# line is "price,quantity" with no header and no index column.
submission = pd.DataFrame([bid_price, bid_quantity])
submission.T.to_csv('23.csv', header=None, index=None)

In [20]:
def submit(name):
    """Upload the file ``name`` to the online grader.

    Parameters
    ----------
    name : str
        Path of the file to upload; it is also sent as the multipart filename.

    Returns
    -------
    requests.Response
        The grader's response to the upload request.

    Raises
    ------
    requests.HTTPError
        If the grader answers with a 4xx/5xx status code.
    """
    url = 'http://interiit.tech:3000/upload'
    # Open the file in a context manager so the handle is closed even when the
    # request raises (the handle was previously never closed).
    with open(name, 'rb') as upload_file:
        # 'text/csv' is the valid MIME type; the leading '.' that used to be
        # here made the declared content type malformed.
        files = {'file': (name, upload_file, 'text/csv', {'Content-Disposition': 'form-data'})}
        r = requests.post(url, files=files)
    # Surface a failed upload instead of silently discarding the response.
    r.raise_for_status()
    return r

In [21]:
def score(team_id):
    """Fetch the grader's results table and return the score for ``team_id``.

    The results CSV is read with pandas' default integer index, and
    ``team_id`` is used as the row label in that index. The value of the
    'Score' column of that row is returned.
    """
    results = pd.read_csv('http://interiit.tech:3000/results.csv')
    team_row = results.loc[team_id]
    return team_row['Score']

In [22]:
# Upload the generated bid file, then look up team 23's score; the score is
# the last expression, so it is shown as the cell output.
submit('23.csv')
score(23)

322250