In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from data_loader import IITG_DataLoader
from functions import Functions
from imputer import DataImputer
%matplotlib inline

In [7]:
functions = Functions()
imputer = DataImputer()

In [8]:
# Load the data
folder_path = 'power_import_data/'
apdcl_import = IITG_DataLoader()
raw_import_data = apdcl_import.load_all(folder_path, interpolate=False, power_header='MW', \
                                        start_time='2022-02-02', end_time='2022-12-31')
raw_import_data.loc[raw_import_data['Power'] > 10, 'Power'] = None
apdcl_import.describe()

Unnamed: 0,Params,Values
0,Columns,Power
1,Shape,"(6763, 1)"
2,Start Time,2022-02-02 01:00:00
3,End Time,2022-12-30 23:00:00
4,Mean Power,3.704663
5,NaN Powers,2272


In [9]:
# Load the data
folder_path = 'pv_gen_data/'
pv_gen = IITG_DataLoader()
solar_data = pv_gen.load_all(folder_path, interpolate=False, time_header='x1', power_header='ActivePower',\
                 start_time='2022-02-02', end_time='2022-12-31')
pv_gen.describe()

Unnamed: 0,Params,Values
0,Columns,Power
1,Shape,"(26492, 1)"
2,Start Time,2022-02-02 05:40:00
3,End Time,2022-12-30 17:10:00
4,Mean Power,230.395487
5,NaN Powers,0


In [10]:
solar_data = solar_data[solar_data['Power'] > 0]
solar_data.loc[:,'Power'] = solar_data['Power']/1000
solar_data.shape

(25530, 1)

In [11]:
demand_data = pd.merge(raw_import_data, solar_data, suffixes=('_import', '_solar'), \
                     left_index=True, right_index=True, how='left')
demand_data['Power_solar'].fillna(0, inplace=True)
demand_data['Power'] = demand_data['Power_import'] + 1.9*demand_data['Power_solar']
demand_data = demand_data.drop(columns=['Power_import','Power_solar'])
demand_data.head()

Unnamed: 0,Power
2022-02-02 01:00:00,
2022-02-02 02:00:00,
2022-02-02 03:00:00,
2022-02-02 04:00:00,
2022-02-02 05:00:00,2.3


In [12]:
functions = Functions()
trend, detrended_demand_data = functions.detrend_data(demand_data, order=15, column='Power')
detrended_demand_data.head()

Unnamed: 0,Power
2022-02-02 01:00:00,
2022-02-02 02:00:00,
2022-02-02 03:00:00,
2022-02-02 04:00:00,
2022-02-02 05:00:00,-0.010679


In [13]:
values = [-1,1,-2,2,-24,24,-48,48]
functions.do_power_shifts(detrended_demand_data, values=values)
detrended_demand_data['PredictedPower'] = [np.nan]*detrended_demand_data.shape[0]

In [14]:
def get_input_columns(time_set):
    input_columns = []
    for time in time_set:
        input_column = functions.time_to_word(time)
        input_columns.append(input_column)

In [15]:
def impute(data):
    daily = [-1,1,-2,2]
    seasonal = [-24,24,-48,48]
    time_sets = functions.generate_combinations(daily, seasonal)
    
    results = []
    for time_set in time_sets:
        if len(time_set)==0:
            continue
        input_columns = get_input_columns(time_set)

        trimmed_data, train_data = imputer.trim_data(data, input_columns)
        X_train, X_test, Y_train, Y_test, X_pred = imputer.train_test_pred_split(trimmed_data, train_data, input_columns)
        if X_train.shape[0]<1000 or X_pred.shape[0]<1:
            continue
        Y_pred, score = imputer.predict(X_train, X_test, Y_train, Y_test, X_pred)

        time_instants = trimmed_data.index
        if(score>0.85):
            results.append((score, time_instants, Y_pred))
        
    sorted(results, reverse=True)
    filled = 0
    for result in results:
        data, count = imputer.fill_values(data, result[1], result[2])
        filled+=count 
    return filled

In [16]:
while detrended_demand_data['Power'].isna().sum()>500:
    impute(detrended_demand_data)
    detrended_demand_data['Power'] = detrended_demand_data['PredictedPower']
    functions.do_power_shifts(detrended_demand_data, values)

TypeError: unsupported operand type(s) for +: 'NoneType' and 'list'

In [None]:
predicted_values = detrended_demand_data['PredictedPower'].values+trend
predicted_data = pd.DataFrame({'Power':predicted_values})
predicted_data.index = detrended_demand_data.index

In [None]:
start_time_ana = pd.to_datetime('2022-03-23')
end_time_ana = pd.to_datetime('2022-03-28')

filtered_pred = predicted_data[(predicted_data.index>start_time_ana) & (predicted_data.index<end_time_ana)]
filtered_actual = demand_data[(demand_data.index>start_time_ana) & (demand_data.index<end_time_ana)]

fig, ax1 = plt.subplots(figsize=(10, 6))
plt.plot(filtered_pred.index, filtered_pred['Power'])
plt.plot(filtered_actual.index, filtered_actual['Power'], color='red')
fig.autofmt_xdate()
plt.legend(['Predicted Demand', 'Actual Demand'])
plt.plot()