In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from lib.data_loader import IITG_DataLoader
from lib.functions import Functions
from lib.imputer import DataImputer
import os
%matplotlib inline

In [None]:
# Shared helper instances used by every cell below:
#   functions -> project utility methods (detrending, lag shifts, export, ...)
#   imputer   -> model-based gap filling helpers
functions, imputer = Functions(), DataImputer()

In [None]:
# Load the power import readings for 2022-02-02 .. 2022-12-31 without
# interpolation; the 'MW' column of the source files is used as the power value.
folder_path = 'power_import_data/'
apdcl_import = IITG_DataLoader()
raw_import_data = apdcl_import.load_all(
    folder_path,
    interpolate=False,
    power_header='MW',
    start_time='2022-02-02',
    end_time='2022-12-31',
)
# Blank out readings above 10 so they become missing values that the
# imputation loop further down will estimate.
# NOTE(review): 10 is presumably a plausibility ceiling for this feed - confirm.
raw_import_data.loc[raw_import_data['Power'].gt(10), 'Power'] = None

In [None]:
# Load pv generation (phase 2) data
folder_path = 'pv_gen_data/'
pv_gen = IITG_DataLoader()
solar_data = pv_gen.load_all(folder_path, interpolate=False, time_header='x1',
                             power_header='ActivePower',
                             start_time='2022-02-02', end_time='2022-12-31')
# Keep only strictly positive readings (zero, negative and missing rows are
# dropped). The explicit .copy() makes the filtered frame independent of the
# loader's frame, so the column assignment below is not a write into a slice
# (avoids SettingWithCopyWarning / undefined view-vs-copy behaviour).
solar_data = solar_data[solar_data['Power'] > 0].copy()
# Divide solar power values by 1000 to bring them to the same scale as the
# import data.
# NOTE(review): the original note said "kWh to MWh"; since the source column is
# 'ActivePower' this is presumably kW -> MW - confirm.
solar_data['Power'] = solar_data['Power'] / 1000

In [None]:
# Load the weather readings. The loader is told to treat the 'Temperature'
# column as its value column, which it returns under the name 'Power', so the
# column is renamed back to 'Temperature' right away.
folder_path = 'weather_data/'
weather = IITG_DataLoader()
weather_data = (
    weather
    .load_all(folder_path, interpolate=False, power_header='Temperature')
    .rename(columns={'Power': 'Temperature'})
)

In [None]:
# Compute total demand data
# Total Demand = Power Import + 1.9 * PV Generation (Phase 2)
# NOTE(review): 1.9 presumably scales the phase-2 plant up to the total
# installed PV capacity - confirm.
# Left join on the timestamp index: every import reading is kept, and the two
# 'Power' columns are disambiguated as 'Power_import' / 'Power_solar'.
demand_data = pd.merge(raw_import_data, solar_data, suffixes=('_import', '_solar'),
                       left_index=True, right_index=True, how='left')
# Timestamps without a PV reading count as zero generation. The result is
# assigned back instead of calling fillna(inplace=True) on the column
# selection: that chained in-place form is deprecated and does not update the
# frame once pandas Copy-on-Write is active.
demand_data['Power_solar'] = demand_data['Power_solar'].fillna(0)
# Missing import readings stay NaN here, so 'Power' is NaN wherever the import
# value is missing; those gaps are what the imputation below fills.
demand_data['Power'] = demand_data['Power_import'] + 1.9 * demand_data['Power_solar']
demand_data = demand_data.drop(columns=['Power_import', 'Power_solar'])
# Merge temperature data
demand_data = pd.merge(demand_data, weather_data,
                       left_index=True, right_index=True, how='left')
demand_data.head()

In [None]:
# Remove the long-term trend from both series, one column at a time.
# Each call returns the fitted trend together with the detrended frame;
# trend_power is added back to the predictions after imputation.
# NOTE(review): order=15 is presumably the order of the fitted trend polynomial - confirm.
trend_power, detrended_demand_data = functions.detrend_data(
    demand_data, column='Power', order=15
)
trend_temperature, detrended_demand_data = functions.detrend_data(
    detrended_demand_data, column='Temperature', order=15
)
detrended_demand_data.head()

In [None]:
# Add a squared-temperature feature that keeps the sign of the (detrended)
# temperature: Temperature_2 = Temperature * |Temperature|
detrended_demand_data['Temperature_2'] = (
    detrended_demand_data['Temperature'] * detrended_demand_data['Temperature'].abs()
)
detrended_demand_data.head()

In [None]:
import copy
# Function for imputing the data
def impute(data, daily, periodic, min_score=0.85, min_train_rows=1000):
    """Run one imputation pass over `data` and return how many values were filled.

    One model is evaluated per feature combination produced by
    `functions.generate_combinations(daily, periodic)`. Every model whose score
    exceeds `min_score` has its predictions handed to `imputer.fill_values`,
    best score first.

    Parameters
    ----------
    data : object accepted by `imputer.trim_data` / `imputer.fill_values`
        (a DataFrame in this notebook).
    daily, periodic : list
        Time offsets forwarded to `functions.generate_combinations`.
    min_score : float, default 0.85
        A model's predictions are used only if its score is strictly greater.
    min_train_rows : int, default 1000
        Combinations whose training matrix has fewer rows are skipped.

    Returns
    -------
    int
        Sum of the counts reported by `imputer.fill_values`.

    Notes
    -----
    Only the count is returned; the frame handed back by `fill_values` is not.
    NOTE(review): callers therefore rely on `fill_values` updating `data` in
    place - confirm.
    """
    # Three independent copies of every combination, laid end to end; the first
    # half of that tripled list additionally gets the temperature features.
    # NOTE(review): this leaves some combinations duplicated (they are fitted
    # more than once). Two copies with only the first one augmented may have
    # been the intent - confirm before changing, it alters which models run.
    base_sets = functions.generate_combinations(daily, periodic)
    time_sets = copy.deepcopy(base_sets) + copy.deepcopy(base_sets) + copy.deepcopy(base_sets)
    for time_set in time_sets[:len(time_sets) // 2]:
        time_set.extend(['Temperature', 'Temperature_2'])

    # Evaluate every non-empty combination and keep the good models.
    results = []
    for time_set in time_sets:
        if len(time_set) == 0:
            continue
        input_columns = functions.time_set_to_columns(time_set)

        trimmed_data, train_data = imputer.trim_data(data, input_columns)
        X_train, X_test, Y_train, Y_test, X_pred = imputer.train_test_pred_split(
            trimmed_data, train_data, input_columns)
        # Skip when there is too little training data or nothing to predict.
        if X_train.shape[0] < min_train_rows or X_pred.shape[0] < 1:
            continue
        Y_pred, score = imputer.predict(X_train, X_test, Y_train, Y_test, X_pred)

        if score > min_score:
            results.append((score, trimmed_data.index, Y_pred, input_columns))

    # Best score first. Sorting on the score alone matters: comparing whole
    # tuples falls through to the index/prediction arrays whenever two scores
    # tie, and those comparisons raise "truth value ... is ambiguous".
    results.sort(key=lambda result: result[0], reverse=True)

    # Fill the values. Values with higher score's model are filled first.
    filled = 0
    for _score, time_instants, Y_pred, _columns in results:
        data, count = imputer.fill_values(data, time_instants, Y_pred)
        filled += count
    return filled

In [None]:
# Define daily and periodic time instants
daily = [1, -1, 2, -2]
periodic = [24, -24, 48, -48, -72, 72]
detrended_demand_data['PredictedPower'] = np.nan

# Repeat imputation passes until no power value is missing. Each pass rebuilds
# the shifted power columns, fills what the accepted models can predict, and
# feeds the result back in as 'Power' so the next pass can build on it.
# A pass that fills nothing makes no progress; without a guard the loop would
# spin forever in that situation, so it stops after a few such passes in a row.
# NOTE(review): more than one stalled pass is tolerated in case model scores
# vary between runs (e.g. an unseeded train/test split) - confirm.
max_stalled_passes = 3
stalled_passes = 0
while detrended_demand_data['Power'].isna().sum() > 0:
    functions.do_power_shifts(detrended_demand_data, daily + periodic)
    filled_this_pass = impute(detrended_demand_data, daily, periodic)
    detrended_demand_data['Power'] = detrended_demand_data['PredictedPower']

    stalled_passes = stalled_passes + 1 if filled_this_pass == 0 else 0
    if stalled_passes >= max_stalled_passes:
        print("Imputation stalled; values still missing:",
              detrended_demand_data['Power'].isna().sum())
        break

In [None]:
# Put the power trend removed during detrending back onto the imputed series,
# and pair it with the original (gappy) demand for comparison. Both columns are
# taken positionally and then labelled with the detrended frame's index.
predicted_values = detrended_demand_data['PredictedPower'].values + trend_power
predicted_data = pd.DataFrame({
    'Power': demand_data['Power'].values,
    'PredictedPower': predicted_values,
}).set_index(detrended_demand_data.index)

In [None]:
# Visualize the data: imputed vs. original demand over a one-week window
start_time_ana = pd.to_datetime('2022-07-12')
end_time_ana = pd.to_datetime('2022-07-18')

# Both bounds are exclusive.
filtered_data = predicted_data[(predicted_data.index > start_time_ana) & (predicted_data.index < end_time_ana)]

# Draw on the explicit Axes and label each line, so the legend no longer
# depends on the order of the plot calls.
fig, ax1 = plt.subplots(figsize=(10, 6))
ax1.plot(filtered_data.index, filtered_data['PredictedPower'], label='Predicted Demand')
ax1.plot(filtered_data.index, filtered_data['Power'], color='red', label='Actual Demand')
ax1.set_xlabel('Time')
ax1.set_ylabel('Power demand')
ax1.set_title('Predicted vs. actual demand')
ax1.legend()
fig.autofmt_xdate()
# Render the figure (replaces a bare plt.plot() that only produced an empty output).
plt.show()

In [None]:
# R-squared of the predicted series against the original one, evaluated only
# on timestamps where both values are present.
df = predicted_data.dropna(how='any', subset=['Power', 'PredictedPower'])
r2 = r2_score(y_true=df['Power'], y_pred=df['PredictedPower'])
print("R-squared Score:", r2)

In [None]:
# Export the estimated demand: keep only the predicted series, publish it
# under the column name 'Power', and hand it to the project's exporter
# (which, per the original note, writes excel files).
final_data = (
    predicted_data
    .drop(columns=['Power'])
    .rename(columns={'PredictedPower': 'Power'})
)
functions.export_data(data=final_data, folder_path='power_demand_imputed_data', file_prefix='demand')