# Substation Class

In [15]:
import numpy as np
import pandas as pd
import scipy

In [14]:
class Substation:
    """Load data of one substation plus helpers to summarise and plot it.

    The methods assume ``dataframe`` has a datetime index (they read
    ``index.year`` / ``index.month`` and slice rows by date strings) and
    that the total load is stored in an ``'AggregatedLoad'`` column.
    """

    # Initializer / Instance Attributes
    def __init__(self, region, dataframe):
        """Store the region label and the load dataframe.

        Args:
            region: Label of the region the substation belongs to.
            dataframe: Load data, indexed by date/time.
        """
        self.dataframe = dataframe
        self.load_count = 0      # reported by description(); never updated in this class
        self.region = region
        self.mu = None           # set by calculate_norm()
        self.sigma = None        # set by calculate_norm()
        self.start = None        # set by update_dates() / filter_whole_years()
        self.end = None          # set by update_dates() / filter_whole_years()

    def update_dates(self, start, end):
        """Record the first and last date covered by the data."""
        self.start = start
        self.end = end

    def calculate_norm(self):
        """Fit a normal distribution to 'AggregatedLoad' and store mu and sigma."""
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # guarantee that ``scipy.stats`` is loaded on every scipy version.
        from scipy import stats

        agg_load_list = self.dataframe['AggregatedLoad'].tolist()
        self.mu, self.sigma = stats.norm.fit(agg_load_list)

    def create_date_cols(self):
        """Add 'Year', 'Month' and 'Weekday' columns derived from the index."""
        self.dataframe['Year'] = self.dataframe.index.year
        self.dataframe['Month'] = self.dataframe.index.month
        # ``weekday_name`` was removed in pandas 1.0; ``day_name()`` yields the
        # same English day names ("Monday", ...).
        self.dataframe['Weekday'] = self.dataframe.index.day_name()

    def description(self):
        """Return a human-readable summary of the substation data."""
        # The hour count is the row count minus one, as in the original code.
        return ('Substation based on data from {} to {}. '.format(self.start, self.end)
                + 'The substation contains {} loads and a total {} hours of data. '.format(
                    self.load_count, self.dataframe.shape[0] - 1)
                + 'The average hourly comsumption is {} (-/+ {}) kWh.'.format(self.mu, self.sigma))

    def _date_bounds(self):
        """Return the (first, last) timestamps of the period covered by the data."""
        # Prefer the recorded dates (which may be strings after a previous
        # filter_whole_years() call); fall back to the index when unset.
        index = self.dataframe.index
        first = pd.Timestamp(self.start) if self.start is not None else index.min()
        last = pd.Timestamp(self.end) if self.end is not None else index.max()
        return first, last

    # Function cutting dataframe to whole years.
    def filter_whole_years(self, jan_start = False):
        """Trim the dataframe to a whole number of years and update the dates.

        Args:
            jan_start: If True, keep only complete calendar years, from
                1 January of the year after the first date to 31 December of
                the year before the last date. If False (the default), keep as
                many 365-day years as fit, counted from the first date.

        After the call ``self.start`` and ``self.end`` are 'YYYY-MM-DD' strings.
        """
        first_date, last_date = self._date_bounds()
        if jan_start:
            start_date = str(first_date.year + 1) + '-01-01'
            end_date = str(last_date.year - 1) + '-12-31'
        else:
            one_year = pd.Timedelta(days=365)
            max_years = (last_date - first_date) // one_year
            start_date = first_date.strftime('%Y-%m-%d')
            end_date = (first_date + max_years * one_year).strftime('%Y-%m-%d')
        self.update_dates(start_date, end_date)
        # Copy so that columns added later do not write into a slice of the
        # original frame.
        self.dataframe = self.dataframe.loc[start_date:end_date].copy()

    # Function for generating and printing different kinds of
    # information about the dataframe and load profiles.
    def print_insights(self,
                       duration_curve = True,
                       numerical_info = True,
                       month_plot = True,
                       weekday_plot = True):
        """Plot the selected overviews of the aggregated load.

        Args:
            duration_curve: Plot the load duration curve.
            numerical_info: Accepted for interface compatibility; no numerical
                output is implemented in this method.
            month_plot: Box plot of 'AggregatedLoad' per month.
            weekday_plot: Box plot of 'AggregatedLoad' per weekday.

        Relies on ``sns``, ``from_df_to_sorted_lst``,
        ``plot_load_duration_curve`` and ``self.add_aggregated_col`` being
        defined elsewhere (not visible in this cell). The ``%matplotlib inline``
        magic is IPython-only syntax and belongs in a notebook cell, not in a
        method body.
        """
        if 'AggregatedLoad' not in self.dataframe.columns:
            self.add_aggregated_col()
        if not {'Month', 'Weekday'}.issubset(self.dataframe.columns):
            self.create_date_cols()

        if duration_curve:
            dataframe_lst = from_df_to_sorted_lst(self.dataframe)
            plot_load_duration_curve(dataframe_lst)

        # NOTE(review): neither boxplot call is given an ``ax``, so both may
        # draw on the same current axes when both flags are set — confirm.
        if month_plot:
            ax = sns.boxplot(data=self.dataframe, x='Month', y='AggregatedLoad')
            ax.set_ylabel('kWh')
            ax.set_title('Hourly comsumption of the substation')

        if weekday_plot:
            order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
            ax1 = sns.boxplot(data=self.dataframe, x='Weekday', y='AggregatedLoad', order=order)
            ax1.set_ylabel('kWh')
            ax1.set_title('Hourly comsumption of the substation')
        

In [7]:
# Returns an updated dataframe object with
# a new profile added according to the input parameters. 

def read_LP(dataframe,load_type, num, path_dict):
    """Add ``num`` load profiles of ``load_type`` to ``dataframe``.

    Args:
        dataframe: Dataframe collected so far; may be empty.
        load_type: Key into ``path_dict``; also used as the column-name prefix.
        num: Number of profile columns to read from the CSV file.
        path_dict: Maps load types to CSV paths. A value of ``None`` means
            there is no file for that load type.

    Returns:
        ``dataframe`` unchanged if the path is ``None``; the newly read
        columns if ``dataframe`` is empty; otherwise the inner join of
        ``dataframe`` and the new columns on their indexes.

    Raises:
        KeyError: If ``load_type`` is not a key of ``path_dict``.
    """
    path = path_dict[load_type]
    if path is None:
        return dataframe

    # Column 0 is the date index, columns 1..num are the profiles.
    # NOTE: a ``num`` larger than the number of profile columns in the file
    # is not handled here (open TODO from the original code).
    LP_cols = pd.read_csv(path, index_col = 0,
                          parse_dates = True,
                          usecols = list(range(num + 1)))
    LP_cols.columns = [load_type + str(i) for i in range(1, len(LP_cols.columns) + 1)]

    if dataframe.empty:
        return LP_cols
    # Inner join keeps only timestamps present in both frames.
    # Needs revisiting if the index structure changes.
    return dataframe.merge(LP_cols, how = 'inner', left_index=True, right_index=True)

In [8]:
# Helper for creating a substation.
# Prompts the user for the number of loads of each
# type in a predefined list of load types.

def input_allocation_dict(load_type_list):
    """Ask the user how many loads of each type to allocate.

    Prompts once per entry of ``load_type_list``, in order, and converts each
    answer with ``int``. Returns a dict mapping load type to that count.
    """
    return {
        load: int(input('Number of '+ load + ': '))
        for load in load_type_list
    }

In [10]:
# A function to copy a load profile with a stochastic deviation
# from the original. Each value is shifted by a random relative
# deviation drawn uniformly from [-sigma, sigma), with default
# sigma = 0.1. The function adds the new column to the given
# dataframe and returns that dataframe.

def copy_load_stochastic(dataframe, column_name, sigma=0.1):
    """Add a randomly perturbed copy of ``column_name`` to ``dataframe``.

    Every value is shifted by a relative deviation drawn uniformly from
    ``[-sigma, sigma)``. The result is stored in a new column named
    ``column_name + '_stoch_copy'`` on the dataframe that was passed in,
    and that same dataframe is returned.
    """
    n_rows = dataframe.shape[0]
    lower, upper = -sigma, sigma

    # One relative deviation per row, scaled from [0, 1) to [lower, upper).
    deviation = (upper - lower) * np.random.random_sample(size=n_rows) + lower

    original = dataframe[column_name]
    dataframe[column_name + '_stoch_copy'] = original + original.mul(deviation)
    return dataframe

In [None]:
    # Class Attributes
    # Maps each load-type name to the relative path of a CSV file.
    # NOTE(review): this cell is indented like a class body but has no visible
    # class header; presumably it belongs inside the Substation class and is
    # passed as `path_dict` to read_LP — confirm.
    load_csv_path = {'house_new':'../data/new_houses.csv', 
                     'house_old':'../data/old_houses.csv', 
                     'house_mix_DH': '../data/mixed_ages_houses_district_heating.csv', 
                     'appart_DH': '../data/new_appartments_district_heating.csv'
                    } 