In [None]:
import time
import random
import pandas as pd


## Pandas Performance

Vi kommer att följa den här guiden: https://realpython.com/fast-flexible-pandas/

Läs igenom introduktionen där, minst! Du kan sedan grotta ner dig mer i detaljerna också.

In [None]:
# Load the demand profile CSV; the path is resolved relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='../data/demand_profile.csv')

**Omvandla till datetime rakt på**

In [None]:
def convert(df, column_name):
    """Parse one column of ``df`` as datetimes without specifying a format.

    Args:
        df: Object indexable by ``column_name`` (e.g. a DataFrame).
        column_name: Label of the column to parse.

    Returns:
        The result of ``pd.to_datetime`` on that column; ``df`` is not modified.
    """
    raw_values = df[column_name]
    return pd.to_datetime(raw_values)


In [None]:
%%timeit -r 5 -n 1

df['date_time'] = convert(df, 'date_time')

**Omvandla med på förhand angivet format**

In [None]:
def convert_with_format(df, column_name):
    """Parse one column of ``df`` as datetimes using an explicit format string.

    The format is fixed to ``'%d/%m/%y %H:%M'`` (day/month/two-digit year,
    then hour:minute).

    Args:
        df: Object indexable by ``column_name`` (e.g. a DataFrame).
        column_name: Label of the column to parse.

    Returns:
        The result of ``pd.to_datetime`` on that column; ``df`` is not modified.
    """
    day_first_format = '%d/%m/%y %H:%M'
    return pd.to_datetime(df[column_name], format=day_first_format)

In [None]:
%%timeit -r 10 -n 1

df['date_time'] = convert_with_format(df, 'date_time')

**Att loopa över Pandas, generellt ej rekommenderat**

In [None]:
def apply_tariff(kwh, hour):
    """Calculate the cost of electricity for a given hour of the day.

    Args:
        kwh: Energy used; multiplied by the rate that applies to ``hour``.
        hour: Hour of day, expected in the half-open range [0, 24).

    Returns:
        ``rate * kwh``, where the rate is 12 for hours 0-6, 20 for hours 7-16
        and 28 for hours 17-23.

    Raises:
        ValueError: If ``hour`` is outside [0, 24). This includes NaN, which
            fails every range comparison.
    """
    if 0 <= hour < 7:
        rate = 12
    elif 7 <= hour < 17:
        rate = 20
    elif 17 <= hour < 24:
        rate = 28
    else:
        # Without this branch `rate` would be unbound and the return below
        # would fail with an unhelpful UnboundLocalError.
        raise ValueError(f"hour must be in the range [0, 24), got {hour!r}")

    return rate * kwh


**for i in range(len(df))** 

**WARNING: IN PARTICULAR, DON'T DO THIS**

In [None]:
%%timeit -r 10 -n 1

# WARNING: do NOT do this. Anti-pattern kept only so it can be timed:
# a positional index loop that performs two separate df.iloc[i] row lookups per row.

energy_cost_list = []

for i in range(len(df)):

    # Get electricity used and hour of day
    # (.hour requires 'date_time' to hold datetime-like values at this point)
    energy_used = df.iloc[i]['energy_kwh']
    hour = df.iloc[i]['date_time'].hour

    # Compute the cost for this row and append it to the list;
    # the list is only built here, it is not written back to df
    energy_cost = apply_tariff(energy_used, hour)
    energy_cost_list.append(energy_cost)

**for index, row in df.iterrows()**

In [None]:
%%timeit -r 10 -n 1

# Row-by-row loop using df.iterrows(); each iteration yields (index, row).
# The index is unused; the costs are collected in a plain Python list and
# not written back to df.
energy_cost_list = []
for index, row in df.iterrows():

   # Get electricity used and hour of day
   # (.hour requires 'date_time' to hold datetime-like values at this point)
   energy_used = row['energy_kwh']
   hour = row['date_time'].hour
   
   # Compute the cost for this row and append it to the list
   energy_cost = apply_tariff(energy_used, hour)
   energy_cost_list.append(energy_cost)

**med apply**

In [None]:
%%timeit -r 10 -n 1

# Row-wise apply (axis=1): the lambda receives one row at a time and passes that
# row's 'energy_kwh' value and the .hour of its 'date_time' value to apply_tariff.
# The result of df.apply is not assigned anywhere in this cell.
df.apply(lambda row: apply_tariff(
                                  kwh=row['energy_kwh'],
                                  hour=row['date_time'].hour),
                                  axis=1
                                 )

**Att först välja ut specifika rader, och sedan utföra en vektoriserad operation på dem**

**Detta är bland de snabbaste metoderna tillgängliga direkt i Pandas**

In [None]:
%%timeit -r 10 -n 1

peak_hours_filter = df['date_time'].dt.hour.isin(range(17, 24))
shoulder_hours_filter = df['date_time'].dt.hour.isin(range(7, 17))
off_peak_hours_filter = df['date_time'].dt.hour.isin(range(0, 7))

#observera att vi skapar en ny kolumn här också, kallad cost_center

df.loc[peak_hours_filter, 'cost_center'] = df.loc[peak_hours_filter, 'energy_kwh'] * 28
df.loc[shoulder_hours_filter, 'cost_center'] = df.loc[shoulder_hours_filter, 'energy_kwh'] * 20
df.loc[off_peak_hours_filter, 'cost_center'] = df.loc[off_peak_hours_filter, 'energy_kwh'] * 12

