In [13]:
import requests
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime

from power.ml_ops.data import get_pv_data, clean_pv_data, get_data_with_cache, get_stats_table
from power.ml_ops.model import model_yesterday
from power.ml_ops.registry import load_model
from power.interface.main import pred


from pathlib import Path
from power.params import *

In [14]:
def postprocess(
  today: str,
  preprocessed_df: pd.DataFrame,
  stats_df: pd.DataFrame,
  pred_df: pd.DataFrame,
) -> pd.DataFrame:
  """
  Create a df that contains all information necessary for the plot in streamlit.

  Input:
    - today: date string parseable by pd.Timestamp, with or without a time
      part (e.g. '2013-05-08' or '2013-05-08 00:00:00'); localized to UTC.
    - preprocessed_df: frame with a 'utc_time' column; it is inner-merged
      with the hourly plotting window on that column.
    - stats_df: statistics table. If its columns are a MultiIndex, the outer
      level is dropped; the remaining column names must include
      'hour_of_year' (strings formatted as "%m%d%H"). The caller's frame is
      left untouched.
    - pred_df: currently ignored; the prediction is always recomputed with
      `pred(...)`.
      NOTE(review): decide whether callers should be able to inject it.
  Output:
    - One row per hourly timestamp of the window that is present in
      preprocessed_df and has a matching 'hour_of_year' in the statistics,
      left-joined with the output of `pred` on 'utc_time'.
  """
  # define time period (3 days) for plotting: [today - 2 days, today + 1 day]
  today_timestamp = pd.Timestamp(today, tz='UTC')
  window_df = pd.date_range(
      start=today_timestamp - pd.Timedelta(days=2),
      end=today_timestamp + pd.Timedelta(days=1),
      freq=pd.Timedelta(hours=1),
  ).to_frame(index=False, name='utc_time')

  # create df with the preprocessed data in the time window
  plot_df = pd.merge(window_df, preprocessed_df, on='utc_time', how='inner')

  # add statistics in the time window, keyed by month/day/hour
  plot_df['hour_of_year'] = plot_df['utc_time'].dt.strftime("%m%d%H")
  # Flatten the columns on a copy: assigning to stats_df.columns directly
  # would alter the caller's frame and make a second call fail.
  stats_flat = stats_df.copy()
  if isinstance(stats_flat.columns, pd.MultiIndex):
    stats_flat.columns = stats_flat.columns.droplevel(level=0)
  plot_df = pd.merge(plot_df, stats_flat, on='hour_of_year', how='inner')

  # add prediction for day-ahead in time window
  # Build the noon timestamp from the parsed date so that a `today` value
  # that already carries a time part does not yield a malformed string.
  input_pred = f"{today_timestamp.date()} 12:00:00"  # '2013-05-08 12:00:00'
  pred_df = pred(input_pred)
  plot_df = pd.merge(plot_df, pred_df, on='utc_time', how='left')

  return plot_df

In [11]:
# Location of the local CSV cache for the processed PV table.
data_processed_cache_path = Path(LOCAL_DATA_PATH) / "processed" / "processed_pv.csv"

# Select the whole processed PV table, ordered by timestamp.
query = f"""
    SELECT *
    FROM {GCP_PROJECT}.{BQ_DATASET}.processed_pv
    ORDER BY utc_time
"""

# Load through the project's caching helper, passing the query and cache path.
data_processed = get_data_with_cache(
    gcp_project=GCP_PROJECT,
    query=query,
    cache_path=data_processed_cache_path,
    data_has_header=True,
)

def visualisation(input_date: str, power_source='pv'):
  """
  Build the plotting payload for `input_date` as a plain dict.

  Reads the module-level `data_processed` frame, derives the statistics
  table from it, runs `postprocess` and converts the result with
  `DataFrame.to_dict()` for data transfer from backend to frontend.
  `power_source` is accepted but not used by this function.
  """
  stats_table = get_stats_table(data_processed, capacity=False)

  # dummy prediction (use predict function instead): the observed
  # 'electricity' column relabelled as 'pred'
  placeholder_pred = data_processed[['utc_time', 'electricity']].rename(
      columns={'electricity': 'pred'}
  )

  merged = postprocess(input_date, data_processed, stats_table, placeholder_pred)
  return merged.to_dict()

# API requests

In [None]:
input_prediction_date = "2019-12-10 00:00:00"
# make api call
base_url = "http://127.0.0.1:8000"
# Upper bound (seconds) per request so an unresponsive backend cannot hang the kernel.
REQUEST_TIMEOUT_S = 60


def _get_json(url, params):
    """GET `url` with query `params`; raise on an HTTP error status, else return the decoded JSON body."""
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_S)
    # Fail here with a clear HTTPError instead of passing an error payload downstream.
    response.raise_for_status()
    return response.json()


# model
params_model = {
    'input_date': input_prediction_date,
    'n_days': 2,
    'power_source': 'pv',
}

# NOTE(review): this is the same route as the baseline request below, so
# response_model duplicates response_baseline -- confirm the intended model route.
endpoint_model = "/baseline_yesterday"
url_model = f"{base_url}{endpoint_model}"
response_model = _get_json(url_model, params_model)

# baseline
params_baseline = {
    'input_date': input_prediction_date,
    'n_days': 2,
    'power_source': 'pv',
}

endpoint_baseline = "/baseline_yesterday"
url_baseline = f"{base_url}{endpoint_baseline}"
response_baseline = _get_json(url_baseline, params_baseline)

# data
params_data = {
    'input_date': input_prediction_date,
    'n_days': 10,
    'power_source': 'pv',
}

endpoint_data = "/extract_data"
url_data = f"{base_url}{endpoint_data}"
response_data = _get_json(url_data, params_data)

# Visualisation
params_visu = {
    'input_date': input_prediction_date,   # today = '2000-05-15' # would come from streamlit user
    'power_source': 'pv',
}
endpoint_visu = "/visualisation"
url_visu = f"{base_url}{endpoint_visu}"
response_visu = _get_json(url_visu, params_visu)


# Make plots

In [None]:
# Set up four DataFrames from the API responses for the chosen input date.
data_for_date = response_data.get(input_prediction_date)
X = pd.DataFrame(data_for_date['days_before'])
y = pd.DataFrame(data_for_date['day_after'])
y_baseline = pd.DataFrame(response_baseline.get(input_prediction_date))
# NOTE(review): `.get` yields None when the key is absent, which produces an
# empty DataFrame -- confirm the model response really carries this key.
y_predicted = pd.DataFrame(response_model.get('dataframe to predict'))

In [None]:
# Parse the 'date' columns into timezone-aware (UTC) datetimes.
X['date'] = pd.to_datetime(X['date'], utc=True)
y['date'] = pd.to_datetime(y['date'], utc=True)
# The baseline dates are moved forward by one day.
# NOTE: this overwrites the column in place, so re-running the cell shifts
# the baseline by a further day each time.
y_baseline['date'] = pd.to_datetime(y_baseline['date'], utc=True) + datetime.timedelta(days=1)

In [None]:
# Overlay the history window, the following day and the baseline on one axis.
# Each series gets its own legend label so the lines can be told apart.
fig, ax = plt.subplots()
ax.plot(X.date, X.power_source, label='production, days before')
ax.plot(y.date, y.power_source, label='production, day after')
ax.plot(y_baseline.date, y_baseline.power_source, label='baseline (yesterday)')
ax.set_title('Production data and baseline prediction')
ax.set_xlabel('date')
ax.set_ylabel('power_source')
ax.legend();


In [None]:
# Same three series, one panel each, sharing both axes for direct comparison.
fig, ax = plt.subplots(nrows=3, sharex=True, sharey=True)
panel_series = [
    (X, 'production, days before'),
    (y, 'production, day after'),
    (y_baseline, 'baseline (yesterday)'),
]
for panel_ax, (frame, series_label) in zip(ax, panel_series):
    panel_ax.plot(frame.date, frame.power_source, label=series_label)
    panel_ax.set_ylabel('power_source')
    # A figure-level plt.legend() would only annotate the last panel.
    panel_ax.legend()
ax[-1].set_xlabel('date');


In [None]:
# Metrics: mean of the history window vs. mean of the baseline prediction
mean_training = X['power_source'].mean()
mean_predicted = y_baseline['power_source'].mean()
mean_diff = mean_predicted - mean_training
# Shown as the cell output: (training mean, predicted mean, difference)
(mean_training, mean_predicted, mean_diff)

# Tests API endpoints

In [None]:
def predict_baseline_yesterday(input_date: str):
    """
    Return the last 24 'electricity' values recorded before `input_date`.

    Reads the module-level `data_pv_clean` frame (columns 'utc_time' and
    'electricity'); rows are taken in the frame's existing order.
    NOTE(review): `data_pv_clean` is not defined in the visible cells.

    Returns a dict mapping `input_date` to the list of values.
    """
    is_before = data_pv_clean['utc_time'] < input_date
    previous_rows = data_pv_clean.loc[is_before].iloc[-24:]
    return {input_date: previous_rows['electricity'].to_list()}

# Run the local baseline helper for the notebook's input date and display the result.
predict_baseline_yesterday(input_prediction_date)

In [None]:
def extract_pv_data(input_date: str, n_days=10):
    """
    Collect the power data around `input_date`.

    'days_before' holds the last 24 * n_days 'electricity' values with
    'utc_time' earlier than `input_date`; 'day_after' holds the first 24
    values from `input_date` onwards. Both are read from the module-level
    `data_pv_clean` frame in its existing row order.
    NOTE(review): `data_pv_clean` is not defined in the visible cells.

    Returns {input_date: {'days_before': [...], 'day_after': [...]}}.
    """
    hours_back = 24 * n_days
    timestamps = data_pv_clean['utc_time']

    history = data_pv_clean.loc[timestamps < input_date, 'electricity']
    upcoming = data_pv_clean.loc[timestamps >= input_date, 'electricity']

    return {
        input_date: {
            'days_before': history.iloc[-hours_back:].to_list(),
            'day_after': upcoming.iloc[:24].to_list(),
        }
    }

# extract_pv_data(input_prediction_date)

In [None]:
def predict(input_date: str, n_days=2):
    """
    Return the hourly 'electricity' values of the `n_days` days before `input_date`.

    Reads the module-level `data_pv_clean` frame (columns 'utc_time' and
    'electricity') and takes the last 24 * n_days rows whose 'utc_time' is
    earlier than `input_date`, in the frame's existing row order.
    NOTE(review): `data_pv_clean` is not defined in the visible cells.

    Args:
        input_date: timestamp string compared against 'utc_time'.
        n_days: number of 24-row days to return; the default of 2 yields
            48 rows.

    Returns:
        {'dataframe to predict': [...]} with at most 24 * n_days values.

    Raises:
        ValueError: if `n_days` is negative.
    """
    if n_days < 0:
        raise ValueError("n_days must be non-negative")

    hours_per_day = 24
    history = data_pv_clean[data_pv_clean['utc_time'] < input_date]
    # tail(0) is empty, whereas a [-0:] slice would return every row.
    X_pred = history.tail(hours_per_day * n_days)

    return {'dataframe to predict': X_pred['electricity'].to_list()}

# predict(input_prediction_date)