<a href="https://colab.research.google.com/github/hepuliu/Masters_Thesis/blob/sandbox_lime/sandbox_lime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Master Thesis Simulation Sandbox
Flood Prevention Dam Sizing with Machine Learining Approaches - Hepu Liu

### Overall Project Simulation Steps
1. Process discharge data from Waldangelbach Station

2. Process precipitation data from Baiertal  Station

3. Build Prediction Model (Model A)

4. Process precipitation data from Stifterhof Station

5. Process precipitation data from Waibstadt Station (optional)

6. Process precipitation data from Stetten Station (optional

7. Fit data to Model A to predict discharge

### Variable Naming Conventions

- Weather Stations Naming: ('p' for precipitation, 'd' for discharge, 'a' to 'd' for different stations, 'r' for result)

  - da: Waldangelbach Station
  - pa: Baiertal Station
  - pb: Stifterhof Station
  - pc: Waibstadt Station
  - pd: Stetten Station
  - pr: combined/resulting precipitation
  - dr: predicted/resulting discharge

- Variable Naming Coventions: 
  - df: data frame
  - trs: training set
  - tes: testing set
  - fbp: FB Prophet
  - lstm: LSTM
  - ann: ANN 



## Importing Libraries

In [137]:
# importing libraries
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from fbprophet import Prophet
from google.colab import drive
drive.mount

<function google.colab.drive.mount>

## Importing Datasets

In [138]:
# import datasets
da_df = pd.read_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/da_df.csv')

Unnamed: 0,t,discharge [m3/s]
0,2007-01-01 00:00:00,0.226
1,2007-01-01 01:00:00,0.248
2,2007-01-01 02:00:00,0.248
3,2007-01-01 03:00:00,0.32
4,2007-01-01 04:00:00,0.346


In [139]:
# import datasets
pa_df = pd.read_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/pa_df.csv', thousands='.', decimal=',')

Unnamed: 0,t,wetness of leaves \n[%],temp. In 2m,global radiation [Wh/m²],sum precipitation [mm]
0,2007-01-01 00:00:00,97.73,10.0,0.0,2.6
1,2007-01-01 01:00:00,97.57,10.58,0.0,0.8
2,2007-01-01 02:00:00,97.25,11.22,0.0,0.2
3,2007-01-01 03:00:00,97.25,11.42,0.0,0.6
4,2007-01-01 04:00:00,97.02,11.58,0.0,0.0


## Data Processing

In [141]:
df_fbp = pd.read_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/df_fbp.csv')
df_fbp

Unnamed: 0,ds,y,temp,rad,preci
0,2007-01-01 00:00:00,0.226,10.00,0.0,2.6
1,2007-01-01 01:00:00,0.248,10.58,0.0,0.8
2,2007-01-01 02:00:00,0.248,11.22,0.0,0.2
3,2007-01-01 03:00:00,0.320,11.42,0.0,0.6
4,2007-01-01 04:00:00,0.346,11.58,0.0,0.0
...,...,...,...,...,...
105148,2018-12-31 19:00:00,0.232,6.19,0.0,0.0
105149,2018-12-31 20:00:00,0.248,6.23,0.0,0.1
105150,2018-12-31 21:00:00,0.232,6.25,0.0,0.1
105151,2018-12-31 22:00:00,0.226,6.26,0.0,0.0


## Prediction


In [142]:
## FBProphet

# Multi Variant Prediction Model
def multi_var_predictor(df, reg1, reg2, reg3):
  predictor = Prophet(interval_width=0.95)
  predictor.add_regressor(reg1)
  predictor.add_regressor(reg2)
  predictor.add_regressor(reg3)
  predictor.fit(df)
  return predictor

# Make Prediction Dataframe
def prediction_df(predictor,df):
  prediction_df = predictor.predict(df).loc[:,['ds','yhat']]
  prediction_df['ds'] = prediction_df['ds'].apply(lambda x:x)
  return prediction_df

# # Prediction for Discharge
# discharge_predictor = multi_var_predictor(df_fbp,'temp','rad','preci')
# da_dr_fbp = prediction_df(discharge_predictor, df_fbp)
# da_dr_fbp.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/da_dr_fbp.csv', index=False)
# da_dr_fbp

# Archive

In [143]:
# # Cleanup Discharge A DataFrame da_df
# da_df = pd.read_csv('/content/drive/MyDrive/thesis/dataset/Wiesloch_waldangelbach_hourly_20070101-20210501.csv')
# da_df = da_df.iloc[13:].reset_index(drop=True)
# da_df.columns = da_df.iloc[0]
# da_df = da_df.iloc[3:].reset_index(drop=True)
# da_df = da_df.iloc[:, 4:7] # precipitation unit [m3/s]
# da_df['Uhrzeit'] = da_df['Uhrzeit'].str.replace(' v', '')
# da_df['t'] = pd.to_datetime(da_df['Datum']+' '+da_df['Uhrzeit'], format=('%y-%m-%d %H:%M:%S'))
# da_df = da_df.iloc[:,2:]
# da_df.columns = ['discharge [m3/s]', 't']
# da_df = da_df[['t','discharge [m3/s]']]
# da_df.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/da_df.csv', index=False)

In [144]:
# # Cleanup Precipitation A DataFrame pa_df
# pa_df = pd.read_csv('/content/drive/MyDrive/thesis/dataset/Weather_station_Baiertal.csv')
# pa_df.columns = pa_df.iloc[0]
# pa_df = pa_df.iloc[1:].reset_index(drop=True)
# pa_df['t'] = pd.to_datetime(pa_df['date']+' '+pa_df['time'], format=('%y-%m-%d %H:%M'))
# pa_df = pa_df.iloc[:,2:]
# cols = list(pa_df.columns)
# cols = [cols[-1]] + cols[:-1]
# pa_df = pa_df[cols]
# pa_df.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/pa_df.csv', index=False)


In [145]:
# # Cleanup Precipitation B DataFrame pb_df
# pb_df = pd.read_csv('/content/drive/MyDrive/thesis/dataset/Weather_station_Stifterhof.csv')
# pb_df.columns = pb_df.iloc[0]
# pb_df = pb_df.iloc[1:].reset_index(drop=True)
# pb_df['t'] = pd.to_datetime(pb_df['date']+' '+pb_df['time'], format=('%y-%m-%d %H:%M'))
# pb_df = pb_df.iloc[:,2:]
# cols = list(pb_df.columns)
# cols = [cols[-1]] + cols[:-1]
# pb_df = pb_df[cols]
# pb_df.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/pb_df.csv', index=False)

In [146]:
# ## FBProphet

# # Single Variant Prediction Model
# def single_var_predictor(df):
#   predictor = Prophet(interval_width=0.95)
#   predictor.fit(df)
#   return predictor

# # Make Prediction Dataframe
# def prediction_df(predictor,df):
#   prediction_df = predictor.predict(df).loc[:,['ds','yhat']]
#   prediction_df['ds'] = prediction_df['ds'].apply(lambda x:x)
#   return prediction_df

# # Prediction for Discharge [15s for 2000 rows, 45s for 20000rows with GPU, 4m for all]
# discharge_predictor = single_var_predictor(da_df)
# da_dr = prediction_df(discharge_predictor, da_df)
# da_dr

In [147]:
# # Plot Line Graph 20000 row with GPU = 3mins
# def line_plot(df, title):
#   label_font = {'family':'serif', 'color':'black', 'size':'12'}
#   title_font = {'family':'serif', 'color':'black', 'size':'14'}
#   fig = plt.figure(figsize=(8,8))
#   plt.plot(df['ds'], df['yhat'])
#   plt.xlabel( 't', fontdict = label_font)
#   plt.ylabel( 'd', fontdict = label_font)
#   plt.title(title, fontdict = title_font)
   
# # line_plot(da_df, 'Discharge A')


In [140]:
# # Making Data Frame for FB Prophet

# df_fbp = da_df
# df_fbp = df_fbp.merge(pa_df, on = 't', how = 'left').dropna()
# df_fbp = df_fbp.drop(columns = ['wetness of leaves \n[%]'])
# df_fbp.columns = ['ds','y','temp','rad','preci']
# df_fbp
# df_fbp.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/df_fbp.csv', index=False)