In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import statsmodels.api as sm
from datetime import datetime, date, timedelta
from dateutil.relativedelta import relativedelta, SU
from scipy.fft import fft, ifft
from scipy.interpolate import PchipInterpolator
from scipy.signal import argrelextrema

from sklearn.preprocessing import StandardScaler
from functools import reduce
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_datetime64_any_dtype as is_datetime
from statsmodels.nonparametric.smoothers_lowess import lowess

In [2]:
# Folder that holds the cleaned CSV exports. The trailing slash is required
# because file names are appended below with plain string concatenation.
# NOTE(review): this is an absolute, machine-specific path; the notebook cannot
# be re-run on another machine until it is pointed at a local copy of the data.
working_directory = "C:/Users/wachic/OneDrive - Milwaukee School of Engineering/Documents/GitHub/Undergrad_Research/"

# Naming convention for variables, e.g. MMSD_sewerflow_all_dailyavg_df:
# 1.   [MMSD, USGS] where the data comes from
# 2.   [sewerflow, precip, streamflow] what the data measures
# 3.   [all, dry, wet] which season it includes
# 4-n  whatever operations have been applied to the data
# n+1  [df, periods, csv] data type

# Load the four cleaned data sets, one DataFrame per CSV file.
USGS_stream_flow_all_df = pd.read_csv(working_directory + "USGS 04087030 Streamflow Cleaned.csv")
MMSD_sewerflow_all_df = pd.read_csv(working_directory + "MMSD Sewer Flow Cleaned.csv")
MMSD_flow_and_precip_all_df = pd.read_csv(working_directory + "MMSD Flow and Precipitation Cleaned.csv")
MMSD_precip_all_df = pd.read_csv(working_directory + "MMSD Precipitation Raw Data Cleaned.csv")

# Keep references to all loaded frames so the same step can be applied to each.
df_list = [USGS_stream_flow_all_df,
           MMSD_sewerflow_all_df,
           MMSD_flow_and_precip_all_df,
           MMSD_precip_all_df]

In [3]:
# Parse the 'Date Time' column of every loaded frame into pandas datetimes.
# The assignment writes back into the frames referenced by df_list, so the
# loaded DataFrames themselves are modified; each frame must contain a
# 'Date Time' column.
for df in df_list:
    df['Date Time'] = pd.to_datetime(df['Date Time'])

In [4]:
def export(df, name='export_df'):
    """
    Write ``df`` to ``<name>.csv`` without the index column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to write.
    name : str, default 'export_df'
        Output path without the extension; ``.csv`` is appended. A bare name
        is written relative to the current working directory.

    Returns
    -------
    None
    """
    # to_csv returns None when it is given a path, so there is nothing to keep.
    df.to_csv(f'{name}.csv', index=False)

### Luedke model

In [20]:
def insert_mirrored_rows(df, num_rows=30):
    """
    Pad ``df`` at both ends with mirrored copies of its own edge rows.

    The first ``num_rows`` rows are reversed and placed before the frame, and
    the last ``num_rows`` rows are reversed and placed after it. Whole rows are
    mirrored, so every column (including any time column) is reflected.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to pad. It is not modified.
    num_rows : int, default 30
        Number of rows to mirror on each side. Values larger than ``len(df)``
        are clamped to ``len(df)``; ``0`` adds no padding.

    Returns
    -------
    pandas.DataFrame
        New frame of ``len(df) + 2 * min(num_rows, len(df))`` rows with a fresh
        0..n-1 index.

    Raises
    ------
    ValueError
        If ``num_rows`` is negative.
    """
    if num_rows < 0:
        raise ValueError("num_rows must be non-negative")

    pad = min(num_rows, len(df))
    if pad == 0:
        # Nothing to mirror; still hand back a new frame with a fresh index.
        return df.reset_index(drop=True)

    mirrored_head = df.iloc[:pad].iloc[::-1]
    # Slice from an explicit start: df.iloc[-0:] would select the whole frame.
    mirrored_tail = df.iloc[len(df) - pad:].iloc[::-1]

    return pd.concat([mirrored_head, df, mirrored_tail], ignore_index=True)

In [34]:
def moving_avg(df, length=24):
    """
    Trailing moving average over ``length`` rows with mirrored edge padding.

    The frame is padded with mirrored edge rows, indexed by 'Date Time', and
    averaged over a rolling window of ``length`` rows. The padding is then
    trimmed, so the result has one row per input row. The window counts rows,
    not elapsed time; it covers 24 hours only if the rows are hourly.

    The padding is ``length - 1`` rows, which is what a trailing window needs
    for the first row to have a full window. It is clamped to ``len(df)``, so
    on a frame shorter than ``length - 1`` rows the leading rows that still
    lack a full window are NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date Time' column; the remaining columns are averaged.
    length : int, default 24
        Rolling window size in rows.

    Returns
    -------
    pandas.DataFrame
        'Date Time' followed by the averaged columns, with a fresh index.
    """
    pad = min(max(length - 1, 0), len(df))
    if pad > 0:
        padded = insert_mirrored_rows(df, num_rows=pad)
    else:
        # Window of one row (or empty frame): no padding is needed.
        padded = df.reset_index(drop=True)

    rolled = padded.set_index('Date Time').rolling(window=length).mean().reset_index()
    # Trim exactly the rows that were added, not a fixed count.
    return rolled.iloc[pad:len(rolled) - pad].reset_index(drop=True)

def normalize(df):
    """
    Min-max scale every column except the first into the range [0, 1].

    The first column is treated as the time axis and copied through unchanged.
    A column whose values are all equal has no range to scale by; it is mapped
    to 0.0 rather than to NaN from a 0/0 division. Missing values stay missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column is the time axis and whose other columns are
        numeric. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with the value columns rescaled.
    """
    df_normalized = df.copy()
    for col in df.columns[1:]:  # Skip the first column (time series)
        col_min = df[col].min()
        col_range = df[col].max() - col_min
        if col_range == 0:
            # Constant column: divide by 1 so the result is 0.0, not NaN.
            col_range = 1
        df_normalized[col] = (df[col] - col_min) / col_range
    return df_normalized

In [35]:
def luedke_2_level(df, RWmax, time_step=24):
    """
    Scale the normalized moving average of ``df`` by ``RWmax``.

    The frame is smoothed with ``moving_avg`` using a window of ``time_step``
    rows, the value columns are min-max normalized with ``normalize``, and
    those columns are multiplied by ``RWmax``. The first column (the time axis)
    is passed through untouched, because multiplying a datetime column by a
    number raises ``TypeError``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Date Time' column followed by numeric columns. It is not
        modified.
    RWmax : scalar
        Multiplier applied to the normalized values, so each value column of
        the result spans 0 to ``RWmax``.
    time_step : int, default 24
        Rolling window size in rows passed to ``moving_avg``.

    Returns
    -------
    pandas.DataFrame
        Time column followed by the scaled columns.
    """
    df_movingavg = moving_avg(df, time_step)
    # normalize returns a fresh copy, so it is safe to assign into it.
    RWt = normalize(df_movingavg)
    value_cols = RWt.columns[1:]
    if len(value_cols) > 0:
        RWt[value_cols] = RWmax * RWt[value_cols]
    return RWt
def luedke_3_level(df, time_step=24):
    df = df.copy()
    df_movingavg = moving_avg(df, time_step)
    df_mavg_normalized = 70 - (normalize(df)*40)
    