In [19]:
import pandas as pd
import plotly.express as px
import plotly.graph_objs as go
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [20]:
df = pd.read_csv('ddb_output.csv')
df.rename(columns={'humidity.S': 'humidity',
                   'temperature.S':'temperature',
                   'timestamp.S':'timestamp'},inplace=True)
df.drop(columns=['Unnamed: 0'],inplace=True)
#df.reset_index(drop=True,inplace=True)
df.sort_values(by='timestamp',inplace=True)
df

Unnamed: 0,humidity,temperature,timestamp
0,54.00,18.04,2023-04-21 12:36:04.168213
1,54.00,18.13,2023-04-21 12:36:07.120651
2,54.00,18.01,2023-04-21 12:36:10.007948
3,54.00,18.09,2023-04-21 12:36:12.959915
4,52.00,18.08,2023-04-21 15:48:08.632782
...,...,...,...
3041,56.35,21.63,2023-05-10 20:30:04.178828
3042,56.91,21.57,2023-05-10 20:40:04.682990
3043,56.72,21.54,2023-05-10 20:50:04.106947
3044,56.77,21.50,2023-05-10 21:00:04.586745


In [21]:
fig = px.scatter(df, x="timestamp", y=["humidity","temperature"], title='All-time humidity and temperature in the grove!')
fig.show()

In [22]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
go.Scatter(x=df['timestamp'],y=df['temperature'], name="Temperature", mode='markers'),
secondary_y=False,
)

fig.add_trace(
go.Scatter(x=df['timestamp'],y=df['humidity'], name="Humidity", mode='markers'),
secondary_y=True,
)

fig.update_layout(
title_text="Temperature and humidity over time"
)

# Set x-axis title
fig.update_xaxes(title_text="Time")

# Set y-axes titles
fig.update_yaxes(title_text="<b>Temperature</b>", title_font_color='blue', secondary_y=False)
fig.update_yaxes(title_text="<b>Humidity</b>", title_font_color='red', secondary_y=True)

In [23]:
df['date'] = df['timestamp'].map(lambda x: pd.Timestamp(x).date())

In [24]:
df_last_week = df[df['timestamp'] > str(pd.Timestamp('2023-05-1'))]

In [25]:
fig = px.box(df_last_week, x="date", y="temperature")
fig.show()

## Building a forecasting model 

In [49]:
df_temp =  df[ df['timestamp'] > str(pd.Timestamp('2023-04-28')) ].drop(columns=['humidity','date'])
df_temp.reset_index(inplace=True,drop=True)
df_temp.head()

Unnamed: 0,temperature,timestamp
0,17.83,2023-04-28 08:24:58.170520
1,17.85,2023-04-28 08:26:15.786048
2,17.85,2023-04-28 08:27:35.117760
3,17.88,2023-04-28 08:31:49.965333
4,17.88,2023-04-28 09:18:11.133661


In [50]:
px.scatter(df_temp, x='timestamp', y='temperature', title='Temperature from 28 April to May 11th (~12 days)')

### Baseline - persistence forecast 1,2,4 hours in advance

In [51]:
df_temp

Unnamed: 0,temperature,timestamp
0,17.83,2023-04-28 08:24:58.170520
1,17.85,2023-04-28 08:26:15.786048
2,17.85,2023-04-28 08:27:35.117760
3,17.88,2023-04-28 08:31:49.965333
4,17.88,2023-04-28 09:18:11.133661
...,...,...
2087,21.63,2023-05-10 20:30:04.178828
2088,21.57,2023-05-10 20:40:04.682990
2089,21.54,2023-05-10 20:50:04.106947
2090,21.50,2023-05-10 21:00:04.586745


In [55]:
# Test - to predict temperature 1,2,4 hours in advance

def persistence_n_hours(n):

    timestamps_n_hr = df_temp['timestamp'].apply(lambda x: pd.Timestamp(x) + pd.Timedelta(hours=n))

    df_temp_n_hr_baseline = pd.DataFrame(list(zip(timestamps_n_hr,df_temp['temperature'])))
    df_temp_n_hr_baseline.rename(columns={0:'timestamp', 1:'value'},inplace=True)
    df_temp_n_hr_baseline['type'] = f'persist. {n}hrs'
    df_temp_n_hr_baseline = df_temp_n_hr_baseline[['value', 'timestamp', 'type']]
    df_temp_n_hr_baseline

    return df_temp_n_hr_baseline

persist_1_hr = persistence_n_hours(1)
persist_2_hr = persistence_n_hours(2)
persist_4_hr = persistence_n_hours(4)


In [73]:
df_temp['type'] = 'observation'
df_temp.rename(columns={'temperature':'value'},inplace=True)
df_temp

Unnamed: 0,value,timestamp,type
0,17.83,2023-04-28 08:24:58.170520,observation
1,17.85,2023-04-28 08:26:15.786048,observation
2,17.85,2023-04-28 08:27:35.117760,observation
3,17.88,2023-04-28 08:31:49.965333,observation
4,17.88,2023-04-28 09:18:11.133661,observation
...,...,...,...
2087,21.63,2023-05-10 20:30:04.178828,observation
2088,21.57,2023-05-10 20:40:04.682990,observation
2089,21.54,2023-05-10 20:50:04.106947,observation
2090,21.50,2023-05-10 21:00:04.586745,observation


In [74]:
df_1_hour_forecast[ df_1_hour_forecast['timestamp'] > str(pd.Timestamp('2023-05-10')) ]

Unnamed: 0,temperature,timestamp,value,type
1964,21.26,2023-05-10 00:00:04.258123,,
1952,,2023-05-10 00:00:04.321746,21.87,persist. 2hrs
1958,,2023-05-10 00:00:04.412354,21.58,persist. 1hrs
1940,,2023-05-10 00:00:04.417188,21.47,persist. 4hrs
1965,21.29,2023-05-10 00:10:03.736480,,
...,...,...,...,...
2087,,2023-05-11 00:30:04.178828,21.63,persist. 4hrs
2088,,2023-05-11 00:40:04.682990,21.57,persist. 4hrs
2089,,2023-05-11 00:50:04.106947,21.54,persist. 4hrs
2090,,2023-05-11 01:00:04.586745,21.50,persist. 4hrs


In [75]:
df_1_hour_forecast = pd.concat([df_temp, persist_1_hr, persist_2_hr, persist_4_hr], axis=0)



df_1_hour_forecast['timestamp'] = df_1_hour_forecast['timestamp'].apply(lambda x: pd.Timestamp(x)) 

df_1_hour_forecast.sort_values(by='timestamp',inplace=True)

df_1_hour_forecast_1_day = df_1_hour_forecast[ df_1_hour_forecast['timestamp'] > str(pd.Timestamp('2023-05-10')) ]

df_1_hour_forecast


Unnamed: 0,value,timestamp,type
0,17.83,2023-04-28 08:24:58.170520,observation
1,17.85,2023-04-28 08:26:15.786048,observation
2,17.85,2023-04-28 08:27:35.117760,observation
3,17.88,2023-04-28 08:31:49.965333,observation
4,17.88,2023-04-28 09:18:11.133661,observation
...,...,...,...
2087,21.63,2023-05-11 00:30:04.178828,persist. 4hrs
2088,21.57,2023-05-11 00:40:04.682990,persist. 4hrs
2089,21.54,2023-05-11 00:50:04.106947,persist. 4hrs
2090,21.50,2023-05-11 01:00:04.586745,persist. 4hrs


In [76]:

px.scatter(df_1_hour_forecast_1_day, x='timestamp', y='value', color='type', title='Temperature from 28 April to May 11th (~12 days)')

### Autocorrelation - test using Durbin-Watson test

In [30]:
# Keras - time series forecasting example 

# https://keras.io/examples/adsf/timeseries_weather_forecasting/