<a href="https://colab.research.google.com/github/alex-antonison-mtsu/demo-notebooks/blob/main/pittsburgh_data_analysis_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the pinned skimpy release; skimpy is imported further down and used
# for its clean_columns helper.
!pip install skimpy==0.0.5

In [None]:
from google.colab import files

# Open Colab's upload dialog; the result maps each uploaded file name to its
# content.
uploaded = files.upload()

# Echo every uploaded file with its size as a quick confirmation.
for fn in uploaded:
  print(
      'User uploaded file "{name}" with length {length} bytes'.format(
          length=len(uploaded[fn]), name=fn))

In [None]:
import pandas as pd
import skimpy
import numpy as np
import datetime
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap, theme, element_text, ylim

In [None]:
# Read the sensor export into a DataFrame. low_memory=False makes pandas parse
# the file in one pass instead of chunk by chunk.
df_src = pd.read_csv(filepath_or_buffer="demo-sensor-data.csv", low_memory=False)
df_src.head(5)

In [None]:
# Normalise the column names with skimpy.clean_columns, then derive a
# calendar-date column (time of day dropped via .dt.date) from the
# field_date_time_et timestamps. field_date_et is used below as the grouping
# key for the per-day means.
df_src_clean_column = skimpy.clean_columns(df_src)
df_src_clean_column["field_date_et"] = pd.to_datetime(df_src_clean_column["field_date_time_et"]).dt.date
df_src_clean_column.head()

In [None]:
# Average every numeric column per (area, parameter, sensor, station, site, day).
#
# numeric_only=True is required on pandas >= 2.0: the default changed to False
# there, so a bare .mean() raises TypeError as soon as the frame holds a
# non-numeric column (presumably the raw field_date_time_et text column here).
# On older pandas such columns were silently dropped, which is the behaviour
# this flag restores.
df_src_mean = (
    df_src_clean_column
    .groupby(
        by=[
            "area_of_concern",
            "parameter",
            "manufacturer_sensor",
            "station_name",
            "site_name",
            "field_date_et",
        ]
    )
    .mean(numeric_only=True)
    .reset_index()  # turn the group keys back into ordinary columns
)
df_src_mean.head()

In [None]:
# Reshape from long to wide: one row per (area, sensor, site, station, day) and
# one column per distinct value of "parameter", filled from "value".
df_pivot = pd.pivot_table(
    data=df_src_mean,
    values="value",
    columns="parameter",
    index=[
        "area_of_concern",
        "manufacturer_sensor",
        "site_name",
        "station_name",
        "field_date_et",
    ],
).reset_index()

# The new column labels come from the data, so run them through the same name
# cleaner that was applied to the source columns.
df_pivot_clean = skimpy.clean_columns(df_pivot)
df_pivot_clean.head()

In [None]:
#@title Parameter Input
#@markdown Please type in a parameter seen above

# Name of the df_pivot_clean column to analyse; it is used below both to drop
# rows with missing values and as the y-axis of the plots.
parameter_input = 'dissolved_oxygen_%'  #@param {type: "string"}
#@markdown ---

In [None]:
# List the stations that have at least one non-missing value for the chosen
# parameter. Series.notna() is used instead of ~np.isnan(...) because np.isnan
# raises TypeError on non-float columns, whereas notna() works for any dtype.
list(
    df_pivot_clean.loc[
        df_pivot_clean[parameter_input].notna(), "station_name"
    ].unique()
)

In [None]:
#@title Station Name Input
#@markdown Please copy and paste a Station Name seen above

# Station to analyse; compared for exact equality against the station_name
# column when the data is filtered below.
station_name_input = 'DMR10'  #@param {type: "string"}
#@markdown ---

In [None]:
# Keep only the rows for the selected station that have a value for the
# selected parameter. Series.notna() replaces ~np.isnan(...): np.isnan raises
# TypeError on non-float columns, while notna() works for any dtype.
df_data_vis = df_pivot_clean[
    (df_pivot_clean["station_name"] == station_name_input)
    & df_pivot_clean[parameter_input].notna()
]
df_data_vis.shape

In [None]:
# Scatter plot of the selected parameter over time for the selected station.
(
    ggplot(
        data=df_data_vis,
        mapping=aes(x='field_date_et', y=parameter_input, color='station_name'),
    )
    + geom_point()
    # Rotate the date labels so they do not overlap.
    + theme(axis_text_x=element_text(hjust=1, rotation=75))
    # NOTE(review): constant ymin aesthetic, presumably to make the y-axis
    # include zero - confirm.
    + aes(ymin=0)
)

In [None]:
# Show the earliest and latest dates present in the filtered data so a valid
# range can be picked in the date form that follows.
print(f"field_date_et values between {min(df_data_vis['field_date_et'])} and {max(df_data_vis['field_date_et'])}")

In [None]:
#@title Please select a min and max date between the values above
#@markdown Date Selector

min_date = '2021-08-10'  #@param {type: "date"}
max_date = '2021-09-10'  #@param {type: "date"}

# Convert both "YYYY-MM-DD" form strings to datetime.date objects so they can
# be compared with the date values stored in field_date_et.
min_date_param, max_date_param = (
    datetime.datetime.strptime(raw_date, "%Y-%m-%d").date()
    for raw_date in (min_date, max_date)
)

#@markdown ---

In [None]:
# Restrict the station/parameter data to the selected date window; between()
# includes both end points by default.
df_data_vis_date_filtered = df_data_vis[
    df_data_vis["field_date_et"].between(min_date_param, max_date_param)
]

# Same scatter plot as before, limited to the selected window.
(
    ggplot(
        data=df_data_vis_date_filtered,
        mapping=aes(x='field_date_et', y=parameter_input, color='station_name'),
    )
    + geom_point()
    # Rotate the date labels so they do not overlap.
    + theme(axis_text_x=element_text(hjust=1, rotation=75))
    # NOTE(review): constant ymin aesthetic, presumably to make the y-axis
    # include zero - confirm.
    + aes(ymin=0)
)