<a href="https://colab.research.google.com/github/alex-antonison-mtsu/demo-notebooks/blob/main/pittsburgh_analyze_aggregated_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Welcome to my Demo Notebook!

You need to run the first cell below to install an additional package, and then select `RESTART RUNTIME` so that the package can be loaded into the environment. After you restart the runtime, re-running that cell is harmless.

In [None]:
# Install the pinned skimpy release that the import cell below loads;
# restart the runtime after installing so the package can be imported.
%pip install skimpy==0.0.5

In [None]:
import pandas as pd
import skimpy
import numpy as np
import datetime
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap, theme, element_text, ylim

In [None]:
from google.colab import files

# Ask for a file through Colab's upload helper. The result is kept in
# `uploaded`, which is indexed by entry name below.
uploaded = files.upload()

# Report the name and size of every uploaded entry. The loop variable keeps
# the name `fn` because the following cell reads it.
for fn in uploaded:
  print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')

In [None]:
# read in the uploaded file into a pandas dataframe
# The file name is taken from `uploaded` itself rather than from the loop
# variable left behind by the upload cell: after an empty upload that variable
# is either undefined (fresh runtime) or still names a file from an earlier run.
if not uploaded:
  raise ValueError("No file was uploaded - re-run the upload cell and choose a CSV file.")
# the last key is the same name the upload cell's loop finished on
uploaded_file_name = list(uploaded)[-1]
df_src = pd.read_csv(uploaded_file_name, low_memory=False)
df_src.head()

In [None]:
# Normalise the column labels with skimpy.clean_columns() so the cells below
# can refer to columns by their cleaned names.
df_src_clean_column = skimpy.clean_columns(df_src)
# Parse field_date_et as datetimes and keep only the calendar date
# (.dt.date drops the time-of-day part).
field_dates_only = pd.to_datetime(df_src_clean_column["field_date_et"]).dt.date
df_src_clean_column["field_date_et"] = field_dates_only
df_src_clean_column.head()

In [None]:
# Columns that identify one observation. They form the pivot index and are
# excluded from the "_average" suffixing below, so the list is defined once
# and reused for both purposes.
site_columns = ["area_of_concern", "manufacturer_sensor", "site_name", "station_name", "field_date_et"]

# One row per site_columns combination and one column per distinct value of
# `parameter`, with cell values drawn from the `average` column.
# reset_index() turns the index levels back into ordinary columns.
df_pivot = pd.pivot_table(
  df_src_clean_column,
  index=site_columns,
  columns="parameter",
  values="average",
).reset_index()
df_pivot_clean = skimpy.clean_columns(df_pivot)

# Suffix every non-site column with "_average" in a single rename. Building
# the full mapping first renames each column exactly once, instead of
# rebuilding the frame per column and risking a second suffix when a new name
# collides with an existing one.
average_column_names = {
  column: column + "_average"
  for column in df_pivot_clean.columns
  if column not in site_columns
}
df_pivot_clean = df_pivot_clean.rename(columns=average_column_names)

df_pivot_clean.head()

In [None]:
#@title Parameter Input
#@markdown Please type in a parameter seen above

# Column of df_pivot_clean to analyse. Later cells use this string to select
# that column, so it has to match one of the column names exactly.
parameter_input = 'p_h_average'  #@param {type: "string"}
#@markdown ---

In [None]:
# Keep only the rows that have a value for the chosen parameter.
# Series.notna() is used instead of ~np.isnan(...) because np.isnan raises a
# TypeError on object-dtype columns (strings, dates), while notna() works for
# any column the user may type in.
has_parameter_value = df_pivot_clean[parameter_input].notna()
df_data_vis = df_pivot_clean[has_parameter_value]
# (rows, columns) remaining after the filter
df_data_vis.shape

In [None]:
# List the distinct stations that have at least one value for the chosen
# parameter, so one of them can be copied into the Station Name form.
# Series.notna() replaces ~np.isnan(...), which raises a TypeError on
# object-dtype columns; .loc selects rows and column in one step.
list(df_pivot_clean.loc[df_pivot_clean[parameter_input].notna(), "station_name"].unique())

In [None]:
#@title Station Name Input
#@markdown Please copy and paste a Station Name seen above

# Station to plot. A later cell keeps only the rows whose station_name equals
# this string, so it has to match a listed station name exactly.
station_name_input = 'STR02'  #@param {type: "string"}
#@markdown ---

In [None]:
# Keep the rows for the chosen station that also have a value for the chosen
# parameter. Series.notna() replaces ~np.isnan(...), which raises a TypeError
# on object-dtype columns.
is_selected_station = df_pivot_clean["station_name"] == station_name_input
has_parameter_value = df_pivot_clean[parameter_input].notna()
df_data_vis = df_pivot_clean[is_selected_station & has_parameter_value]
# (rows, columns) remaining after the filter
df_data_vis.shape

In [None]:
# Scatter plot of the chosen parameter against field date for the rows kept
# in df_data_vis, coloured by station.
point_mapping = aes(x='field_date_et', y=parameter_input, color='station_name')
# Turn the x tick labels by 90 degrees.
rotated_x_labels = theme(axis_text_x=element_text(rotation=90, hjust=1))

station_plot = (
  ggplot(df_data_vis, point_mapping)
  + geom_point()
  + rotated_x_labels
  # NOTE(review): presumably meant to make the y-axis include 0 - confirm
  # that adding aes(ymin=0) has that effect here.
  + aes(ymin=0)
)
station_plot