In [265]:
import requests
import pandas as pd
import os
import json
from datetime import datetime
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt 
import pickle
import seaborn as sns
from ipyleaflet import Map, Marker, LayerGroup
from datetime import datetime
from ipywidgets import interact 
import warnings
from ipywidgets import Output
from IPython.display import display

# Silence every warning for the rest of the session
warnings.filterwarnings('ignore')

# Constants
# Base URL that all API request paths in this file are appended to
URL = 'http://172.26.135.52:9090/'
# Extra HTTP headers passed along with most API requests
HEADERS = {'HOST': 'fission'}
# Name of the EPA measure to keep; also used as the column name for its values
METRIC = 'Particles'
# Weather columns used as plot axes and, in this order, as model predictors
weather_columns = ['Temp', 'Humid', 'WindSpeed', 'Rain']

def filter_metric(results: list[dict], metric: str, station: str) -> list[dict]:
    """
    Filter specific metric and add station tag

    Each kept record is the hit's own '_source' dictionary, tagged in place
    with a 'Station' key (the dictionaries are not copied). Hits that are not
    mappings, or that lack '_source' / 'measure_name', are skipped.

    @param results is the raw data from ES
    @param metric is the metric to filter
    @param station is the station number as a string
    @return a list of dictionaries of data
    """
    out = []
    for result in results:
        try:
            source = result['_source']
            if source['measure_name'] != metric:
                continue
        except (KeyError, TypeError):
            # Malformed hit: missing field or not subscriptable by key
            continue
        source['Station'] = station
        out.append(source)
    return out

def get_readings(station: str, metric: str, year: int) -> list[dict]:
    """
    Get the readings from a station for a specific metric

    The first page is requested from the 'epa' endpoint; while the response
    carries a 'Token' other than 'END' and a non-empty 'Data' list, further
    pages are fetched from the 'stream' endpoint. A first response that
    cannot be parsed or whose 'Status' is not 200 yields an empty list; a bad
    later page stops paging and keeps what was collected so far.
    Exceptions raised by requests.get itself are not caught here.

    @param station is the station number to query
    @param metric is the metric to filter
    @param year is the year int
    @return list of dicts of filtered data
    """
    # Build URLs with '/' explicitly: os.path.join would use the OS path
    # separator, which is not '/' on every platform
    base = URL.rstrip('/')
    # First page of readings ('1000' and '50' are fixed path parameters of the
    # endpoint - NOTE(review): their meaning is not visible here)
    ret = requests.get(f'{base}/epa/{station}/1000/50', headers=HEADERS, params={'year': str(year)})
    # Load response as json
    try:
        epa_resp_data = json.loads(ret.text)
        if epa_resp_data['Status'] != 200:
            return []
        out = list(epa_resp_data['Data'])
    except (ValueError, KeyError, TypeError):
        # Not JSON (JSONDecodeError is a ValueError) or not the expected shape
        return []
    # Follow the continuation token until the service signals the end
    while 'Token' in epa_resp_data and epa_resp_data['Token'] != 'END' and len(epa_resp_data['Data']) != 0:
        ret = requests.get(f"{base}/stream/{epa_resp_data['Token']}", headers=HEADERS)
        try:
            epa_resp_data = json.loads(ret.text)
            out.extend(epa_resp_data['Data'])
        except (ValueError, KeyError, TypeError):
            break
    return filter_metric(out, metric, station)

def get_weather(station: str, year: int) -> list[dict]:
    """
    Get the weather from a station for a given year range

    Requests the range from year to year + 1 and tags every returned record
    in place with a 'Station' key. Returns an empty list when the response
    cannot be parsed, does not have the expected shape, or reports a
    'Status' other than 200. Exceptions raised by requests.get itself are
    not caught here.

    @param station is the string of the station number
    @param year is the year int
    @return list of dicts of weather
    """
    # Build the URL with '/' explicitly: os.path.join would use the OS path
    # separator, which is not '/' on every platform
    url = f"{URL.rstrip('/')}/weather/{station}/{year}/{year + 1}"
    ret = requests.get(url, headers=HEADERS)
    try:
        weather_data_json = json.loads(ret.text)
        if weather_data_json['Status'] != 200:
            return []
        weather_data = weather_data_json['Data']
        for item in weather_data:
            item['Station'] = station
    except (ValueError, KeyError, TypeError):
        # Not JSON (JSONDecodeError is a ValueError) or not the expected shape
        return []
    return weather_data

def clean_data(epa_df: pd.DataFrame, weather_df: pd.DataFrame) -> pd.DataFrame:
    """
    Build a cleaned and combined DataFrame of weather and EPA data.
    Combine on station and date

    Both inputs are copied first, so the caller's DataFrames are not modified.

    @param epa_df is the DataFrame with epa data; the columns 'location',
                  'start', 'end', 'value' and 'Station' are read
    @param weather_df is the DataFrame with weather data; the columns 'Date',
                      'Max Temp', 'Min Temp', 'Max Humid', 'Min Humid',
                      'Rain', 'WindSpeed' and 'Station' are read
    @returns DataFrame with the weather_columns plus the METRIC column, built
             from an inner merge of both inputs on 'Date' and 'Station'
    """
    epa_df = epa_df.copy()
    weather_df = weather_df.copy()

    # Turn each location into a 2-tuple before de-duplicating (presumably it
    # arrives as a list, which drop_duplicates cannot hash - confirm)
    epa_df['location'] = epa_df['location'].apply(lambda x: (x[0], x[1]))
    # NOTE(review): the excluded column is spelled 'station' while the tag
    # column used elsewhere in this file is 'Station' - confirm the intent
    dedupe_cols = epa_df.columns.difference(['station'])
    epa_df = epa_df.drop_duplicates(subset=dedupe_cols).copy()
    # Keep only the date part (text before 'T') of the EPA timestamps
    for col in ('start', 'end'):
        epa_df[col] = epa_df[col].apply(lambda x: datetime.strptime(x.split('T')[0], '%Y-%m-%d'))
    epa_df['Date'] = epa_df['start']
    epa_df[METRIC] = epa_df['value']

    # Weather dates are day/month/year
    weather_df['Date'] = weather_df['Date'].apply(lambda x: datetime.strptime(x.split('T')[0], '%d/%m/%Y'))
    for col in ('Max Temp', 'Min Temp', 'Max Humid', 'Min Humid', 'Rain', 'WindSpeed'):
        weather_df[col] = pd.to_numeric(weather_df[col])
    # Daily value is taken as the midpoint of the min and max readings
    weather_df['Temp'] = (weather_df['Min Temp'] + weather_df['Max Temp']) / 2.0
    weather_df['Humid'] = (weather_df['Min Humid'] + weather_df['Max Humid']) / 2.0
    # Drop rows with out-of-range values (presumably placeholders for missing
    # readings - confirm); rows with NaN in these columns are dropped as well
    valid = (
        (weather_df['Temp'] > -50)
        & (weather_df['WindSpeed'] >= 0)
        & (weather_df['Humid'] >= 0)
        & (weather_df['Rain'] >= 0)
    )
    weather_df = weather_df[valid]

    combined_df = pd.merge(epa_df, weather_df, on=['Date', 'Station'], how='inner')
    return combined_df[[*weather_columns, METRIC]]

def update_user(coord):
    """
    Build the data display for a given location

    Looks up the station closest to the location, prints a particle
    prediction based on that station's current weather, then shows an
    interactive scatter plot of each weather column against the METRIC
    readings of the current year, followed by a correlation heatmap.
    Problems are reported with printed messages rather than exceptions where
    the service answers with a non-200 'Status'.

    @param coord is the coordinates to use; coord[0] and coord[1] are read
                 (presumably latitude then longitude as delivered by the map
                 widget - confirm)
    """
    # Build URLs with '/' explicitly: os.path.join would use the OS path
    # separator, which is not '/' on every platform
    base = URL.rstrip('/')

    # First find the closest station; the path takes coord[1] before coord[0].
    # NOTE(review): unlike the other requests this one sends no HEADERS - confirm
    ret = requests.get(f'{base}/stations/{coord[1]}/{coord[0]}')
    data = json.loads(ret.text)
    if data['Status'] != 200:
        print('Error')
        return
    station = str(data['Data']['Station ID'])

    # Get particle prediction based on the current weather at the station
    ret = requests.get(f'{base}/current-weather', params={'id': station}, headers=HEADERS)
    weather_json = json.loads(ret.text)
    if weather_json['Status'] == 200:
        try:
            # Build query: predictor values, comma separated, in weather_columns order
            current = weather_json['Data']
            weather = {
                'Temp': current['Temp'],
                'Humid': current['Humid'],
                'Rain': current['Rain'],
                'WindSpeed': current['Wind Speed (km/h)'],
            }
            query = ','.join(str(weather[field]) for field in weather_columns)
            ret = requests.get(f'{base}/models/epa_model', params={'predictors': query}, headers=HEADERS)
            print("Particle Prediction: ", str(json.loads(ret.text)['prediction']), 'Micro Grams per Cubic Meter')
        except (KeyError, TypeError, ValueError, requests.RequestException):
            # Best effort: a failed prediction must not stop the plots below
            print('Error Getting Prediction')
    else:
        print('Error Getting Local Weather')

    # Get the readings of the current year
    year = datetime.now().year
    epa_list = get_readings(station, METRIC, year)
    weather_list = get_weather(station, year)
    if not epa_list or not weather_list:
        print('Not Enough Data, Please Choose New Location')
        return
    # Put into DataFrames then combine
    epa_readings = pd.DataFrame.from_records(epa_list)
    weather_readings = pd.DataFrame.from_records(weather_list)
    combined_df = clean_data(epa_readings, weather_readings)
    if combined_df.empty:
        # No reading shares a date with a usable weather row: nothing to plot
        print('Not Enough Data, Please Choose New Location')
        return

    # Display data
    # (named so it does not shadow the imported IPython display function)
    def show_scatter(Metric: str):
        """
        Display the scatter plot of one weather column against METRIC
        """
        plt.scatter(combined_df[Metric], combined_df[METRIC])
        # Make tick marks: nine evenly spaced ticks from the min to the max
        start_x = combined_df[Metric].min()
        x_range = abs(combined_df[Metric].max() - start_x)
        interval = x_range / 8.0
        plt.xticks([start_x + n * interval for n in range(9)])
        plt.xlabel(Metric)
        plt.ylabel(METRIC)
        plt.show()
        return Metric
    interact(show_scatter, Metric=weather_columns)

    # Create a correlation matrix
    correlation_columns = [*weather_columns, METRIC]
    corr_table_pers = combined_df[correlation_columns].corr(method='pearson')
    plt.figure(figsize=(8, 6))
    sns.heatmap(corr_table_pers, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
    plt.title('Correlation Matrix')
    plt.show()


# Please Choose a Location (Limited to Victoria, but Not All of Victoria Has Data)

In [266]:
# Create an output widget to capture interaction
out = Output()

# Create a map centered at (-38, 145), within the Victoria area this tool covers
center = (-38., 145.)
m = Map(center=center, zoom=6)

# Create a layer group to hold the marker
marker_layer = LayerGroup()
m.add_layer(marker_layer)
# Most recently selected location; assigned by the click and drag handlers
coords = None

# Handle marker placement
def handle_interaction(**kwargs):
    """
    Map interaction callback; reacts only to events of type 'click'

    Stores the clicked coordinates in the global coords, refreshes the data
    display inside the output widget, then replaces whatever is in the marker
    layer with a new draggable marker at that location.

    @param kwargs is the event data; 'type' and 'coordinates' are read
    """
    global coords, marker
    if kwargs.get('type') != 'click':
        return

    coords = kwargs.get('coordinates')
    with out:
        out.clear_output()
        update_user(coords)

    # Swap the old marker for a draggable one at the clicked location
    marker_layer.clear_layers()
    marker = Marker(location=coords, draggable=True)
    marker.observe(handle_drag, names='location')
    marker_layer.add_layer(marker)

# Handle marker dragging - otherwise the coords will not update
def handle_drag(event):
    """
    Marker observer: store the new marker location and refresh the display

    @param event is the change notification; its 'new' entry is the location
    """
    global coords
    new_location = event['new']
    coords = new_location
    with out:
        out.clear_output()
        update_user(new_location)
# Register handle_interaction as the map's interaction callback
m.on_interaction(handle_interaction)

# Display the map together with the output widget the handlers write into
display(m, out)

Map(center=[-38.0, 145.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_o…

Output()