# Amperon Data Engineering Take Home Assignment

## 1.0 Imports and Setup

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.dates as mdates

import plotly.graph_objs as go
import ipywidgets as widgets
from IPython.display import display
import plotly.io as pio

from tomorrow.db_utils import setup_database
from tomorrow.config import get_database_url

In [2]:
# Resolve the connection URL through the project's configuration helper.
db_url = get_database_url()
# NOTE(review): setup_database presumably returns a SQLAlchemy session factory
# (it is called below to produce an object exposing `.bind`) — confirm in
# tomorrow.db_utils.
Session = setup_database(db_url)  
session = Session()  # single notebook-wide session; the loading cell reads its `.bind`

In [3]:
# Read one table per time resolution through the session's bound connection,
# keyed by granularity so the dropdown-driven code can select a table by name.
dataframes = {
    "minutely": pd.read_sql_table("weather_data_minutely", session.bind),
    "hourly": pd.read_sql_table("weather_data_hourly", session.bind),
    "daily": pd.read_sql_table("weather_data_daily", session.bind),
}

# Keep the individual tables reachable under their own names as well.
df_minutely = dataframes["minutely"]
df_hourly = dataframes["hourly"]
df_daily = dataframes["daily"]

## 2.0 Visualisations

### 2.1 Time Series Graphs

In [4]:
# Granularity selector: one option per loaded table, starting on the daily data.
granularity_dropdown = widgets.Dropdown(
    description='Granularity:',
    options=list(dataframes),
    value='daily',
    disabled=False,
)

# Location selector: created empty; update_locations() fills it in.
location_dropdown = widgets.Dropdown(
    description='Location:',
    options=[],
    value=None,
    disabled=False,
)

# Variable selector: created empty; update_variables() fills it in.
variable_dropdown = widgets.Dropdown(
    description='Variable:',
    options=[],
    value=None,
    disabled=False,
)

# Keep the location dropdown in step with the selected granularity
def update_locations(*args):
    """Repopulate the location dropdown from the currently selected table.

    The distinct values of the table's ``location`` column become the
    dropdown options (sorted), and the first one is selected. With no
    locations the selection is cleared to ``None``.

    Args:
        *args: Ignored. Accepted so the function can be used both as a
            widget observer callback and as a plain call.

    NOTE(review): ``dataframes``, ``granularity_dropdown`` and
    ``location_dropdown`` are resolved as module-level names at call time.
    The map cell later rebinds ``granularity_dropdown``, so after that cell
    has run this reads the map's selector — confirm that is intended.
    """
    selected_table = dataframes[granularity_dropdown.value]
    choices = sorted(selected_table['location'].unique())
    location_dropdown.options = choices
    if choices:
        location_dropdown.value = choices[0]
    else:
        location_dropdown.value = None

# Keep the variable dropdown in step with the selected granularity
def update_variables(*args):
    """Repopulate the variable dropdown from the currently selected table.

    Every numeric column of the table is offered, in sorted order, and the
    first one is selected. With no numeric columns the selection is cleared
    to ``None``.

    Args:
        *args: Ignored. Accepted so the function can be used both as a
            widget observer callback and as a plain call.

    NOTE(review): ``granularity_dropdown`` and ``variable_dropdown`` are
    resolved as module-level names at call time, and the map cell later
    rebinds both — after that cell has run, this function reads and updates
    the map's widgets. Confirm that is intended.
    """
    selected_table = dataframes[granularity_dropdown.value]
    numeric_columns = selected_table.select_dtypes(include=[np.number]).columns
    choices = sorted(numeric_columns)
    variable_dropdown.options = choices
    variable_dropdown.value = next(iter(choices), None)

# Whenever the granularity selection changes, refresh the location list and
# then the variable list so both dropdowns match the newly selected table.
granularity_dropdown.observe(update_locations, names='value')
granularity_dropdown.observe(update_variables, names='value')

# Plotting function
def plot_data(granularity, location, variable):
    """Draw a time-series line chart of one variable at one location.

    Intended as the callback of ``widgets.interactive_output``, so it is
    re-run each time one of the three dropdowns changes.

    Args:
        granularity: Key into ``dataframes`` selecting the table to plot.
        location: Value matched against the table's ``location`` column.
        variable: Name of the column plotted on the y-axis.

    Returns:
        None. The chart is rendered with ``plt.show()``; nothing is drawn
        when the selection is incomplete or does not match the table.
    """
    # The dependent dropdowns start out empty, so either value may be unset.
    if not location or not variable:
        return

    df = dataframes[granularity]

    # A granularity change refreshes the dropdowns one at a time, so this can
    # be called with a column name left over from the previously selected
    # table. Skip that transient state instead of raising KeyError.
    if variable not in df.columns:
        return

    data = df[df['location'] == location]
    data_sorted = data.sort_values('time')

    plt.style.use('ggplot')
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(data_sorted['time'], data_sorted[variable], marker='o', linestyle='-', label=variable)
    ax.set_title(f'{variable.capitalize()} over Time at {location} ({granularity.capitalize()})', fontsize=16)
    ax.set_xlabel('Time', fontsize=14)
    ax.set_ylabel(variable.replace('_', ' ').capitalize(), fontsize=14)

    # Date-aware tick placement and labels, rotated so they do not overlap.
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M'))
    ax.xaxis.set_major_locator(mdates.AutoDateLocator())
    fig.autofmt_xdate()

    ax.grid(True)
    ax.legend()
    plt.tight_layout()
    plt.show()


In [5]:
# Bind plot_data to the three dropdowns so the chart follows their values.
interactive_plot = widgets.interactive_output(
    plot_data,
    {
        'granularity': granularity_dropdown,
        'location': location_dropdown,
        'variable': variable_dropdown,
    },
)

# Fill the dependent dropdowns once for the initial granularity, then show
# the controls followed by the chart output area.
update_locations()
update_variables()
display(granularity_dropdown, location_dropdown, variable_dropdown, interactive_plot)

Dropdown(description='Granularity:', index=2, options=('minutely', 'hourly', 'daily'), value='daily')

Dropdown(description='Location:', options=('25.8600,-97.4200', '25.9000,-97.4000', '25.9000,-97.4400', '25.900…

Dropdown(description='Variable:', options=('cloud_base_avg', 'cloud_base_max', 'cloud_base_min', 'cloud_ceilin…

Output()

### 2.2 Map Visualisation

In [35]:
# Use Plotly's "iframe" renderer for figures shown from this notebook.
pio.renderers.default = "iframe"

# Split every "lat,lon" location string into numeric coordinate columns.
# The columns are added in place to each loaded table.
for df in dataframes.values():
    lat_lon = df['location'].str.split(',', expand=True).astype(float)
    df['latitude'], df['longitude'] = lat_lon[0], lat_lon[1]

# Controls for the map.
# NOTE(review): these assignments rebind `granularity_dropdown` and
# `variable_dropdown`, names the time-series callbacks above also look up at
# call time — confirm the earlier chart still behaves as intended afterwards.
granularity_dropdown = widgets.Dropdown(
    description='Granularity:',
    options=list(dataframes),
    value='daily',
)

variable_dropdown = widgets.Dropdown(description='Variable:')

# Function to update variable dropdown options based on selected dataframe
def update_variable_options(*args):
    """Repopulate the map's variable dropdown from the selected table.

    Offers the table's numeric columns in their original column order and
    selects the first one. With no plottable columns the selection is
    cleared to ``None`` instead of raising ``IndexError``.

    Args:
        *args: Ignored. Accepted so the function can be used both as a
            widget observer callback and as a plain call.
    """
    df = dataframes[granularity_dropdown.value]

    # `latitude` / `longitude` are helper columns derived from `location`
    # for positioning the bubbles; they are not weather variables, so keep
    # them out of the list of things to plot.
    coordinate_cols = ('latitude', 'longitude')
    numeric_cols = [
        col
        for col in df.select_dtypes(include=np.number).columns
        if col not in coordinate_cols
    ]

    variable_dropdown.options = numeric_cols
    variable_dropdown.value = numeric_cols[0] if numeric_cols else None

# Refresh the variable list on every granularity change, and once right away
# so the dropdown is populated before the map is first drawn.
granularity_dropdown.observe(update_variable_options, names='value')
update_variable_options()

# Plotting function using Plotly for bubble map
def plot_bubble_map(granularity, variable):
    """Render an animated bubble map of one variable across all locations.

    One animation frame is built per distinct timestamp. Each location is
    drawn as a bubble whose area and colour encode the variable's value.
    Bubble sizes and the colour range are scaled against the whole selected
    table, so frames can be compared with each other while the animation
    plays.

    Args:
        granularity: Key into ``dataframes`` selecting the table to plot.
        variable: Name of the numeric column to visualise.

    Returns:
        None. The figure is displayed with ``fig.show()``; nothing is drawn
        when the selection is incomplete or there are no plottable rows.
    """
    # The variable dropdown has no value until it has been populated.
    if not variable:
        return

    df = dataframes[granularity].copy()

    # Guard against a column name that does not exist in the selected table.
    if variable not in df.columns:
        return

    no_data_message = f'No plottable rows for {variable!r} ({granularity}).'
    if df.empty:
        print(no_data_message)
        return

    # Parse the "lat,lon" strings on the copy so this function does not
    # depend on coordinate columns having been added elsewhere.
    lat_lon = df['location'].str.split(',', expand=True).astype(float)
    df['latitude'] = lat_lon[0]
    df['longitude'] = lat_lon[1]
    df = df.dropna(subset=['latitude', 'longitude'])
    if df.empty:
        print(no_data_message)
        return

    # String timestamps double as frame names and slider labels.
    df['time'] = df['time'].astype(str)
    df = df.sort_values('time')

    # Carry the last known value forward within each location so gaps in the
    # series do not make a bubble disappear for a frame.
    df[variable] = df.groupby('location')[variable].ffill()

    # Map viewport: the data's bounding box plus a margin, in degrees.
    buffer = 0.5
    lon_min, lon_max = df['longitude'].min() - buffer, df['longitude'].max() + buffer
    lat_min, lat_max = df['latitude'].min() - buffer, df['latitude'].max() + buffer

    # Bubble sizes are computed from magnitudes and scaled against the
    # table-wide peak. Taking the square root of raw values yields NaN for
    # negative readings, and dividing by a per-frame maximum yields NaN for
    # frames whose maximum is zero.
    max_bubble_size = 200
    magnitude = df[variable].abs()
    peak = magnitude.max()
    if pd.notna(peak) and peak > 0:
        df['_bubble_size'] = np.sqrt(magnitude / peak) * max_bubble_size
    else:
        df['_bubble_size'] = 0.0

    # Pin the colour scale to the table-wide range so a colour means the
    # same value in every frame.
    value_min, value_max = df[variable].min(), df[variable].max()
    color_range = {}
    if pd.notna(value_min) and pd.notna(value_max):
        color_range = dict(cmin=value_min, cmax=value_max)

    # One frame per timestamp. The rows are already sorted by time, so
    # grouping without re-sorting keeps chronological order and visits the
    # table once instead of filtering it again for every timestamp.
    frames = [
        go.Frame(
            data=[go.Scattergeo(
                lon=frame_df['longitude'],
                lat=frame_df['latitude'],
                text=frame_df['location'],
                mode='markers',
                marker=dict(
                    size=frame_df['_bubble_size'],
                    color=frame_df[variable],
                    colorscale='Viridis',
                    showscale=True,
                    sizemode='area',
                    **color_range,
                )
            )],
            name=timestamp
        )
        for timestamp, frame_df in df.groupby('time', sort=False)
    ]

    initial_data = frames[0].data

    # One slider step per frame; selecting a step animates to that frame.
    steps = [
        dict(
            method="animate",
            args=[[frame.name], {"frame": {"duration": 500, "redraw": True}, "transition": {"duration": 300}}],
            label=frame.name
        )
        for frame in frames
    ]

    sliders = [dict(
        steps=steps,
        active=0,
        currentvalue={"prefix": "Time: "},
        pad={"t": 50}
    )]

    layout = go.Layout(
        title=f'{variable.capitalize()} over Time',
        showlegend=False,
        width = 1000,
        height = 1000,
        geo=dict(
            scope='north america',
            showcountries=True,
            landcolor="rgb(212, 212, 212)",
            showocean=True,
            oceancolor="lightskyblue",
            countrycolor="black",
            countrywidth=2,
            projection=dict(
                type="mercator"
            ),
            center=dict(
                lat=(lat_min + lat_max) / 2,
                lon=(lon_min + lon_max) / 2
            ),
            lataxis={"range": [lat_min, lat_max]},
            lonaxis={"range": [lon_min, lon_max]},
        ),
        sliders=sliders,
        updatemenus=[{
            'buttons': [
                {
                    'args': [None, {'frame': {'duration': 500, 'redraw': True}, 'fromcurrent': True, 'transition': {'duration': 300}}],
                    'label': 'Play',
                    'method': 'animate'
                },
                {
                    'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate', 'transition': {'duration': 0}}],
                    'label': 'Pause',
                    'method': 'animate'
                }
            ],
            'direction': 'left',
            'pad': {'r': 10},
            'showactive': False,
            'type': 'buttons',
            'x': 0.1,
            'xanchor': 'right',
            'y': 0,
            'yanchor': 'top'
        }]
    )

    fig = go.Figure(data=initial_data, layout=layout, frames=frames)
    fig.show()

# Bind plot_bubble_map to the two map dropdowns so the figure follows them.
interactive_plot = widgets.interactive_output(
    plot_bubble_map,
    {
        'granularity': granularity_dropdown,
        'variable': variable_dropdown,
    },
)

# Show the controls followed by the map output area.
display(granularity_dropdown, variable_dropdown, interactive_plot)


Dropdown(description='Granularity:', index=2, options=('minutely', 'hourly', 'daily'), value='daily')

Dropdown(description='Variable:', options=('cloud_base_avg', 'cloud_base_max', 'cloud_base_min', 'cloud_ceilin…

Output()