In [2]:
import pandas as pd
import geopandas as gpd
from vega_datasets import data
import altair as alt
from ipywidgets import widgets, VBox, interactive_output


In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/bfro_reports_fall2022.csv')

In [4]:
df

Unnamed: 0,observed,location_details,county,state,season,title,latitude,longitude,date,number,...,precip_intensity,precip_probability,precip_type,pressure,summary,uv_index,visibility,wind_bearing,wind_speed,location
0,Ed L. was salmon fishing with a companion in P...,East side of Prince William Sound,Valdez-Chitina-Whittier County,Alaska,Fall,,,,,1261.0,...,,,,,,,,,,
1,heh i kinda feel a little dumb that im reporti...,"the road is off us rt 80, i dont know the exit...",Warren County,New Jersey,Fall,,,,,438.0,...,,,,,,,,,,
2,I was on my way to Claremont from Lebanon on R...,Close to Claremont down 120 not far from Kings...,Sullivan County,New Hampshire,Summer,Report 55269: Dawn sighting at Stevens Brook o...,43.41549,-72.33093,2016-06-07,55269.0,...,0.001,0.7,rain,998.87,Mostly cloudy throughout the day.,6.0,9.70,262.0,0.49,POINT(-72.33093000000001 43.415490000000005)
3,I was northeast of Macy Nebraska along the Mis...,Latitude & Longitude : 42.158230 -96.344197,Thurston County,Nebraska,Spring,Report 59757: Possible daylight sighting of a ...,42.15685,-96.34203,2018-05-25,59757.0,...,0.000,0.0,,1008.07,Partly cloudy in the morning.,10.0,8.25,193.0,3.33,POINT(-96.34203000000001 42.15685)
4,"While this incident occurred a long time ago, ...","Ward County, Just outside of a the Minuteman T...",Ward County,North Dakota,Spring,Report 751: Hunter describes described being s...,48.25422,-101.31660,2000-04-21,751.0,...,,,rain,1011.47,Partly cloudy until evening.,6.0,10.00,237.0,11.14,POINT(-101.3166 48.254220000000004)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4742,My cousin and I were camping way out in the wo...,"Indiana, Brown County, Elkinsville, Lake Monro...",Brown County,Indiana,Spring,,,,,2460.0,...,,,,,,,,,,
4743,While backpacking near the horse trails and ac...,Near Bedford south of Brown County in the Hoos...,Brown County,Indiana,Winter,,,,,2461.0,...,,,,,,,,,,
4744,My wife and I were camping At Yellowood State ...,Yellowood State Park. Off of highway 46 in bet...,Brown County,Indiana,Summer,Report 49480: Campers hear possible vocalizati...,39.17909,-86.33560,2015-08-08,49480.0,...,0.000,0.0,,1014.02,Mostly cloudy in the evening.,9.0,9.22,256.0,0.34,POINT(-86.3356 39.17909)
4745,My wife and I were driving to Indianapolis to ...,On Interstate 65 in Indiana somewhere around t...,Boone County,Indiana,Winter,,,,,2459.0,...,,,,,,,,,,


In [5]:
df.columns

Index(['observed', 'location_details', 'county', 'state', 'season', 'title',
       'latitude', 'longitude', 'date', 'number', 'classification', 'geohash',
       'temperature_high', 'temperature_mid', 'temperature_low', 'dew_point',
       'humidity', 'cloud_cover', 'moon_phase', 'precip_intensity',
       'precip_probability', 'precip_type', 'pressure', 'summary', 'uv_index',
       'visibility', 'wind_bearing', 'wind_speed', 'location'],
      dtype='object')

In [6]:
df.state.unique()

array(['Alaska', 'New Jersey', 'New Hampshire', 'Nebraska',
       'North Dakota', 'North Carolina', 'Montana', 'Missouri',
       'Minnesota', 'Michigan', 'Maine', 'Maryland', 'Massachusetts',
       'Louisiana', 'New Mexico', 'Mississippi', 'Wyoming',
       'West Virginia', 'Vermont', 'Virginia', 'Texas', 'South Dakota',
       'South Carolina', 'Rhode Island', 'Pennsylvania', 'Oregon',
       'Oklahoma', 'Ohio', 'New York', 'Tennessee', 'Utah', 'Washington',
       'Wisconsin', 'Nevada', 'Kentucky', 'Kansas', 'Indiana', 'Illinois',
       'Idaho', 'Georgia', 'Florida', 'Delaware', 'Connecticut',
       'Colorado', 'California', 'Arizona', 'Arkansas', 'Alabama', 'Iowa'],
      dtype=object)

In [7]:
df_louisiana = df[df.state == 'Louisiana']

In [8]:
df_arizona = df[df.state == 'Arizona']

In [9]:
humid_concat = pd.concat([df_louisiana, df_arizona])

In [10]:
humid_concat.isna().sum()

observed               0
location_details      18
county                 0
state                  0
season                 0
title                 30
latitude              30
longitude             30
date                  30
number                 0
classification         0
geohash               30
temperature_high      55
temperature_mid       58
temperature_low       58
dew_point             55
humidity              55
cloud_cover           69
moon_phase            55
precip_intensity      64
precip_probability    64
precip_type           99
pressure              82
summary               55
uv_index              55
visibility            67
wind_bearing          55
wind_speed            55
location              30
dtype: int64

In [11]:
humid_clean = humid_concat[humid_concat['temperature_mid'].isna() == False]

In [14]:
humid_long = humid_clean.melt(
    id_vars=['humidity', 'state'],
    value_vars=['temperature_high', 'temperature_mid', 'temperature_low'],
    var_name='temperature_type',
    value_name='temperature'
)

labels = {
    'temperature_high': 'High',
    'temperature_mid': 'Mid',
    'temperature_low': 'Low'
}

checkboxes = {
    key: widgets.Checkbox(value=True, description=label)
    for key, label in labels.items()
}

def update_plot(**kwargs):
    selected_types = [key for key, show in kwargs.items() if show]
    filtered_data = humid_long[humid_long['temperature_type'].isin(selected_types)]
    
    chart = alt.Chart(filtered_data).mark_circle().encode(
        x=alt.X('humidity:Q', title='Humidity', scale=alt.Scale(domain=[0, 1])),
        y=alt.Y('temperature:Q', title='Temperature', scale=alt.Scale(domain=[0, 120])),
        color=alt.Color('state:N', legend=alt.Legend(title="State")),
        shape=alt.Shape('temperature_type:N', legend=alt.Legend(title="Temperature Type"))
    ).properties(
        width=600,
        height=400,
        title="Interactive Temperature Chart"
    )
    
    display(chart)

widgets.interactive(update_plot, **checkboxes)

interactive(children=(Checkbox(value=True, description='High'), Checkbox(value=True, description='Mid'), Check…

In [16]:
chart = alt.Chart(humid_long).mark_circle().encode(
        x=alt.X('humidity:Q', title='Humidity', scale=alt.Scale(domain=[0, 1])),
        y=alt.Y('temperature:Q', title='Temperature', scale=alt.Scale(domain=[0, 120])),
        color=alt.Color('state:N', legend=alt.Legend(title="State")),
        shape=alt.Shape('temperature_type:N', legend=alt.Legend(title="Temperature Type"))
    ).properties(
        width=600,
        height=400,
        title="Interactive Temperature Chart"
    )
chart

In [17]:
chart.properties(width = 'container').save('humidity.json')

In this visualization, we are being shown the temperature differences between the most and least humid states. Arizona is the least humid state while Louisiana is a close second to the most humid state (I thought Alaska would skew the data a lot). I wanted to record if there were big temperature differences between humid and not humid states. I used the default colors because they are easy to tell apart and since there are only two points, I thought a color map would not be necessary. This is completely different from my HW 5 visualizations since it is a different data set. I thought the interactivity with the different min, max, and mids would show more trends better and you can choose which one you want to see, because there can be meaningful trends by finding max or min temps. Finally, I used a little ChatGPT to help me with this code since I was unsure about how to do check boxes, here were my prompts and responses, I fact checked it with my data to see if it would work:

how to do ipywidgets checkboxes interactively witha  graph

what if i want to do 3 or more

Here is the code I was given as an example

In [13]:
# Sample data
functions = {
    "Sin(x)": np.sin(x),
    "Cos(x)": np.cos(x),
    "Tan(x)": np.tan(x),
    "Sin(x)^2": np.sin(x)**2,
    "Cos(x)^2": np.cos(x)**2
}

# Generate checkboxes dynamically
checkboxes = {name: widgets.Checkbox(value=True, description=name) for name in functions}

# Function to update the plot dynamically
def update_plot(**kwargs):
    plt.figure(figsize=(8, 5))
    for name, show in kwargs.items():
        if show:
            plt.plot(x, functions[name], label=name)
    plt.legend()
    plt.xlabel("x")
    plt.ylabel("y")
    plt.title("Interactive Trigonometric Functions")
    plt.grid(True)
    plt.show()

# Create interactive widget with dynamic checkboxes
interactive_plot = widgets.interactive(update_plot, **checkboxes)

# Display checkboxes and plot
display(widgets.VBox(list(checkboxes.values())))
interactive_plot


NameError: name 'np' is not defined

In [19]:
df.columns

Index(['observed', 'location_details', 'county', 'state', 'season', 'title',
       'latitude', 'longitude', 'date', 'number', 'classification', 'geohash',
       'temperature_high', 'temperature_mid', 'temperature_low', 'dew_point',
       'humidity', 'cloud_cover', 'moon_phase', 'precip_intensity',
       'precip_probability', 'precip_type', 'pressure', 'summary', 'uv_index',
       'visibility', 'wind_bearing', 'wind_speed', 'location'],
      dtype='object')

In [20]:
df_nebraska = df[df.state == 'Nebraska']

In [21]:
df_mississippi = df[df.state == 'Mississippi']

In [22]:
wind_concat = pd.concat([df_mississippi, df_nebraska])

In [23]:
wind_concat.isna().sum()

observed               1
location_details       8
county                 0
state                  0
season                 0
title                  2
latitude               2
longitude              2
date                   2
number                 0
classification         0
geohash                2
temperature_high       9
temperature_mid       11
temperature_low       11
dew_point              9
humidity               9
cloud_cover           11
moon_phase             8
precip_intensity      14
precip_probability    14
precip_type           25
pressure              14
summary                8
uv_index               8
visibility            12
wind_bearing           8
wind_speed             8
location               2
dtype: int64

In [24]:
wind_clean = wind_concat[wind_concat['temperature_mid'].isna() == False]

In [25]:
wind_long = wind_clean.melt(
    id_vars=['wind_speed', 'state'],
    value_vars=['temperature_high', 'temperature_mid', 'temperature_low'],
    var_name='temperature_type',
    value_name='temperature'
)

labels = {
    'temperature_high': 'High',
    'temperature_mid': 'Mid',
    'temperature_low': 'Low'
}

checkboxes = {
    key: widgets.Checkbox(value=True, description=label)
    for key, label in labels.items()
}

def update_plot(**kwargs):
    selected_types = [key for key, show in kwargs.items() if show]
    filtered_data = wind_long[wind_long['temperature_type'].isin(selected_types)]
    
    chart = alt.Chart(filtered_data).mark_circle().encode(
        x=alt.X('wind_speed:Q', title='Wind Speed', scale=alt.Scale(domain=[0, 15])),
        y=alt.Y('temperature:Q', title='Temperature', scale=alt.Scale(domain=[0, 120])),
        color=alt.Color('state:N', legend=alt.Legend(title="State")),
        shape=alt.Shape('temperature_type:N', legend=alt.Legend(title="Temperature Type"))
    ).properties(
        width=600,
        height=400,
        title="Interactive Temperature Chart"
    )
    
    display(chart)

widgets.interactive(update_plot, **checkboxes)

interactive(children=(Checkbox(value=True, description='High'), Checkbox(value=True, description='Mid'), Check…

In [27]:
chart2 = alt.Chart(wind_long).mark_circle().encode(
        x=alt.X('wind_speed:Q', title='Wind Speed', scale=alt.Scale(domain=[0, 15])),
        y=alt.Y('temperature:Q', title='Temperature', scale=alt.Scale(domain=[0, 120])),
        color=alt.Color('state:N', legend=alt.Legend(title="State")),
        shape=alt.Shape('temperature_type:N', legend=alt.Legend(title="Temperature Type"))
    ).properties(
        width=600,
        height=400,
        title="Interactive Temperature Chart"
    )
chart2



In [38]:
from altair_saver import save
save(chart2, 'wind.png')

NoMatchingVersions: No matches for version='5.17.0' among ['4.0.2', '4.8.1', '4.17.0'].
Often this can be fixed by updating altair_viewer:
    pip install -U altair_viewer

I decided to go for a similar type of graph but this time I was comapring wind speeds. Surprisingly, Nebraska is supposed to have a higher average wind speen than Mississippi, but it seems to be even if not Mississippi having higher wind speeds than Nebraska. I think the data is jjust not representative of the actual states and next time I should just compare the variables within the dataset by grouping them and finding the average. I also tried looking for temperature differences on this one. Similar idea with the color map aas well since you can tell which colors are which on this one pretty easily. I did not do any data transformations but I did do some data cleaning for NA values. There is no overlap from this to Homework 5. 