In [136]:
import pandas as pd
import altair as alt

# Source data: the bfro_reports_fall2022.csv file published in the
# UIUC-iSchool-DataViz/is445_data GitHub repository (fetched over HTTPS on every run).
BFRO_REPORTS_URL = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/bfro_reports_fall2022.csv"
df = pd.read_csv(BFRO_REPORTS_URL)

In [137]:
# Show the column names of the loaded reports table.
df.columns

Index(['observed', 'location_details', 'county', 'state', 'season', 'title',
       'latitude', 'longitude', 'date', 'number', 'classification', 'geohash',
       'temperature_high', 'temperature_mid', 'temperature_low', 'dew_point',
       'humidity', 'cloud_cover', 'moon_phase', 'precip_intensity',
       'precip_probability', 'precip_type', 'pressure', 'summary', 'uv_index',
       'visibility', 'wind_bearing', 'wind_speed', 'location'],
      dtype='object')

In [138]:
# Keep only the reports (rows) that have both a latitude and a longitude;
# a row missing either coordinate is dropped. No columns are removed.
coordinate_columns = ['latitude', 'longitude']
df_dna = df.dropna(subset=coordinate_columns)
df_dna.head()

Unnamed: 0,observed,location_details,county,state,season,title,latitude,longitude,date,number,...,precip_intensity,precip_probability,precip_type,pressure,summary,uv_index,visibility,wind_bearing,wind_speed,location
2,I was on my way to Claremont from Lebanon on R...,Close to Claremont down 120 not far from Kings...,Sullivan County,New Hampshire,Summer,Report 55269: Dawn sighting at Stevens Brook o...,43.41549,-72.33093,2016-06-07,55269.0,...,0.001,0.7,rain,998.87,Mostly cloudy throughout the day.,6.0,9.7,262.0,0.49,POINT(-72.33093000000001 43.415490000000005)
3,I was northeast of Macy Nebraska along the Mis...,Latitude & Longitude : 42.158230 -96.344197,Thurston County,Nebraska,Spring,Report 59757: Possible daylight sighting of a ...,42.15685,-96.34203,2018-05-25,59757.0,...,0.0,0.0,,1008.07,Partly cloudy in the morning.,10.0,8.25,193.0,3.33,POINT(-96.34203000000001 42.15685)
4,"While this incident occurred a long time ago, ...","Ward County, Just outside of a the Minuteman T...",Ward County,North Dakota,Spring,Report 751: Hunter describes described being s...,48.25422,-101.3166,2000-04-21,751.0,...,,,rain,1011.47,Partly cloudy until evening.,6.0,10.0,237.0,11.14,POINT(-101.3166 48.254220000000004)
5,"In early spring 1988, some friends of mine and...","Yancey County, North Carolina, near the summit...",Yancey County,North Carolina,Spring,Report 3339: Deep impressions seen in the snow,35.74875,-82.26195,1988-03-15,3339.0,...,0.0,0.0,,1014.47,Partly cloudy until evening and breezy through...,7.0,9.5,348.0,16.94,POINT(-82.26195 35.74875)
6,This happened summertime early 70's (I think 7...,To get there take Highway 78 south out of Absa...,Stillwater County,Montana,Summer,Report 47215: Female fly fisherman's lucid rec...,45.31278,-109.6449,1971-12-15,47215.0,...,,,,,,,,,,POINT(-109.6449 45.31278)


In [139]:
from vega_datasets import data

# 'states' feature of the us_10m TopoJSON file shipped with vega_datasets.
us_states = alt.topo_feature(data.us_10m.url, feature='states')

# base USA chart: light gray state shapes with white borders, Albers USA projection
state_shapes = alt.Chart(us_states).mark_geoshape(fill='lightgray', stroke='white')
states = state_shapes.project('albersUsa').properties(width=500, height=400)

states

In [140]:
# Number of georeferenced reports per state: one row per state, count in 'sightings'.
sightings_by_state = df_dna.groupby('state').size().reset_index(name='sightings')

# State name -> numeric FIPS code.
# FIPS codes generated by ChatGPT; could not find this data in vega_datasets.
# 'District of Columbia' (11) was missing from the generated table and is included
# so that a report labelled that way still receives an id.
state_to_fips = {
    'Alabama': 1, 'Alaska': 2, 'Arizona': 4, 'Arkansas': 5, 'California': 6,
    'Colorado': 8, 'Connecticut': 9, 'Delaware': 10, 'District of Columbia': 11,
    'Florida': 12, 'Georgia': 13,
    'Hawaii': 15, 'Idaho': 16, 'Illinois': 17, 'Indiana': 18, 'Iowa': 19,
    'Kansas': 20, 'Kentucky': 21, 'Louisiana': 22, 'Maine': 23, 'Maryland': 24,
    'Massachusetts': 25, 'Michigan': 26, 'Minnesota': 27, 'Mississippi': 28, 'Missouri': 29,
    'Montana': 30, 'Nebraska': 31, 'Nevada': 32, 'New Hampshire': 33, 'New Jersey': 34,
    'New Mexico': 35, 'New York': 36, 'North Carolina': 37, 'North Dakota': 38, 'Ohio': 39,
    'Oklahoma': 40, 'Oregon': 41, 'Pennsylvania': 42, 'Rhode Island': 44, 'South Carolina': 45,
    'South Dakota': 46, 'Tennessee': 47, 'Texas': 48, 'Utah': 49, 'Vermont': 50,
    'Virginia': 51, 'Washington': 53, 'West Virginia': 54, 'Wisconsin': 55, 'Wyoming': 56
}

# add FIPS column to sightings_by_state, which is needed for the colored choropleth.
# Series.map returns NaN for any state label that is not a key of state_to_fips, and a
# single NaN would turn the whole column into floats (1.0, 2.0, ...). Casting to the
# nullable 'Int64' dtype keeps the codes as integers and marks unmapped rows as <NA>,
# so a later string conversion yields '1' rather than '1.0'.
sightings_by_state['id'] = sightings_by_state['state'].map(state_to_fips).astype('Int64')

In [149]:
# Store the FIPS ids as strings, as before, but normalise them through a nullable
# integer first. If any state had no FIPS code the column holds NaN and is float, and a
# direct astype(str) would produce keys such as '1.0' instead of '1'.
# pd.to_numeric also accepts ids that are already strings, so re-running this cell
# gives the same result.
sightings_by_state['id'] = (
    pd.to_numeric(sightings_by_state['id'], errors='coerce')
    .astype('Int64')
    .astype(str)
)

# Choropleth: state shapes colored by the number of sightings. The per-state counts
# are attached to each shape by matching the shape's 'id' to the 'id' column of
# sightings_by_state; 'state' and 'sightings' are the fields pulled in.
choropleth = alt.Chart(us_states).mark_geoshape().encode(
    color=alt.Color('sightings:Q', scale=alt.Scale(scheme='oranges')),
    tooltip=['state:N', 'sightings:Q']
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(sightings_by_state, 'id', ["state", "sightings"])
).project(
    type='albersUsa'
).properties(
    width=500,
    height=400,
    title='Bigfoot Sightings by State'
)

# choropleth

In [150]:
# Report counts per season (computed here but not referenced by the chart in this cell).
sightings_by_seasons = df_dna.groupby('season').size().reset_index(name='sightings')

# Point selection on the 'season' field, bound to the chart legend.
selection = alt.selection_point(fields=['season'], bind='legend')

# Explicit season -> color pairing used for the dots and the legend.
season_names = ['Winter', 'Spring', 'Summer', 'Fall', 'Unknown']
season_colors = ['blue', 'green', 'yellow', 'orange', 'purple']
season_color = alt.Color(
    'season:N',
    scale=alt.Scale(domain=season_names, range=season_colors),
)

# Points matching the selection are drawn at opacity 1, all others at 0.12.
dot_opacity = alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.12))

# One fixed-size circle per georeferenced report, placed by its coordinates.
season_dots = (
    alt.Chart(df_dna)
    .mark_circle()
    .encode(
        longitude='longitude:Q',
        latitude='latitude:Q',
        size=alt.value(10),
        color=season_color,
        opacity=dot_opacity,
    )
    .project("albersUsa")
    .add_params(selection)
    .properties(title='Bigfoot Sightings by Season')
)

# Layer the dots on top of the gray base map of states.
by_season = states + season_dots
# by_season
