In [12]:
import pandas as pd
import altair as alt

alt.data_transformers.disable_max_rows()

state_to_fips = {
    "Alabama": 1, "Alaska": 2, "Arizona": 4, "Arkansas": 5,
    "California": 6, "Colorado": 8, "Connecticut": 9, "Delaware": 10,
    "Florida": 12, "Georgia": 13, "Hawaii": 15, "Idaho": 16,
    "Illinois": 17, "Indiana": 18, "Iowa": 19, "Kansas": 20,
    "Kentucky": 21, "Louisiana": 22, "Maine": 23, "Maryland": 24,
    "Massachusetts": 25, "Michigan": 26, "Minnesota": 27, "Mississippi": 28,
    "Missouri": 29, "Montana": 30, "Nebraska": 31, "Nevada": 32,
    "New Hampshire": 33, "New Jersey": 34, "New Mexico": 35, "New York": 36,
    "North Carolina": 37, "North Dakota": 38, "Ohio": 39, "Oklahoma": 40,
    "Oregon": 41, "Pennsylvania": 42, "Rhode Island": 44, "South Carolina": 45,
    "South Dakota": 46, "Tennessee": 47, "Texas": 48, "Utah": 49,
    "Vermont": 50, "Virginia": 51, "Washington": 53, "West Virginia": 54,
    "Wisconsin": 55, "Wyoming": 56
}

state_to_code = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY"
}

# Load data
url = "https://raw.githubusercontent.com/UIUC-iSchool-DataViz/is445_data/main/bfro_reports_fall2022.csv"
df = pd.read_csv(url)
print("Data loaded successfully. Head:")
print(df.head())

# Data transformation for choropleth
state_counts = (
    df.groupby('state', as_index=False)
      .size()
      .rename(columns={'size': 'count'})
)
state_counts['state_id'] = state_counts['state'].map(state_to_fips)
state_counts.to_csv("bfro_reports_processed.csv", index=False)
print("Processed state counts data saved to bfro_reports_processed.csv")

# Data transformation for line chart
df["state_code"] = df["state"].map(state_to_code)
df['year'] = pd.to_datetime(df['date'], errors='coerce').dt.year
yearly_counts = df.groupby(['state', 'year']).size().reset_index(name='count')
yearly_counts.to_csv("yearly_counts.csv", index=False)
print("Yearly counts data saved to yearly_counts.csv")

# Generate choropleth visualization
choropleth = alt.Chart(
    alt.topo_feature('https://cdn.jsdelivr.net/npm/vega-datasets@v1.29.0/data/us-10m.json', 'states')
).transform_lookup(
    lookup='id',
    from_=alt.LookupData('bfro_reports_processed.csv', 'state_id', ['count', 'state'])
).mark_geoshape().encode(
    color=alt.Color('count:Q', title='Number of Reports', scale=alt.Scale(scheme='blues')),
    tooltip=['state:N', 'count:Q']
).properties(
    width=800,
    height=400,
    title='Bigfoot Sightings by State (2000-2022)'
).project('albersUsa')

# Generate interactive line chart visualization
line = alt.Chart(yearly_counts).mark_line().encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('count:Q', title='Number of Reports'),
    color=alt.value('steelblue')
).properties(
    width=800,
    height=400,
    title='Bigfoot Sightings Over Time by State'
).interactive()

# Add state selector without a default value to avoid conflicts
state_select = alt.selection_single(
    name='Select',
    fields=['state'],
    bind=alt.binding_select(options=sorted(yearly_counts['state'].unique()), name='State:')
)
line = line.add_selection(state_select).transform_filter(state_select)

# Save visualizations to JSON files
choropleth.save('choropleth.json')
line.save('line_chart.json')
print("Charts saved as choropleth.json and line_chart.json")

Data loaded successfully. Head:
                                            observed  \
0  Ed L. was salmon fishing with a companion in P...   
1  heh i kinda feel a little dumb that im reporti...   
2  I was on my way to Claremont from Lebanon on R...   
3  I was northeast of Macy Nebraska along the Mis...   
4  While this incident occurred a long time ago, ...   

                                    location_details  \
0                  East side of Prince William Sound   
1  the road is off us rt 80, i dont know the exit...   
2  Close to Claremont down 120 not far from Kings...   
3      Latitude & Longitude :  42.158230  -96.344197   
4  Ward County, Just outside of a the Minuteman T...   

                           county          state  season  \
0  Valdez-Chitina-Whittier County         Alaska    Fall   
1                   Warren County     New Jersey    Fall   
2                 Sullivan County  New Hampshire  Summer   
3                 Thurston County       Nebraska  Spri

Deprecated since `altair=5.0.0`. Use selection_point instead.
  state_select = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  line = line.add_selection(state_select).transform_filter(state_select)
