DATA-SPECIFIC INFORMATION FOR: 
eaglei_outages_2023.csv 1.2 GB

1. Number of variables: 5

2. Number of cases/rows: 26101052

3. Variable List: 
 - Fips_code: The FIPS code of the county in which the power outages occurred, for example “12011”
 - County: The county name in which the power outages occurred spelled out in text, for example “Broward”
 - State: The state in which the power outage occurred, spelled out in full in text format. For example, “Florida”
 - Sum: The total number of customers without power for that county at that timestamp. This number is always an integer. Entries with 0 customers without power were not included in this dataset. Note that the number of “customers” does not necessarily equate to the number of people affected, as a “customer” reported by a utility could be one meter, one building, etc. Outages are collected at various different levels of resolution (county, point, zip code, and polygon) and are aggregated to county for consistent reporting.
 - Run_start_time: Date and timestamp provided in GMT in the format “MM/DD/YY 00:00”. EAGLE-I collects power outage information from all covered utilities at 15-minute intervals, and this timestamp marks the beginning of the collection run.


In [14]:
import os, sys

import pandas as pd
import numpy as np


import plotly.express as px
import json


In [15]:
def get_fips(state_name):
    for abbr, fips, name in state_fips:
        if name.upper() == state_name.upper():
            return fips
    return None, None  # Return None if not found

# Dictionary mapping state abbreviations to FIPS codes
# List of tuples containing (state_abbreviation, fips_code, full_state_name)
state_fips = [
    ('AL', '01', 'Alabama'),
    ('AK', '02', 'Alaska'),
    ('AZ', '04', 'Arizona'),
    ('AR', '05', 'Arkansas'),
    ('CA', '06', 'California'),
    ('CO', '08', 'Colorado'),
    ('CT', '09', 'Connecticut'),
    ('DE', '10', 'Delaware'),
    ('FL', '12', 'Florida'),
    ('GA', '13', 'Georgia'),
    ('HI', '15', 'Hawaii'),
    ('ID', '16', 'Idaho'),
    ('IL', '17', 'Illinois'),
    ('IN', '18', 'Indiana'),
    ('IA', '19', 'Iowa'),
    ('KS', '20', 'Kansas'),
    ('KY', '21', 'Kentucky'),
    ('LA', '22', 'Louisiana'),
    ('ME', '23', 'Maine'),
    ('MD', '24', 'Maryland'),
    ('MA', '25', 'Massachusetts'),
    ('MI', '26', 'Michigan'),
    ('MN', '27', 'Minnesota'),
    ('MS', '28', 'Mississippi'),
    ('MO', '29', 'Missouri'),
    ('MT', '30', 'Montana'),
    ('NE', '31', 'Nebraska'),
    ('NV', '32', 'Nevada'),
    ('NH', '33', 'New Hampshire'),
    ('NJ', '34', 'New Jersey'),
    ('NM', '35', 'New Mexico'),
    ('NY', '36', 'New York'),
    ('NC', '37', 'North Carolina'),
    ('ND', '38', 'North Dakota'),
    ('OH', '39', 'Ohio'),
    ('OK', '40', 'Oklahoma'),
    ('OR', '41', 'Oregon'),
    ('PA', '42', 'Pennsylvania'),
    ('RI', '44', 'Rhode Island'),
    ('SC', '45', 'South Carolina'),
    ('SD', '46', 'South Dakota'),
    ('TN', '47', 'Tennessee'),
    ('TX', '48', 'Texas'),
    ('UT', '49', 'Utah'),
    ('VT', '50', 'Vermont'),
    ('VA', '51', 'Virginia'),
    ('WA', '53', 'Washington'),
    ('WV', '54', 'West Virginia'),
    ('WI', '55', 'Wisconsin'),
    ('WY', '56', 'Wyoming'),
]

In [2]:
directory = '/Users/ryanmc/Documents/Conferences/Jack_Eddy_Symposium_2022/dev/outage_data/EAGLE-I/'
file = 'outage_data_2023.csv'

data = pd.read_csv(os.path.join(directory,file))
data.head()

data['datetime'] = pd.to_datetime(data['run_start_time'], errors='coerce')  # Handle any invalid dates

# Define start and end dates
start_date = '2023-03-26'
end_date = '2023-04-12'

# Filter rows that fall within the date range
filtered_data = data[(data['datetime'] >= start_date) & (data['datetime'] <= end_date)]




Unnamed: 0,fips_code,county,state,sum,run_start_time
0,1003,Baldwin,Alabama,1,2023-01-01 00:00:00
1,1011,Bullock,Alabama,9,2023-01-01 00:00:00
2,1015,Calhoun,Alabama,4,2023-01-01 00:00:00
3,1021,Chilton,Alabama,4,2023-01-01 00:00:00
4,1029,Cleburne,Alabama,142,2023-01-01 00:00:00


In [12]:
pd.unique(filtered_data['state'])

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Puerto Rico', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'United States Virgin Islands', 'Utah', 'Vermont',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
       'Rhode Island'], dtype=object)

In [16]:

# Load the US County GeoJSON file
geojson_url = "https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json"
geojson = json.loads(pd.read_json(geojson_url).to_json())


In [35]:
for key, feature in geojson['features'].items():  # Iterate through dictionary keys and values
    if "properties" in feature:  # Ensure "properties" exists
        state_fp = feature["properties"].get("STATEFP", "")
        county_fp = feature["properties"].get("COUNTYFP", "")
        if state_fp and county_fp:
            full_fips = f"{state_fp}{county_fp}"
            feature["properties"]["fips"] = full_fips  # Add "fips" field

In [40]:
geojson['features']['0']


{'type': 'Feature',
 'properties': {'GEO_ID': '0500000US01001',
  'STATE': '01',
  'COUNTY': '001',
  'NAME': 'Autauga',
  'LSAD': 'County',
  'CENSUSAREA': 594.436},
 'geometry': {'type': 'Polygon',
  'coordinates': [[[-86.496774, 32.344437],
    [-86.717897, 32.402814],
    [-86.814912, 32.340803],
    [-86.890581, 32.502974],
    [-86.917595, 32.664169],
    [-86.71339, 32.661732],
    [-86.714219, 32.705694],
    [-86.413116, 32.707386],
    [-86.411172, 32.409937],
    [-86.496774, 32.344437]]]},
 'id': '01001'}

In [42]:
# Convert datetime to string for Plotly animation
filtered_data['datetime_string'] = filtered_data['datetime'].astype(str)

# Create the animated choropleth map
fig = px.choropleth(
    filtered_data,
    geojson=geojson,
    locations='fips_code',  # Column with FIPS codes
    featureidkey="properties.fips",  # Matches FIPS in GeoJSON
    color='sum',  # Color based on outage severity
    color_continuous_scale="OrRd",  # Color scheme
    range_color=[0, filtered_data["sum"].max()],
    scope="usa",  # Focus on the US
    animation_frame='datetime_string',  # Time slider
    title="Power Outages Over Time in US Counties"
)

fig.update_geos(fitbounds="locations", visible=False)  # Auto-fit to counties
fig.show()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['datetime_string'] = filtered_data['datetime'].astype(str)


KeyboardInterrupt: 