# Powering Through

**Name(s)**: Andrea González Martín & Andrés Riera Ortiz

**Website Link**: https://andiigonzalez.github.io/Power_Outage_Analysis/index.html

In [18]:
import pandas as pd
import numpy as np
from pathlib import Path

import plotly.express as px
pd.options.plotting.backend = 'plotly'
import matplotlib as plt

from dsc80_utils import * 

## Step 1: Introduction

In [111]:
# Load the raw outage records and keep only the columns used in this analysis.
pd.set_option('display.max_columns', None)  # display all columns of the dataframe

# Read the data through a path relative to the working directory instead of an
# absolute, user-specific path, so the notebook also runs on other machines.
# NOTE(review): assumes the kernel is started in the project root (the folder
# that holds outages.csv and assets/) -- confirm.
OUTAGES_CSV = Path('outages.csv')

# Columns present in the CSV that are dropped right after loading.
UNUSED_COLUMNS = [
    'POSTAL.CODE', 'HURRICANE.NAMES', 'NERC.REGION',
    'OUTAGE.RESTORATION.DATE', 'OUTAGE.RESTORATION.TIME',
    'RES.PRICE', 'COM.PRICE', 'IND.PRICE', 'TOTAL.PRICE',
    'RES.PERCEN', 'COM.PERCEN', 'IND.PERCEN',
    'COM.CUSTOMERS', 'IND.CUSTOMERS', 'TOTAL.CUSTOMERS', 'RES.CUSTOMERS',
    'RES.CUST.PCT', 'COM.CUST.PCT', 'IND.CUST.PCT',
    'PC.REALGSP.STATE', 'PC.REALGSP.USA', 'PC.REALGSP.REL', 'PC.REALGSP.CHANGE',
    'UTIL.REALGSP', 'TOTAL.REALGSP', 'UTIL.CONTRI', 'PI.UTIL.OFUSA',
    'POPPCT_UC', 'POPDEN_UC', 'AREAPCT_UC',
    'PCT_LAND', 'PCT_WATER_TOT', 'PCT_WATER_INLAND',
    'AREAPCT_URBAN', 'POPPCT_URBAN',
]

# Build the trimmed frame in one expression (no in-place mutation).
outages = pd.read_csv(OUTAGES_CSV).drop(columns=UNUSED_COLUMNS)

outages


Unnamed: 0,OBS,YEAR,MONTH,U.S._STATE,CLIMATE.REGION,ANOMALY.LEVEL,CLIMATE.CATEGORY,OUTAGE.START.DATE,OUTAGE.START.TIME,CAUSE.CATEGORY,CAUSE.CATEGORY.DETAIL,OUTAGE.DURATION,DEMAND.LOSS.MW,CUSTOMERS.AFFECTED,RES.SALES,COM.SALES,IND.SALES,TOTAL.SALES,POPULATION,POPDEN_URBAN,POPDEN_RURAL
0,1,2011,7.0,Minnesota,East North Central,-0.3,normal,2011-07-01,17:00:00,severe weather,,3060.0,,70000.0,2.33e+06,2.11e+06,2.11e+06,6.56e+06,5348119,2279.0,18.2
1,2,2014,5.0,Minnesota,East North Central,-0.1,normal,2014-05-11,18:38:00,intentional attack,vandalism,1.0,,,1.59e+06,1.81e+06,1.89e+06,5.28e+06,5457125,2279.0,18.2
2,3,2010,10.0,Minnesota,East North Central,-1.5,cold,2010-10-26,20:00:00,severe weather,heavy wind,3000.0,,70000.0,1.47e+06,1.80e+06,1.95e+06,5.22e+06,5310903,2279.0,18.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1531,1532,2009,8.0,South Dakota,West North Central,0.5,warm,2009-08-29,22:54:00,islanding,,59.0,84.0,,3.38e+05,3.71e+05,2.15e+05,9.24e+05,807067,2038.3,4.7
1532,1533,2009,8.0,South Dakota,West North Central,0.5,warm,2009-08-29,11:00:00,islanding,,181.0,373.0,,3.38e+05,3.71e+05,2.15e+05,9.24e+05,807067,2038.3,4.7
1533,1534,2000,,Alaska,,,,,,equipment failure,failure,,35.0,14273.0,,,,,627963,1802.6,0.4


In [158]:
# Save the first 10 rows of the dataframe as an HTML table to include in the webpage.
# The path is relative to the working directory rather than an absolute,
# user-specific location, so the export works on any checkout of the project.
# NOTE(review): assumes the kernel is started in the project root -- confirm.
html_file = "assets/images/outages_head.html"

# Make sure the target folder exists before writing into it.
Path(html_file).parent.mkdir(parents=True, exist_ok=True)

outages.head(10).to_html(html_file, index=False)


## Step 2: Data Cleaning and Exploratory Data Analysis

In [162]:
import matplotlib.pyplot as plt
import plotly.express as px 
# Univariate analysis: number of outage records per year, drawn as a bar plot.
# 'OBS' is selected before counting so only that one column is aggregated.
outages_by_year = outages.groupby('YEAR')['OBS'].count()
outages_by_year_df = outages_by_year.reset_index()
fig = px.bar(
    outages_by_year_df,
    x='YEAR',
    y='OBS',
    title='Number of Power Outages by Year',
    labels={'YEAR': 'Year', 'OBS': 'Number of Outages'},
    color_discrete_sequence=['orange'],
)

# Customize layout
fig.update_layout(
    title={
        'text': 'Number of Power Outages by Year',
        'x': 0.5,  # Center the title
        'y': 0.98,  # Add padding above the plot
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(family='Serif', size=18, color='black')  # Custom font for title
    },
    xaxis_title_font=dict(size=14),  # Font size for x-axis title
    yaxis_title_font=dict(size=14),  # Font size for y-axis title
    xaxis=dict(tickmode='linear'),  # Ensure all years are displayed
    width=700,  # Make the plot wider
    height=300   # Adjust height if needed
)

# Show the plot
fig.show()

# Export the figure for the webpage. The figure's own write_html method is used
# so the cell does not depend on a `pio` name that is never imported explicitly
# in this notebook. auto_open=True also opens the written file in the browser.
fig.write_html(file='assets/images/outages_by_year.html', auto_open=True)


In [122]:
# Count of outage records per state, as a frame with columns 'U.S._STATE' and 'OBS'.
outages_by_state = (
    outages
    .groupby('U.S._STATE')['OBS']
    .count()
    .reset_index()
)


In [168]:
# Univariate analysis: total number of outage records per state.
# 'OBS' is selected before counting so only that one column is aggregated.
outages_by_state = outages.groupby('U.S._STATE')['OBS'].count().reset_index()

# NOTE(review): states are unordered categories, so the line connecting them
# does not represent a trend; consider px.bar here. Kept as a line plot so the
# published figure does not change.
outages_by_state_plot = px.line(
    outages_by_state,
    x='U.S._STATE',
    y='OBS',
    title='Number of Power Outages by State from 2000 to 2016',
    labels={'U.S._STATE': 'State', 'OBS': 'Number of Outages'},
    color_discrete_sequence=['skyblue'],
)

outages_by_state_plot.update_layout(
    title={
        'text': 'Number of Power Outages by State from 2000 to 2016',
        'x': 0.5,  # Center the title
        'y': 0.98,  # Add padding above the plot
        'xanchor': 'center',
        'yanchor': 'top',
        'font': dict(family='Serif', size=18, color='black')  # Custom font for title
    },
    xaxis_title_font=dict(size=14),  # Font size for x-axis title
    yaxis_title_font=dict(size=14),  # Font size for y-axis title
    xaxis=dict(tickmode='linear'),  # Intended to show a tick label for every state
    yaxis=dict(
        tickmode='array',  # Specify custom tick values
        tickvals=list(range(0, outages_by_state['OBS'].max() + 50, 50)),  # Ticks at 50 increments
        title='Number of Outages'
    ),
    width=800,  # Make the plot wider
    height=400   # Adjust height if needed
)

# Show the plot
outages_by_state_plot.show()

# Export the figure for the webpage. The figure's own write_html method is used
# so the cell does not depend on a `pio` name that is never imported explicitly
# in this notebook. auto_open=True also opens the written file in the browser.
outages_by_state_plot.write_html(file='assets/images/outages_by_state.html', auto_open=True)


In [154]:
import plotly.express as px
import pandas as pd
import plotly.graph_objs as go

# Choropleth of outages per climate region: every state is coloured with the
# total of the region it is assigned to.

# Region -> state abbreviations. Each state must appear in exactly one region;
# a state listed twice produces two rows (with two different colour values) for
# the same location after the merge below.
#   * 'ID' is listed only under 'Northwest' (it was previously also listed under
#     'West North Central').
#   * 'DE' and 'MD' are added to 'Northeast', following the NOAA U.S. climate
#     region definitions, so they are no longer left blank on the map.
region_states = {
    'Northeast': ['CT', 'DE', 'ME', 'MD', 'MA', 'NH', 'RI', 'VT', 'NJ', 'NY', 'PA'],
    'Southeast': ['FL', 'GA', 'AL', 'NC', 'SC', 'VA'],
    'Central': ['IL', 'MO', 'IN', 'KY', 'WV', 'OH', 'TN'],
    'Southwest': ['AZ', 'CO', 'UT', 'NM'],
    'West': ['NV', 'CA'],
    'Northwest': ['OR', 'WA', 'ID'],
    'South': ['KS', 'TX', 'OK', 'LA', 'AR', 'MS'],
    'West North Central': ['MT', 'NE', 'ND', 'SD', 'WY'],
    'East North Central': ['MN', 'IA', 'WI', 'MI']
}

# Outages data
# NOTE(review): these totals are hard-coded; presumably they are the number of
# rows per outages['CLIMATE.REGION'] -- confirm and derive them from `outages`
# so the map cannot drift from the data.
outages_data = {
    'Region': ['Central', 'East North Central', 'Northeast', 'Northwest',
               'South', 'Southeast', 'Southwest', 'West',
               'West North Central'],
    'Outages': [200, 138, 350, 132, 229, 153, 92, 217, 17]
}

# One row per (region, state) pair
flattened_data = [(region, state) for region, states in region_states.items() for state in states]
df_regions = pd.DataFrame(flattened_data, columns=['Region', 'State'])

# Guard against a state being assigned to more than one region.
assert df_regions['State'].is_unique, "a state is assigned to more than one region"

# Convert outages_data to a DataFrame
df_outages = pd.DataFrame(outages_data)

# Attach each region's total to every state of that region
merged_df = df_regions.merge(df_outages, on='Region', how='left')

# Latitude/longitude per region.
# NOTE(review): not used by any code in this cell; presumably meant as anchor
# points for the region-name annotations mentioned below -- confirm.
region_centers = {
    'Northeast': {'lat': 42.0, 'lon': -72.0},
    'Southeast': {'lat': 33.0, 'lon': -84.0},
    'Central': {'lat': 39.0, 'lon': -88.0},
    'Southwest': {'lat': 36.0, 'lon': -108.0},
    'West': {'lat': 37.0, 'lon': -119.0},
    'Northwest': {'lat': 45.0, 'lon': -120.0},
    'South': {'lat': 33.0, 'lon': -97.0},
    'West North Central': {'lat': 45.0, 'lon': -105.0},
    'East North Central': {'lat': 43.0, 'lon': -89.0}
}

# Create a base choropleth map
fig = px.choropleth(
    merged_df,
    locations="State",                # The column with state abbreviations
    locationmode="USA-states",        # Map mode for US states
    color="Outages",                  # Color by the total outages in the region
    hover_name="Region",              # Display the region name on hover
    title="Regional Outages in the USA",
    scope="usa",                      # Limit map to USA
    color_continuous_scale=px.colors.sequential.Plasma
)

# TODO: add annotations for region names (region_centers is not used yet)


# Update layout to ensure sans-serif font for the plot text
fig.update_layout(
    font=dict(family="sans-serif")  # Set the font family for the whole plot to sans-serif
)

# Show the map
fig.show()

# Export the figure for the webpage. The figure's own write_html method is used
# so the cell does not depend on a `pio` name that is never imported explicitly
# in this notebook. auto_open=True also opens the written file in the browser.
fig.write_html(file='assets/images/outages_by_region_map.html', auto_open=True)

## Step 3: Assessment of Missingness

In [None]:
# TODO

## Step 4: Hypothesis Testing

In [None]:
# TODO

## Step 5: Framing a Prediction Problem

In [None]:
# TODO

## Step 6: Baseline Model

In [None]:
# TODO

## Step 7: Final Model

In [None]:
# TODO

## Step 8: Fairness Analysis

In [None]:
# TODO