In [125]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import mpld3
import ipywidgets as widgets
from IPython.core.display import HTML, display
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import plotly.io as pio
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython display



In [147]:
# Load the data from the CSV file
df = pd.read_csv('data/cleaned/confirmed_covid_cases_cleaned.csv')

# Convert the 'date' column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Filter the data for the years 2020, 2021, 2022, and 2023
df_filtered = df[df['date'].dt.year.isin([2020, 2021, 2022, 2023])]


In [148]:
# Define a function to create an interactive bar chart for a given year
def create_interactive_bar_chart(selected_year):
    # Filter the data based on the selected year
    df_year = df_filtered[df_filtered['date'].dt.year == selected_year]
    
    # Group the data by state and sum the cases
    grouped_df = df_year.groupby('state')['cases'].sum().reset_index()
    
    # Sort the data by cases in descending order for visualization
    grouped_df = grouped_df.sort_values(by='cases', ascending=False)
    
    # Generate a unique color for each bar using the viridis colormap
    num_states = len(grouped_df)
    colors = plt.cm.viridis(np.linspace(0, 1, num_states))
    
    # Create the bar chart using matplotlib
    plt.figure(figsize=(10, 6))
    bars = plt.bar(grouped_df['state'], grouped_df['cases'], color=colors)
    plt.title(f'COVID-19 Cases by State in {selected_year}')
    plt.xlabel('State')
    plt.ylabel('Number of Cases')
    plt.xticks(rotation=90)
    
    # Convert the matplotlib plot to an interactive plot using mpld3
    interactive_plot = mpld3.fig_to_html(plt.gcf())
    
    # Display the interactive plot
    display(HTML(interactive_plot))
    
    # Save the interactive plot as an HTML file
    with open(f"interactive_bar_chart_{selected_year}.html", "w") as f:
        f.write(interactive_plot)

# Define the dropdown widget with years 2020, 2021, 2022, and 2023
year_dropdown = widgets.Dropdown(
    options=[2020, 2021, 2022, 2023],
    value=2020,
    description='Year:',
    disabled=False,
)

# Call the create_interactive_bar_chart function when a different year is selected from the dropdown
widgets.interactive(create_interactive_bar_chart, selected_year=year_dropdown)


interactive(children=(Dropdown(description='Year:', options=(2020, 2021, 2022, 2023), value=2020), Output()), …

In [57]:
# load in vaccine and state data
vaccines = pd.read_csv("data/reference/COVID-19_Vaccinations_in_the_United_States_Jurisdiction_20240104.csv")
states = pd.read_csv("data/cleaned/states_cleaned.csv")
      

In [58]:
indices_to_keep = [vaccines["date"], vaccines["state_abv"], vaccines["Distributed"], vaccines["Distributed_Janssen"], vaccines["Distributed_Moderna"], vaccines["Distributed_Pfizer"], vaccines["Distributed_Novavax"], vaccines["Distributed_Unk_Manuf"], vaccines["Administered"], vaccines["Administered_Janssen"], vaccines["Administered_Moderna"], vaccines["Administered_Pfizer"], vaccines["Administered_Novavax"], vaccines["Administered_Unk_Manuf"]]
vaccines_filtered = pd.DataFrame({
    "date" : vaccines["date"], 
    "state_abv" : vaccines["state_abv"], 
    "Administered" : vaccines["Administered"], 
    "Administered Janssen" : vaccines["Administered_Janssen"], 
    "Administered Moderna" : vaccines["Administered_Moderna"], 
    "Administered Pfizer" : vaccines["Administered_Pfizer"], 
    "Administered Novavax" : vaccines["Administered_Novavax"], 
    "Administered Unknown" : vaccines["Administered_Unk_Manuf"]
})


In [59]:
vaccines_filtered["date"] = pd.to_datetime(vaccines_filtered["date"])

In [60]:
# List of columns for which you want to compute differences
administered_columns = [
    "Administered",
    "Administered Janssen",
    "Administered Moderna",
    "Administered Pfizer",
    "Administered Novavax",
    "Administered Unknown"
]

# Sort the DataFrame by state_abv and date
vaccines_filtered = vaccines_filtered.sort_values(by=["state_abv", "date"])

# Compute the differences for each administered column
for col in administered_columns:
    vaccines_filtered[f"{col}_diff"] = vaccines_filtered.groupby("state_abv")[col].diff()

# Drop NaN values resulting from the difference computation
vaccines_filtered.dropna(subset=[f"{col}_diff" for col in administered_columns], inplace=True)

# Now, vaccines_filtered contains the differences in each administered column per state_abv and date
vaccines_filtered

Unnamed: 0,date,state_abv,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,Administered_diff,Administered Janssen_diff,Administered Moderna_diff,Administered Pfizer_diff,Administered Novavax_diff,Administered Unknown_diff
556,2022-08-03,AK,1193878.0,45975.0,460982.0,685618.0,2.0,1301.0,4014.0,26.0,1679.0,2303.0,0.0,6.0
557,2022-08-10,AK,1197172.0,45988.0,462252.0,687608.0,14.0,1310.0,3294.0,13.0,1270.0,1990.0,12.0,9.0
558,2022-08-17,AK,1197356.0,45989.0,462357.0,687681.0,15.0,1314.0,184.0,1.0,105.0,73.0,1.0,4.0
559,2022-08-24,AK,1202497.0,46016.0,464132.0,690980.0,31.0,1338.0,5141.0,27.0,1775.0,3299.0,16.0,24.0
560,2022-08-31,AK,1204769.0,46043.0,464913.0,692423.0,41.0,1349.0,2272.0,27.0,781.0,1443.0,10.0,11.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38483,2023-04-12,WY,852206.0,29201.0,347483.0,410487.0,197.0,1250.0,604.0,0.0,53.0,160.0,1.0,29.0
38484,2023-04-19,WY,852573.0,29202.0,347497.0,410593.0,199.0,1255.0,367.0,1.0,14.0,106.0,2.0,5.0
38485,2023-04-26,WY,852892.0,29202.0,347507.0,410622.0,202.0,1259.0,319.0,0.0,10.0,29.0,3.0,4.0
38486,2023-05-03,WY,852945.0,29202.0,347508.0,410626.0,202.0,1259.0,53.0,0.0,1.0,4.0,0.0,0.0


In [61]:
# Drop the original administered columns
administered_columns_to_drop = [
    "Administered",
    "Administered Janssen",
    "Administered Moderna",
    "Administered Pfizer",
    "Administered Novavax",
    "Administered Unknown"
]

vaccines_filtered.drop(columns=administered_columns_to_drop, inplace=True)

rename_mapping = {
    "Administered_diff": "Administered",
    "Administered Janssen_diff": "Administered Janssen",
    "Administered Moderna_diff": "Administered Moderna",
    "Administered Pfizer_diff": "Administered Pfizer",
    "Administered Novavax_diff": "Administered Novavax",
    "Administered Unknown_diff": "Administered Unknown"
}

vaccines_filtered.rename(columns=rename_mapping, inplace=True)

vaccines_filtered

Unnamed: 0,date,state_abv,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown
556,2022-08-03,AK,4014.0,26.0,1679.0,2303.0,0.0,6.0
557,2022-08-10,AK,3294.0,13.0,1270.0,1990.0,12.0,9.0
558,2022-08-17,AK,184.0,1.0,105.0,73.0,1.0,4.0
559,2022-08-24,AK,5141.0,27.0,1775.0,3299.0,16.0,24.0
560,2022-08-31,AK,2272.0,27.0,781.0,1443.0,10.0,11.0
...,...,...,...,...,...,...,...,...
38483,2023-04-12,WY,604.0,0.0,53.0,160.0,1.0,29.0
38484,2023-04-19,WY,367.0,1.0,14.0,106.0,2.0,5.0
38485,2023-04-26,WY,319.0,0.0,10.0,29.0,3.0,4.0
38486,2023-05-03,WY,53.0,0.0,1.0,4.0,0.0,0.0


In [103]:
vaccines_filtered.to_csv("data/cleaned/administered_covid vaccines_cleaned.csv")

In [104]:
states_filtered = pd.DataFrame({
    "state_abv" : states["state_abv"],
    "state" : states["state"],
    "Population 2020" : states["pop_2020"],
    "Population 2021" : states["pop_2021"],
    "Population 2022" : states["pop_2022"],
    "Population 2023" : states["pop_2023"]
})
states_filtered

Unnamed: 0,state_abv,state,Population 2020,Population 2021,Population 2022,Population 2023
0,AL,Alabama,5031864,5050380,5073903,5108468
1,AK,Alaska,732964,734923,733276,733406
2,AZ,Arizona,7186683,7272487,7365684,7431344
3,AR,Arkansas,3014348,3028443,3046404,3067732
4,CA,California,39503200,39145060,39040616,38965193
5,CO,Colorado,5785219,5811596,5841039,5877610
6,CT,Connecticut,3577586,3603691,3608706,3617176
7,DE,Delaware,991862,1004881,1019459,1031890
8,FL,Florida,21591299,21830708,22245521,22610726
9,GA,Georgia,10732390,10790385,10913150,11029227


In [105]:
combined_states_df = pd.merge(vaccines_filtered, states_filtered, on='state_abv', how='left')
combined_states_df

Unnamed: 0,date,state_abv,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,state,Population 2020,Population 2021,Population 2022,Population 2023
0,2022-08-03,AK,4014.0,26.0,1679.0,2303.0,0.0,6.0,Alaska,732964.0,734923.0,733276.0,733406.0
1,2022-08-10,AK,3294.0,13.0,1270.0,1990.0,12.0,9.0,Alaska,732964.0,734923.0,733276.0,733406.0
2,2022-08-17,AK,184.0,1.0,105.0,73.0,1.0,4.0,Alaska,732964.0,734923.0,733276.0,733406.0
3,2022-08-24,AK,5141.0,27.0,1775.0,3299.0,16.0,24.0,Alaska,732964.0,734923.0,733276.0,733406.0
4,2022-08-31,AK,2272.0,27.0,781.0,1443.0,10.0,11.0,Alaska,732964.0,734923.0,733276.0,733406.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2125,2023-04-12,WY,604.0,0.0,53.0,160.0,1.0,29.0,Wyoming,577664.0,579548.0,581629.0,584057.0
2126,2023-04-19,WY,367.0,1.0,14.0,106.0,2.0,5.0,Wyoming,577664.0,579548.0,581629.0,584057.0
2127,2023-04-26,WY,319.0,0.0,10.0,29.0,3.0,4.0,Wyoming,577664.0,579548.0,581629.0,584057.0
2128,2023-05-03,WY,53.0,0.0,1.0,4.0,0.0,0.0,Wyoming,577664.0,579548.0,581629.0,584057.0


In [106]:
# List of population columns
population_columns = ['Population 2020', 'Population 2021', 'Population 2022', 'Population 2023']

# List of administered columns
administered_columns = ['Administered', 'Administered Janssen', 'Administered Moderna', 'Administered Pfizer', 'Administered Novavax', 'Administered Unknown']

# Create percentage columns
for pop_col in population_columns:
    for admin_col in administered_columns:
        # Create a new percentage column name
        percentage_col_name = f"{admin_col} % of {pop_col.split()[-1]}"
        
        # Calculate the percentage
        combined_states_df[percentage_col_name] = (combined_states_df[admin_col] / combined_states_df[pop_col]) * 100

# Display the DataFrame with the new percentage columns
combined_states_df

Unnamed: 0,date,state_abv,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,state,Population 2020,...,Administered Moderna % of 2022,Administered Pfizer % of 2022,Administered Novavax % of 2022,Administered Unknown % of 2022,Administered % of 2023,Administered Janssen % of 2023,Administered Moderna % of 2023,Administered Pfizer % of 2023,Administered Novavax % of 2023,Administered Unknown % of 2023
0,2022-08-03,AK,4014.0,26.0,1679.0,2303.0,0.0,6.0,Alaska,732964.0,...,0.228972,0.314070,0.000000,0.000818,0.547309,0.003545,0.228932,0.314014,0.000000,0.000818
1,2022-08-10,AK,3294.0,13.0,1270.0,1990.0,12.0,9.0,Alaska,732964.0,...,0.173195,0.271385,0.001636,0.001227,0.449137,0.001773,0.173165,0.271337,0.001636,0.001227
2,2022-08-17,AK,184.0,1.0,105.0,73.0,1.0,4.0,Alaska,732964.0,...,0.014319,0.009955,0.000136,0.000545,0.025088,0.000136,0.014317,0.009954,0.000136,0.000545
3,2022-08-24,AK,5141.0,27.0,1775.0,3299.0,16.0,24.0,Alaska,732964.0,...,0.242064,0.449899,0.002182,0.003273,0.700976,0.003681,0.242021,0.449819,0.002182,0.003272
4,2022-08-31,AK,2272.0,27.0,781.0,1443.0,10.0,11.0,Alaska,732964.0,...,0.106508,0.196788,0.001364,0.001500,0.309787,0.003681,0.106489,0.196753,0.001364,0.001500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2125,2023-04-12,WY,604.0,0.0,53.0,160.0,1.0,29.0,Wyoming,577664.0,...,0.009112,0.027509,0.000172,0.004986,0.103415,0.000000,0.009074,0.027395,0.000171,0.004965
2126,2023-04-19,WY,367.0,1.0,14.0,106.0,2.0,5.0,Wyoming,577664.0,...,0.002407,0.018225,0.000344,0.000860,0.062836,0.000171,0.002397,0.018149,0.000342,0.000856
2127,2023-04-26,WY,319.0,0.0,10.0,29.0,3.0,4.0,Wyoming,577664.0,...,0.001719,0.004986,0.000516,0.000688,0.054618,0.000000,0.001712,0.004965,0.000514,0.000685
2128,2023-05-03,WY,53.0,0.0,1.0,4.0,0.0,0.0,Wyoming,577664.0,...,0.000172,0.000688,0.000000,0.000000,0.009074,0.000000,0.000171,0.000685,0.000000,0.000000


In [107]:
combined_states_df["year"] = combined_states_df["date"].dt.year
combined_states_df

Unnamed: 0,date,state_abv,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,state,Population 2020,...,Administered Pfizer % of 2022,Administered Novavax % of 2022,Administered Unknown % of 2022,Administered % of 2023,Administered Janssen % of 2023,Administered Moderna % of 2023,Administered Pfizer % of 2023,Administered Novavax % of 2023,Administered Unknown % of 2023,year
0,2022-08-03,AK,4014.0,26.0,1679.0,2303.0,0.0,6.0,Alaska,732964.0,...,0.314070,0.000000,0.000818,0.547309,0.003545,0.228932,0.314014,0.000000,0.000818,2022
1,2022-08-10,AK,3294.0,13.0,1270.0,1990.0,12.0,9.0,Alaska,732964.0,...,0.271385,0.001636,0.001227,0.449137,0.001773,0.173165,0.271337,0.001636,0.001227,2022
2,2022-08-17,AK,184.0,1.0,105.0,73.0,1.0,4.0,Alaska,732964.0,...,0.009955,0.000136,0.000545,0.025088,0.000136,0.014317,0.009954,0.000136,0.000545,2022
3,2022-08-24,AK,5141.0,27.0,1775.0,3299.0,16.0,24.0,Alaska,732964.0,...,0.449899,0.002182,0.003273,0.700976,0.003681,0.242021,0.449819,0.002182,0.003272,2022
4,2022-08-31,AK,2272.0,27.0,781.0,1443.0,10.0,11.0,Alaska,732964.0,...,0.196788,0.001364,0.001500,0.309787,0.003681,0.106489,0.196753,0.001364,0.001500,2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2125,2023-04-12,WY,604.0,0.0,53.0,160.0,1.0,29.0,Wyoming,577664.0,...,0.027509,0.000172,0.004986,0.103415,0.000000,0.009074,0.027395,0.000171,0.004965,2023
2126,2023-04-19,WY,367.0,1.0,14.0,106.0,2.0,5.0,Wyoming,577664.0,...,0.018225,0.000344,0.000860,0.062836,0.000171,0.002397,0.018149,0.000342,0.000856,2023
2127,2023-04-26,WY,319.0,0.0,10.0,29.0,3.0,4.0,Wyoming,577664.0,...,0.004986,0.000516,0.000688,0.054618,0.000000,0.001712,0.004965,0.000514,0.000685,2023
2128,2023-05-03,WY,53.0,0.0,1.0,4.0,0.0,0.0,Wyoming,577664.0,...,0.000688,0.000000,0.000000,0.009074,0.000000,0.000171,0.000685,0.000000,0.000000,2023


In [108]:
print(combined_states_df.dtypes)


date                              datetime64[ns]
state_abv                                 object
Administered                             float64
Administered Janssen                     float64
Administered Moderna                     float64
Administered Pfizer                      float64
Administered Novavax                     float64
Administered Unknown                     float64
state                                     object
Population 2020                          float64
Population 2021                          float64
Population 2022                          float64
Population 2023                          float64
Administered % of 2020                   float64
Administered Janssen % of 2020           float64
Administered Moderna % of 2020           float64
Administered Pfizer % of 2020            float64
Administered Novavax % of 2020           float64
Administered Unknown % of 2020           float64
Administered % of 2021                   float64
Administered Janssen

In [109]:
numerical_columns = combined_states_df.select_dtypes(include=['int64', 'float64']).columns

# Group by state_abv and year and sum up the selected columns
grouped_df = combined_states_df.groupby(['state_abv', 'year'])[numerical_columns].sum().reset_index()

# Display the grouped DataFrame
grouped_df


Unnamed: 0,state_abv,year,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,Population 2020,Population 2021,...,Administered Moderna % of 2022,Administered Pfizer % of 2022,Administered Novavax % of 2022,Administered Unknown % of 2022,Administered % of 2023,Administered Janssen % of 2023,Administered Moderna % of 2023,Administered Pfizer % of 2023,Administered Novavax % of 2023,Administered Unknown % of 2023
0,AK,2022,114091.0,328.0,40201.0,73192.0,246.0,124.0,16125208.0,16168306.0,...,5.482383,9.981508,0.033548,0.016910,15.556322,0.044723,5.481411,9.979738,0.033542,0.016907
1,AK,2023,24266.0,87.0,-29409.0,-49925.0,93.0,160.0,13926316.0,13963537.0,...,-4.010632,-6.808487,0.012683,0.021820,3.308672,0.011862,-4.009921,-6.807280,0.012681,0.021816
2,AL,2022,440804.0,1494.0,170339.0,268191.0,601.0,179.0,110701008.0,111108360.0,...,3.357159,5.285694,0.011845,0.003528,8.628888,0.029246,3.334444,5.249930,0.011765,0.003504
3,AL,2023,88457.0,233.0,-120221.0,-174464.0,196.0,95.0,95605416.0,95957220.0,...,-2.369399,-3.438458,0.003863,0.001872,1.731576,0.004561,-2.353367,-3.415192,0.003837,0.001860
4,AR,2022,407704.0,1205.0,164683.0,238074.0,3159.0,583.0,66315656.0,66625746.0,...,5.405816,7.814919,0.103696,0.019137,13.290079,0.039280,5.368233,7.760587,0.102975,0.019004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,WI,2023,256191.0,620.0,-383674.0,-714683.0,372.0,914.0,112037300.0,111719582.0,...,-6.513389,-12.132718,0.006315,0.015516,4.334173,0.010489,-6.490897,-12.090821,0.006293,0.015463
100,WV,2022,235065.0,497.0,98518.0,135588.0,209.0,253.0,39414364.0,39275478.0,...,5.553329,7.642916,0.011781,0.014261,13.279976,0.028078,5.565765,7.660032,0.011807,0.014293
101,WV,2023,35061.0,177.0,-76608.0,-100521.0,66.0,40.0,34039678.0,33919731.0,...,-4.318291,-5.666235,0.003720,0.002255,1.980768,0.010000,-4.327962,-5.678925,0.003729,0.002260
102,WY,2022,73934.0,233.0,28102.0,45087.0,166.0,346.0,12708608.0,12750056.0,...,4.831602,7.751849,0.028541,0.059488,12.658696,0.039893,4.811517,7.719623,0.028422,0.059241


In [110]:
# List of population columns
population_columns = ['Population 2020', 'Population 2021', 'Population 2022', 'Population 2023']

# List of administered columns
administered_columns = ['Administered', 'Administered Janssen', 'Administered Moderna', 'Administered Pfizer', 'Administered Novavax', 'Administered Unknown']

# Update percentages using the grouped DataFrame
for year_col in population_columns:
    year = year_col.split()[-1]  # Extract the year from the column name
    for admin_col in administered_columns:
        # Create a new percentage column name
        percentage_col_name = f"{admin_col} % of {year}"
        
        # Calculate the percentage using the grouped DataFrame
        grouped_df[percentage_col_name] = (grouped_df[admin_col] / grouped_df[year_col]) * 100

# Display the DataFrame with the updated percentage columns
grouped_df


Unnamed: 0,state_abv,year,Administered,Administered Janssen,Administered Moderna,Administered Pfizer,Administered Novavax,Administered Unknown,Population 2020,Population 2021,...,Administered Moderna % of 2022,Administered Pfizer % of 2022,Administered Novavax % of 2022,Administered Unknown % of 2022,Administered % of 2023,Administered Janssen % of 2023,Administered Moderna % of 2023,Administered Pfizer % of 2023,Administered Novavax % of 2023,Administered Unknown % of 2023
0,AK,2022,114091.0,328.0,40201.0,73192.0,246.0,124.0,16125208.0,16168306.0,...,0.249199,0.453705,0.001525,0.000769,0.707106,0.002033,0.249155,0.453624,0.001525,0.000769
1,AK,2023,24266.0,87.0,-29409.0,-49925.0,93.0,160.0,13926316.0,13963537.0,...,-0.211086,-0.358341,0.000668,0.001148,0.174141,0.000624,-0.211048,-0.358278,0.000667,0.001148
2,AL,2022,440804.0,1494.0,170339.0,268191.0,601.0,179.0,110701008.0,111108360.0,...,0.152598,0.240259,0.000538,0.000160,0.392222,0.001329,0.151566,0.238633,0.000535,0.000159
3,AL,2023,88457.0,233.0,-120221.0,-174464.0,196.0,95.0,95605416.0,95957220.0,...,-0.124705,-0.180971,0.000203,0.000099,0.091136,0.000240,-0.123861,-0.179747,0.000202,0.000098
4,AR,2022,407704.0,1205.0,164683.0,238074.0,3159.0,583.0,66315656.0,66625746.0,...,0.245719,0.355224,0.004713,0.000870,0.604094,0.001785,0.244011,0.352754,0.004681,0.000864
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,WI,2023,256191.0,620.0,-383674.0,-714683.0,372.0,914.0,112037300.0,111719582.0,...,-0.342810,-0.638564,0.000332,0.000817,0.228114,0.000552,-0.341626,-0.636359,0.000331,0.000814
100,WV,2022,235065.0,497.0,98518.0,135588.0,209.0,253.0,39414364.0,39275478.0,...,0.252424,0.347405,0.000536,0.000648,0.603635,0.001276,0.252989,0.348183,0.000537,0.000650
101,WV,2023,35061.0,177.0,-76608.0,-100521.0,66.0,40.0,34039678.0,33919731.0,...,-0.227278,-0.298223,0.000196,0.000119,0.104251,0.000526,-0.227787,-0.298891,0.000196,0.000119
102,WY,2022,73934.0,233.0,28102.0,45087.0,166.0,346.0,12708608.0,12750056.0,...,0.219618,0.352357,0.001297,0.002704,0.575395,0.001813,0.218705,0.350892,0.001292,0.002693


In [111]:
grouped_df.to_csv("data/cleaned/administered_2022_2023_cleaned.csv")

In [146]:
# Sample data
data = {
    'state_abv': grouped_df["state_abv"],
    'year': grouped_df["year"],
    'Administered Janssen': grouped_df["Administered Janssen"],
    'Administered Moderna': grouped_df["Administered Moderna"],
    'Administered Pfizer': grouped_df["Administered Pfizer"],
    'Administered Novavax' : grouped_df["Administered Novavax"],
    'Administered Unknown' : grouped_df["Administered Unknown"]
    # ... (other columns can be added similarly)
}

vax_df = pd.DataFrame(data)

# Filter data for the year 2022
df_2022 = vax_df[vax_df['year'] == 2022]

# Vaccine columns
vaccine_columns = ['Administered Janssen', 'Administered Moderna', 'Administered Pfizer', 'Administered Novavax', 'Administered Unknown']

# Create pie charts for each state for the year 2022
for state in df_2022['state_abv'].unique():
    state_data = df_2022[df_2022['state_abv'] == state]
    state_data = state_data[vaccine_columns].iloc[0]  # Selecting only the row for that state
    
    # Create pie chart
    fig = px.pie(
        names=state_data.index,
        values=state_data.values,
        title=f'Vaccine Distribution in {state} for 2022'
    )
    fig.show()
fig.to_html("vaccinations_distribution_by_state.html")