**Research Question:** How did anxiety and depression levels differ between states following the outbreak of COVID-19 in the United States?

# Data Cleaning

Import libraries and dataset

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Read the raw indicator CSV into `df`, then preview the first rows.
csv_path = "./Datasets/Indicators_of_Anxiety_or_Depression_Based_on_Reported_Frequency_of_Symptoms_During_Last_7_Days.csv"
df = pd.read_csv(csv_path)

df.head()

Filter to state-level data only and drop unnecessary columns: Group and Subgroup are redundant, while Time Period Label and Confidence Interval are just combinations of other columns' data.

In [None]:
# Keep only the rows reported per state and drop the columns we do not need.
# Built as a single chain ending in an explicit copy so that `state_data` is an
# independent frame: the previous in-place drop on a filtered slice of `df`
# triggered SettingWithCopyWarning here and on every later column assignment.
state_data = (
    df.loc[df['Group'] == 'By State']
    .drop(columns=['Group', 'Subgroup', 'Time Period Label', 'Confidence Interval'])
    .copy()
)

Separate Quartile Range into 2 Columns:

In [None]:
# Split "lower - upper" strings from 'Quartile Range' into two separate columns.
quartile_bounds = state_data['Quartile Range'].str.split(' - ', expand=True)

# `drop` returns a new frame, so the result has to be assigned back; previously
# the return value was discarded and 'Quartile Range' was never removed.
# NOTE: the new columns still hold strings, exactly as produced by the split.
state_data = (
    state_data
    .assign(Quartile_Lower=quartile_bounds[0], Quartile_Upper=quartile_bounds[1])
    .drop(columns='Quartile Range')
)

state_data.head()

Clean up the Phase column:

In [None]:
# Show the raw Phase labels. A bare expression in the middle of a cell is not
# rendered, so display it explicitly.
display(state_data['Phase'].unique())
# There are 2 values that contain dates which are already stored in other columns, so we can remove these dates

# Keep only the token before the first space (the phase number itself).
# `.str[0]` takes the first piece directly instead of expanding every piece
# into a temporary DataFrame just to read column 0.
state_data['Phase'] = state_data['Phase'].str.split(' ').str[0]

# Confirm the cleaned labels.
state_data['Phase'].unique()

Change Data Types as needed

In [None]:
# Categorical columns: fix the set (and order) of allowed labels.
# Any value not listed in the categories becomes NaN.
indicator_dtype = pd.CategoricalDtype(categories=[
    'Symptoms of Depressive Disorder',
    'Symptoms of Anxiety Disorder',
    'Symptoms of Anxiety Disorder or Depressive Disorder',
])
phase_dtype = pd.CategoricalDtype(categories=[
    '1', '2', '3',
    '3.1', '3.2', '3.3', '3.4', '3.5', '3.6', '3.7', '3.8', '3.9', '3.10',
])
state_data['Indicator'] = state_data['Indicator'].astype(indicator_dtype)
state_data['Phase'] = state_data['Phase'].astype(phase_dtype)

# Date columns: parse, then keep only the calendar date (stored as datetime.date objects).
for date_column in ('Time Period Start Date', 'Time Period End Date'):
    state_data[date_column] = pd.to_datetime(state_data[date_column]).dt.date

# EDA 

Looking at some graphs

In [None]:
# Histogram of values for all states
# Overlaid distribution of 'Value' across all states, one colour per indicator.
plt1 = sns.histplot(
    data=state_data,
    x='Value',
    hue='Indicator',
    alpha=0.5,
)
plt1.set_title('Histogram of Value by Indicator')
plt.show()

In [None]:
# Pair Plot
# Pairwise relationships between the value and the time period index,
# coloured by indicator.
pair_columns = ['Indicator', 'Value', 'Time Period']
pair_plot = sns.pairplot(data=state_data[pair_columns], hue='Indicator')
plt.show()

In [None]:
# Mean of the state-level values for every (period start date, indicator) pair.
# NOTE: this is an unweighted mean across the rows of `state_data`, not a
# population-weighted national estimate.
# `observed=True` groups only on indicator categories that actually occur,
# which avoids the pandas FutureWarning about the changing default for
# categorical groupers and skips empty category combinations.
national_avgs = (
    state_data
    .groupby(['Time Period Start Date', 'Indicator'], observed=True)
    .agg(nat_means=('Value', 'mean'))
)

# Flatten the MultiIndex for plotting so x / hue refer to real columns rather
# than relying on seaborn resolving index level names.
nat_avg_plt = sns.lineplot(
    data=national_avgs.reset_index(),
    x='Time Period Start Date',
    y='nat_means',
    hue='Indicator',
)
plt.xticks(rotation=45)
plt.title("Values Over Time")
plt.ylabel('Values')
plt.show()

In [None]:
# Full name -> two-letter postal code, used to build the location codes for the
# choropleth maps. Entries are kept in their original order: the 50 states
# alphabetically, then the District of Columbia and the territories.
state_code_map = {
    # States
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI",
    "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ",
    "New Mexico": "NM", "New York": "NY", "North Carolina": "NC", "North Dakota": "ND",
    "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC", "South Dakota": "SD",
    "Tennessee": "TN", "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA",
    "Washington": "WA", "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
    # Federal district and territories
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "Virgin Islands, U.S.": "VI",
}

# Draw one animated US choropleth per indicator, each with its own colour scale.
# Indicators are paired with colour scales in order of first appearance; `zip`
# stops at the shorter sequence, so an indicator beyond the third gets no map.
indicators = state_data['Indicator'].unique()
color_scales = ['blues', 'amp', 'purp']

for indicator, color_scale in zip(indicators, color_scales):
    # `.assign` returns a new frame, so the state codes are added without
    # writing into a slice of `state_data` (which raised SettingWithCopyWarning).
    fig_data = (
        state_data.loc[state_data['Indicator'] == indicator]
        .assign(State_Code=lambda frame: frame['State'].map(state_code_map))
    )

    # Fix the colour range across all animation frames so colours stay
    # comparable over time. Named `value_min` / `value_max` so the builtin
    # `min` / `max` are not overwritten for the rest of the kernel session.
    value_min = fig_data['Value'].min()
    value_max = fig_data['Value'].max()

    fig = px.choropleth(
        fig_data,
        locations='State_Code',
        locationmode='USA-states',
        color='Value',
        scope='usa',
        title=f'Map of {indicator} in US states',
        hover_name='State',
        color_continuous_scale=color_scale,
        animation_frame='Time Period Start Date',
        range_color=[value_min, value_max],
    )
    fig.show()