In [None]:
# Import Dependencies
import pandas as pd
import datetime
import numpy as np
import matplotlib.pyplot as plt
# import scipy.stats as st

In [None]:
# Load the raw survey data

# Relative path to the source CSV file
csv_path = "Resources/Indicators_of_Anxiety_or_Depression_Based_on_Reported_Frequency_of_Symptoms_During_Last_7_Days.csv"
# Read the CSV contents with pandas
data_file = pd.read_csv(filepath_or_buffer=csv_path)

In [None]:
# Build the working data frame from everything that was read in
all_data_df = pd.DataFrame(data=data_file)
# Preview the first rows
all_data_df.head()

In [None]:
# Filter the data frame down to the rows whose Indicator is:
# Symptoms of Anxiety Disorder or Depressive Disorder
IAD_df = all_data_df.loc[
    all_data_df["Indicator"].eq("Symptoms of Anxiety Disorder or Depressive Disorder")
]
IAD_df.head()

In [None]:
# Clean the data by dropping the rows that have NaN in the Value column.
# NOTE(review): the earlier note said weeks Jul 22 - Aug 18 and Dec 22 - Jan 5
# "do have data"; it presumably meant they do NOT have data -- confirm.
# Those rows are identified by Phase equal to -1, so keep every other row.
IAD_df2 = IAD_df.loc[IAD_df["Phase"].ne(-1)]
# Sanity check: -1 should no longer appear among the Phase values.
IAD_df2["Phase"].unique()

In [None]:
# Discard the columns that are not needed:
# "Confidence Interval" and "Quartile range"
IAD_df3 = IAD_df2.drop(columns=["Confidence Interval", "Quartile range"])
IAD_df3

In [None]:
# IAD_df4.dtypes

# Split the Time Period Label into separate "Start Period" and "End Period" columns

# Copy the data frame to retain integrity of IAD_df3
IAD_df4 = IAD_df3.copy()

# Cast the label to string so the .str accessor can be used on it
IAD_df4a = IAD_df4.astype({'Time Period Label': 'str'})
# Split once, on the first "-", reading from the string-typed frame (IAD_df4a)
# rather than the un-cast copy.
period_parts = IAD_df4a["Time Period Label"].str.split("-", n=1, expand=True)
# Strip the whitespace that surrounds the dash; the labels presumably look like
# "Apr 23 - May 5" (TODO confirm), which would otherwise leave "Apr 23 " and
# " May 5" and break later prefix checks such as startswith("Jan").
IAD_df4a["Start Period"] = period_parts[0].str.strip()
IAD_df4a["End Period"] = period_parts[1].str.strip()
IAD_df4a.head()

In [None]:
# Append the year to the Start Period and End Period values.
# Start from an independent copy so IAD_df4a is left unchanged.
IAD_df4b = IAD_df4a.copy(deep=True)
# Create a function to add the year
    # provide function some text of type string
def add_year(text: str):
    """Append a year to a "month day" string.

    Surrounding whitespace is removed first, so that a value such as
    " Jan 18" (with a leading space) is still recognised as a January date.

    Args:
        text: A date fragment that begins with the month name, e.g. "Jan 6".

    Returns:
        The stripped text followed by ", 2021" when it starts with "Jan",
        otherwise followed by ", 2020".

    Note:
        Only January is mapped to 2021; any other month is treated as 2020.
    """
    # Strip before inspecting the month, otherwise leading whitespace hides
    # the "Jan" prefix and the wrong year is chosen.
    cleaned = text.strip()
    year = "2021" if cleaned.startswith("Jan") else "2020"
    return f"{cleaned}, {year}"

# Run add_year over both period columns
for period_col in ("Start Period", "End Period"):
    IAD_df4b[period_col] = IAD_df4b[period_col].apply(add_year)
IAD_df4b.head(3)

In [None]:
# Turn the "Start Period" and "End Period" columns from strings into dates.
# Start from an independent copy so IAD_df4b is left unchanged.
IAD_df4c = IAD_df4b.copy(deep=True)
# Create function to convert string to date
    # provide function a text of type string
def convert_to_date(text: str):
    """Parse a string such as "Jun 5, 2020" into a datetime.datetime.

    Args:
        text: A date written as "<month> <day>, <year>".

    Returns:
        The datetime.datetime produced by strptime with the "%b %d, %Y" format.
    """
    # "%b" matches the abbreviated month name, so the full spellings
    # "June" and "July" are shortened before parsing.
    for full_name, short_name in (("June", "Jun"), ("July", "Jul")):
        text = text.replace(full_name, short_name)
    # module.class.function
    return datetime.datetime.strptime(text, "%b %d, %Y")

# Run convert_to_date over both period columns
for period_col in ("Start Period", "End Period"):
    IAD_df4c[period_col] = IAD_df4c[period_col].apply(convert_to_date)
IAD_df4c.head()

In [None]:
# Drop the columns that are no longer needed, then renumber the rows from zero
IAD_clean_df = (
    IAD_df4c
    .drop(columns=["Time Period", "Time Period Label", "Low CI", "High CI"])
    .reset_index(drop=True)
)
IAD_clean_df

In [None]:
# Subset of the clean data: rows whose Group is "By Age"
IAD_age_df = IAD_clean_df.loc[IAD_clean_df["Group"].eq("By Age")]
IAD_age_df

In [None]:
# Subset of the clean data: rows whose Group is "By State"
IAD_state_df = IAD_clean_df.loc[IAD_clean_df["Group"].eq("By State")]
IAD_state_df

In [None]:
# Subset of the clean data: rows whose Group is "National Estimate"
IAD_national_df = IAD_clean_df.loc[IAD_clean_df["Group"].eq("National Estimate")]
IAD_national_df

In [None]:
# Subset of the clean data: rows whose Group is "By Gender"
IAD_gender_df = IAD_clean_df.loc[IAD_clean_df["Group"].eq("By Gender")]
IAD_gender_df

In [None]:
# To Do: Plots:
    # Age groups
    # Use IAD_age_df to do line graphs for all age groups
        # This will show if our age group has suffered the most impact to their mental health
    
    # State groups
    # Use IAD_state_df to look at impact of mental health in NY and CA, superimpose National Average (different data frame)
        # Take a look to see if in this data frame we have the metropolitan areas (bonus work - I think)
    
    # National group
    # Use IAD_national_df to look at the national average
        # Used along the states and any other graphs we want to add it to
    
    # Gender groups
    # Use IAD_gender_df to look at the mental health impact by gender