## Mental Health Effects of COVID

In [1]:
import pandas as pd
import os
from sodapy import Socrata

# cvs_output_file_path =  os.path.join("cvs_output.csv")

# Unauthenticated client only works with public data sets. Note 'None'
# in place of application token, and no username or password:
client = Socrata("data.cdc.gov", None)

# Example authenticated client (needed for non-public datasets):
# client = Socrata(data.cdc.gov,
#                  MyAppToken,
#                  userame="user@example.com",
#                  password="AFakePassword")

# First 2000 results, returned as JSON from API / converted to Python list of
# dictionaries by sodapy.
results = client.get("8pt5-q6wp", limit=2000)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)
results_df



Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
0,Symptoms of Depressive Disorder,National Estimate,United States,United States,1,Apr 23 - May 5,23.5,22.7,24.3,22.7 - 24.3,
1,Symptoms of Depressive Disorder,By Age,United States,18 - 29 years,1,Apr 23 - May 5,32.7,30.2,35.2,30.2 - 35.2,
2,Symptoms of Depressive Disorder,By Age,United States,30 - 39 years,1,Apr 23 - May 5,25.7,24.1,27.3,24.1 - 27.3,
3,Symptoms of Depressive Disorder,By Age,United States,40 - 49 years,1,Apr 23 - May 5,24.8,23.3,26.2,23.3 - 26.2,
4,Symptoms of Depressive Disorder,By Age,United States,50 - 59 years,1,Apr 23 - May 5,23.2,21.5,25,21.5 - 25.0,
...,...,...,...,...,...,...,...,...,...,...,...
1995,Symptoms of Anxiety Disorder,By State,Kansas,Kansas,10,July 2 - July 7,32.3,28.2,36.6,28.2 - 36.6,31.4-32.9
1996,Symptoms of Anxiety Disorder,By State,Kentucky,Kentucky,10,July 2 - July 7,36.3,31.2,41.6,31.2 - 41.6,35.4-40.6
1997,Symptoms of Anxiety Disorder,By State,Louisiana,Louisiana,10,July 2 - July 7,38.6,34.2,43.1,34.2 - 43.1,35.4-40.6
1998,Symptoms of Anxiety Disorder,By State,Maine,Maine,10,July 2 - July 7,32.8,29,36.7,29.0 - 36.7,31.4-32.9


## Age DataFrames

In [2]:
us_by_age_df = results_df.loc[(results_df["state"] == "United States") &
                              (results_df["group"] == "By Age")]
    
us_by_age_trim_df = us_by_age_df[['indicator', 'subgroup', 'week', 'value']].copy()
us_by_age_trim_df['value'] = us_by_age_df['value'].astype(float)
us_by_age_trim_df['week'] = us_by_age_df['week'].astype(int)
pd.set_option('display.max_rows', 500)
us_by_age_trim_df

Unnamed: 0,indicator,subgroup,week,value
1,Symptoms of Depressive Disorder,18 - 29 years,1,32.7
2,Symptoms of Depressive Disorder,30 - 39 years,1,25.7
3,Symptoms of Depressive Disorder,40 - 49 years,1,24.8
4,Symptoms of Depressive Disorder,50 - 59 years,1,23.2
5,Symptoms of Depressive Disorder,60 - 69 years,1,18.4
6,Symptoms of Depressive Disorder,70 - 79 years,1,13.6
7,Symptoms of Depressive Disorder,80 years and above,1,14.4
71,Symptoms of Anxiety Disorder,18 - 29 years,1,40.2
72,Symptoms of Anxiety Disorder,30 - 39 years,1,34.4
73,Symptoms of Anxiety Disorder,40 - 49 years,1,34.1


In [3]:
us_by_age_group = us_by_age_trim_df.groupby(["indicator", "week"])
us_by_age_mean_df = pd.DataFrame(us_by_age_group["value"].mean())
us_by_age_mean_df.reset_index(inplace = True) 
us_by_age_mean_df

Unnamed: 0,indicator,week,value
0,Symptoms of Anxiety Disorder,1,27.942857
1,Symptoms of Anxiety Disorder,2,26.985714
2,Symptoms of Anxiety Disorder,3,25.342857
3,Symptoms of Anxiety Disorder,4,27.042857
4,Symptoms of Anxiety Disorder,5,27.7
5,Symptoms of Anxiety Disorder,6,28.085714
6,Symptoms of Anxiety Disorder,7,28.771429
7,Symptoms of Anxiety Disorder,8,27.785714
8,Symptoms of Anxiety Disorder,9,30.057143
9,Symptoms of Anxiety Disorder,10,31.242857


## States DataFrames

In [4]:
# subgroup filtered by United States
usa = results_df[results_df["subgroup"].isin(["United States"])]
usa.head()

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
0,Symptoms of Depressive Disorder,National Estimate,United States,United States,1,Apr 23 - May 5,23.5,22.7,24.3,22.7 - 24.3,
70,Symptoms of Anxiety Disorder,National Estimate,United States,United States,1,Apr 23 - May 5,30.8,30.0,31.7,30.0 - 31.7,
140,Symptoms of Anxiety Disorder or Depressive Dis...,National Estimate,United States,United States,1,Apr 23 - May 5,35.9,35.0,36.8,35.0 - 36.8,
210,Symptoms of Depressive Disorder,National Estimate,United States,United States,2,May 7 - May 12,24.1,23.0,25.2,23.0 - 25.2,
280,Symptoms of Anxiety Disorder,National Estimate,United States,United States,2,May 7 - May 12,30.0,28.9,31.0,28.9 - 31.0,


In [5]:
# Checking the number of data for United States.
usa_total = len(usa["state"])
usa_total

29

In [6]:
# subgroup filtered by Kansas and Missouri
ks_mo = results_df[results_df["subgroup"].isin(["Kansas", "Missouri"])]
ks_mo.head()

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
35,Symptoms of Depressive Disorder,By State,Kansas,Kansas,1,Apr 23 - May 5,22.1,18.3,26.4,18.3 - 26.4,20.8 - 22.1
44,Symptoms of Depressive Disorder,By State,Missouri,Missouri,1,Apr 23 - May 5,21.0,17.9,24.5,17.9 - 24.5,20.8 - 22.1
105,Symptoms of Anxiety Disorder,By State,Kansas,Kansas,1,Apr 23 - May 5,26.4,22.0,31.2,22.0 - 31.2,21.7 - 27.8
114,Symptoms of Anxiety Disorder,By State,Missouri,Missouri,1,Apr 23 - May 5,30.4,25.8,35.4,25.8 - 35.4,30.4 - 31.6
175,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Kansas,Kansas,1,Apr 23 - May 5,32.5,28.4,36.9,28.4 - 36.9,25.6 - 33.3


In [7]:
# Checking the number of data for Kansas and Missouri.
ks_mo_total = len(ks_mo["state"])
ks_mo_total

57

In [8]:
# subgroup filtered by only Kansas 
ks = results_df[results_df["subgroup"].isin(["Kansas"])]
ks.head()

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
35,Symptoms of Depressive Disorder,By State,Kansas,Kansas,1,Apr 23 - May 5,22.1,18.3,26.4,18.3 - 26.4,20.8 - 22.1
105,Symptoms of Anxiety Disorder,By State,Kansas,Kansas,1,Apr 23 - May 5,26.4,22.0,31.2,22.0 - 31.2,21.7 - 27.8
175,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Kansas,Kansas,1,Apr 23 - May 5,32.5,28.4,36.9,28.4 - 36.9,25.6 - 33.3
245,Symptoms of Depressive Disorder,By State,Kansas,Kansas,2,May 7 - May 12,16.7,12.8,21.2,12.8 - 21.2,16.7 - 20.6
315,Symptoms of Anxiety Disorder,By State,Kansas,Kansas,2,May 7 - May 12,25.1,20.0,30.9,20.0 - 30.9,20.3 - 26.2


In [9]:
# Checking the number of data for Kansas.
ks_total = len(ks["state"])
ks_total

29

In [10]:
# subgroup filtered by only Missouri 
mo = results_df[results_df["subgroup"].isin(["Missouri"])]
mo.head()

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
44,Symptoms of Depressive Disorder,By State,Missouri,Missouri,1,Apr 23 - May 5,21.0,17.9,24.5,17.9 - 24.5,20.8 - 22.1
114,Symptoms of Anxiety Disorder,By State,Missouri,Missouri,1,Apr 23 - May 5,30.4,25.8,35.4,25.8 - 35.4,30.4 - 31.6
184,Symptoms of Anxiety Disorder or Depressive Dis...,By State,Missouri,Missouri,1,Apr 23 - May 5,35.2,30.2,40.4,30.2 - 40.4,34.8 - 36.7
254,Symptoms of Depressive Disorder,By State,Missouri,Missouri,2,May 7 - May 12,24.1,18.5,30.5,18.5 - 30.5,23.1 - 25.6
324,Symptoms of Anxiety Disorder,By State,Missouri,Missouri,2,May 7 - May 12,37.8,32.3,43.7,32.3 - 43.7,31.6 - 38.3


In [11]:
# Checking the number of data for Missouri.
mo_total = len(mo["state"])
mo_total

28

## KS & MO / National DataFrames

In [12]:
kansas_df = results_df.loc[results_df["state"] == "Kansas", :]
kansas_df['indicator'].value_counts()
kansas_df

missouri_df=results_df.loc[results_df['state']=='Missouri']
missouri_df

ks_mo=pd.merge(kansas_df,missouri_df, how='outer')
ks_mo

cleaned_KsMo=ks_mo[['indicator','state','week','value']].copy()
cleaned_KsMo

cleaned_KsMo['value']=cleaned_KsMo['value'].astype(float)
cleaned_KsMo['week'] = cleaned_KsMo['week'].astype(int)

cleaned=cleaned_KsMo.groupby(['week','indicator'])
cleaned.count().head(10)

state_mean=cleaned['value'].mean()
final=pd.DataFrame({'KS & MO Average(%)':state_mean})
final.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,KS & MO Average(%)
week,indicator,Unnamed: 2_level_1
1,Symptoms of Anxiety Disorder,28.4
1,Symptoms of Anxiety Disorder or Depressive Disorder,33.85
1,Symptoms of Depressive Disorder,21.55
2,Symptoms of Anxiety Disorder,31.45
2,Symptoms of Anxiety Disorder or Depressive Disorder,34.65


In [13]:
all_states=results_df.loc[(results_df['state'] !='United States')]
all_states

cleaned_all_states=all_states[['indicator','state','week','value']].copy()
cleaned_all_states

cleaned_all_states['value']=cleaned_all_states['value'].astype(float)
cleaned_all_states['week']=cleaned_all_states['week'].astype(int)

cleanedAll=cleaned_all_states.groupby(['week','indicator'])
cleanedAll.count().head(10)

country_mean=cleanedAll['value'].mean()
final_total=pd.DataFrame({'National Average(%)':country_mean})
final_total.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,National Average(%)
week,indicator,Unnamed: 2_level_1
1,Symptoms of Anxiety Disorder,29.609804
1,Symptoms of Anxiety Disorder or Depressive Disorder,34.564706
1,Symptoms of Depressive Disorder,22.247059
2,Symptoms of Anxiety Disorder,28.996078
2,Symptoms of Anxiety Disorder or Depressive Disorder,33.827451


## Gender DataFrames

Gender Demographics: *Total of Gender data entries *Total of females *Total of males *The average of each value of week set by gender (could not do by state - group data was only given United States)

In [14]:
#create gender list to pull gender information only
#https://www.youtube.com/watch?v=3Kl5oaT0dP0
genderdata_df = results_df.loc[(results_df["group"] == "By Gender")]
genderdata_df

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
8,Symptoms of Depressive Disorder,By Gender,United States,Male,1,Apr 23 - May 5,20.8,19.6,22.0,19.6 - 22.0,
9,Symptoms of Depressive Disorder,By Gender,United States,Female,1,Apr 23 - May 5,26.1,25.2,27.1,25.2 - 27.1,
78,Symptoms of Anxiety Disorder,By Gender,United States,Male,1,Apr 23 - May 5,26.1,24.7,27.5,24.7 - 27.5,
79,Symptoms of Anxiety Disorder,By Gender,United States,Female,1,Apr 23 - May 5,35.4,34.5,36.3,34.5 - 36.3,
148,Symptoms of Anxiety Disorder or Depressive Dis...,By Gender,United States,Male,1,Apr 23 - May 5,31.0,29.6,32.3,29.6 - 32.3,
149,Symptoms of Anxiety Disorder or Depressive Dis...,By Gender,United States,Female,1,Apr 23 - May 5,40.7,39.7,41.6,39.7 - 41.6,
218,Symptoms of Depressive Disorder,By Gender,United States,Male,2,May 7 - May 12,22.3,20.6,24.0,20.6 - 24.0,
219,Symptoms of Depressive Disorder,By Gender,United States,Female,2,May 7 - May 12,25.8,24.7,27.0,24.7 - 27.0,
288,Symptoms of Anxiety Disorder,By Gender,United States,Male,2,May 7 - May 12,26.7,24.9,28.5,24.9 - 28.5,
289,Symptoms of Anxiety Disorder,By Gender,United States,Female,2,May 7 - May 12,33.1,32.0,34.2,32.0 - 34.2,


In [15]:
genderdata_df.groupby("group").count()

Unnamed: 0_level_0,indicator,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
By Gender,58,58,58,58,58,58,58,58,58,0


In [16]:
#Create female df
femalegender_df = genderdata_df.loc[(genderdata_df["subgroup"] == "Female")]
femalegender_df.head(3)                                     

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
9,Symptoms of Depressive Disorder,By Gender,United States,Female,1,Apr 23 - May 5,26.1,25.2,27.1,25.2 - 27.1,
79,Symptoms of Anxiety Disorder,By Gender,United States,Female,1,Apr 23 - May 5,35.4,34.5,36.3,34.5 - 36.3,
149,Symptoms of Anxiety Disorder or Depressive Dis...,By Gender,United States,Female,1,Apr 23 - May 5,40.7,39.7,41.6,39.7 - 41.6,


In [17]:
#how many females
femalegender_df.count()["subgroup"]

29

In [18]:
#Find the average of values for females through each week label set so Jessica can graph
femalegender_df['week'] = femalegender_df['week'].astype(int)
femalegender_df['value']=femalegender_df['value'].astype(float)
femalegender_df.groupby('week').mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,value
week,Unnamed: 1_level_1
1,34.066667
2,32.033333
3,32.133333
4,32.733333
5,34.0
6,34.3
7,33.6
8,33.9
9,35.666667
10,32.8


In [19]:
#Create male data only
malegender_df = genderdata_df.loc[(genderdata_df["subgroup"] == "Male")]
malegender_df.head(3) 

Unnamed: 0,indicator,group,state,subgroup,week,week_label,value,lowci,highci,confidence_interval,quartile_range
8,Symptoms of Depressive Disorder,By Gender,United States,Male,1,Apr 23 - May 5,20.8,19.6,22.0,19.6 - 22.0,
78,Symptoms of Anxiety Disorder,By Gender,United States,Male,1,Apr 23 - May 5,26.1,24.7,27.5,24.7 - 27.5,
148,Symptoms of Anxiety Disorder or Depressive Dis...,By Gender,United States,Male,1,Apr 23 - May 5,31.0,29.6,32.3,29.6 - 32.3,


In [20]:
#how many males
malegender_df.count()["subgroup"]

29

In [21]:
malegender_df['week'] = malegender_df['week'].astype(int)
malegender_df['value']=malegender_df['value'].astype(float)
malegender_df.groupby('week').mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0_level_0,value
week,Unnamed: 1_level_1
1,25.966667
2,26.8
3,25.4
4,26.133333
5,26.7
6,27.466667
7,28.033333
8,27.866667
9,29.266667
10,28.65
