In [1]:
# import required libraries
import pandas as pd
import numpy as np
import statistics as st
import matplotlib.pyplot as plt
import statsmodels.api as sm

# read the HeroesPymoli data
covid_tiny = pd.read_csv("/Users/rodneydavermann/Desktop/project/merged_data.csv")
covid_tiny.head()


Unnamed: 0.1,Unnamed: 0,date,state,cases,deaths,mental disorder,frequency of mental disorder
0,0,4/23/2020,Alabama,5832,197,Symptoms of Depressive Disorder,18.6
1,1,4/23/2020,Alabama,5832,197,Symptoms of Anxiety Disorder,25.6
2,2,4/23/2020,Alabama,5832,197,Symptoms of Anxiety Disorder or Depressive Dis...,30.3
3,3,4/23/2020,Alaska,335,7,Symptoms of Depressive Disorder,19.2
4,4,4/23/2020,Alaska,335,7,Symptoms of Anxiety Disorder,27.7


In [2]:
# Frequency table by state
covid_tiny_group = covid_tiny.groupby("state")
covid_tiny_group

# total number of cases, deaths, and frequency of mental disorder by state
total_cases_state = covid_tiny_group["cases"].mean().map("{:,.0f}".format)
total_deaths_state = covid_tiny_group["deaths"].mean().map("{:,.0f}".format)
total_frequency_mental_disorder_state = covid_tiny_group["frequency of mental disorder"].mean().map("{:,.1f}".format)
total_cases_state
total_deaths_state
total_frequency_mental_disorder_state

# Display the results in a dataframe 
covid_tiny_group_results = pd.DataFrame({"Average Cases": total_cases_state  ,"Average Deaths": total_deaths_state,
                                        "Average Frequency of Mental Disorder": total_frequency_mental_disorder_state})

# sort the dataframe from the total cases column
covid_tiny_group_results = covid_tiny_group_results.sort_values(["Average Cases"], ascending=False)
covid_tiny_group_results





Unnamed: 0_level_0,Average Cases,Average Deaths,Average Frequency of Mental Disorder
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Kansas,96606,1336,30.3
Texas,890327,15310,34.2
Nebraska,72557,766,27.5
Florida,696439,12443,33.5
New York,662902,33856,31.9
New Mexico,59253,1268,35.2
Idaho,58779,629,29.9
Oregon,49915,749,35.3
Illinois,443957,10607,32.4
Rhode Island,43900,1218,31.3


In [3]:
# Frequency table by mental disorder
covid_tiny_group1 = covid_tiny.groupby("mental disorder")
covid_tiny_group1

# total number of cases, deaths, and frequency of mental disorder by mental disorder
total_cases_mental_disorder = covid_tiny_group1["cases"].mean().map("{:,.0f}".format)
total_deaths_mental_disorder = covid_tiny_group1["deaths"].mean().map("{:,.0f}".format)
total_frequency_mental_disorder = covid_tiny_group1["frequency of mental disorder"].mean().map("{:,.1f}".format)
total_cases_mental_disorder
total_deaths_mental_disorder
total_frequency_mental_disorder

# Display the results in a dataframe 
covid_tiny_group_mental_disorder_results = pd.DataFrame({"Average of Cases": total_cases_mental_disorder,"Average of Deaths":total_deaths_mental_disorder,
                                        "Average Frequency of Mental Disorder": total_frequency_mental_disorder})
covid_tiny_group_mental_disorder_results



Unnamed: 0_level_0,Average of Cases,Average of Deaths,Average Frequency of Mental Disorder
mental disorder,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Symptoms of Anxiety Disorder,191414,4369,32.1
Symptoms of Anxiety Disorder or Depressive Disorder,191414,4369,37.0
Symptoms of Depressive Disorder,191414,4369,25.6


In [4]:
# create a new lists with cases, deaths, and frequency of mental disorder columns
cases_column = covid_tiny["cases"].to_list()
deaths_column = covid_tiny["deaths"].to_list()
frequency_column = covid_tiny["frequency of mental disorder"].to_list()

# tuple from the above lists
new_covid_tiny = list(zip(cases_column,deaths_column,frequency_column))

# create a new dataframe to describe the numerical variables
new_covid_tiny_df = pd.DataFrame(new_covid_tiny, columns=["cases", "deaths", "Frequency of Mental Disorder"])
round(new_covid_tiny_df.describe(),2)



Unnamed: 0,cases,deaths,Frequency of Mental Disorder
count,4131.0,4131.0,4131.0
mean,191414.42,4368.86,31.56
std,365537.5,7356.73,6.45
min,335.0,7.0,14.3
25%,15053.0,414.0,26.8
50%,60820.0,1512.0,31.5
75%,202182.0,5004.0,36.2
max,3631735.0,56952.0,52.3


In [8]:
# simple linear regression with the OLS: cases vs frequency of mental disorder

#define the independent, and the dependent variables 
x=covid_tiny[["cases"]]
y=covid_tiny[["frequency of mental disorder"]]

# define the model 
model_covid = sm.OLS(y,x).fit()
print(model_covid.summary())

                                      OLS Regression Results                                     
Dep. Variable:     frequency of mental disorder   R-squared (uncentered):                   0.229
Model:                                      OLS   Adj. R-squared (uncentered):              0.229
Method:                           Least Squares   F-statistic:                              1229.
Date:                          Fri, 30 Apr 2021   Prob (F-statistic):                   7.46e-236
Time:                                  01:11:06   Log-Likelihood:                         -19669.
No. Observations:                          4131   AIC:                                  3.934e+04
Df Residuals:                              4130   BIC:                                  3.935e+04
Df Model:                                     1                                                  
Covariance Type:                      nonrobust                                                  
                 coe

In [6]:
# simple linear regression with the OLS: deaths vs frequency of mental disorder

#define the independent, and the dependent variables 
x=covid_tiny[["deaths"]]
y=covid_tiny[["frequency of mental disorder"]]

# define the model 
model_covid = sm.OLS(y,x).fit()
print(model_covid.summary())

                                      OLS Regression Results                                     
Dep. Variable:     frequency of mental disorder   R-squared (uncentered):                   0.270
Model:                                      OLS   Adj. R-squared (uncentered):              0.270
Method:                           Least Squares   F-statistic:                              1528.
Date:                          Fri, 30 Apr 2021   Prob (F-statistic):                   1.07e-284
Time:                                  01:10:21   Log-Likelihood:                         -19556.
No. Observations:                          4131   AIC:                                  3.911e+04
Df Residuals:                              4130   BIC:                                  3.912e+04
Df Model:                                     1                                                  
Covariance Type:                      nonrobust                                                  
                 coe

In [9]:
# multiple linear regression with the OLS: cases, deaths vs frequency of mental disorder

#define the independent, and the dependent variables 
x=covid_tiny[["cases","deaths"]]
y=covid_tiny[["frequency of mental disorder"]]

# define the model 
model_covid = sm.OLS(y,x).fit()
print(model_covid.summary())

                                      OLS Regression Results                                     
Dep. Variable:     frequency of mental disorder   R-squared (uncentered):                   0.272
Model:                                      OLS   Adj. R-squared (uncentered):              0.271
Method:                           Least Squares   F-statistic:                              770.3
Date:                          Fri, 30 Apr 2021   Prob (F-statistic):                   5.21e-285
Time:                                  01:11:27   Log-Likelihood:                         -19551.
No. Observations:                          4131   AIC:                                  3.911e+04
Df Residuals:                              4129   BIC:                                  3.912e+04
Df Model:                                     2                                                  
Covariance Type:                      nonrobust                                                  
                 coe