In [1]:
%matplotlib notebook

In [2]:
# import dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from scipy.stats import linregress

In [3]:
# open dataframe to work with
# file path
csv = 'Cleaned_Data/Final.csv'
#read csv
plot_df = pd.read_csv(csv)
plot_df.head()

Unnamed: 0,State,Confirmed Cases,COVID Deaths,Population Density,Population,LandArea,Total Two Dose Regimen Moderna,Total Two Dose Regimen Pfizer,Combined Total Doses
0,Alabama,449086,7172,97.427,4934190,50645,273100,227175,500275
1,Alaska,51603,250,1.2694,724357,570641,84700,62400,147100
2,Arizona,738561,12643,66.2016,7520100,113594,387100,323700,710800
3,Arkansas,288964,4742,58.3059,3033950,52035,165800,141375,307175
4,California,3169935,38224,254.2929,39613500,155779,2178600,1806675,3985275


Total Doses Loocated vs Total Confirmed Cases

In [4]:
plot_df.dtypes

State                              object
Confirmed Cases                     int64
COVID Deaths                        int64
Population Density                float64
Population                          int64
LandArea                            int64
Total Two Dose Regimen Moderna      int64
Total Two Dose Regimen Pfizer       int64
Combined Total Doses                int64
dtype: object

In [5]:
# plotting Doses vs Confirmed Cases



#scatter plot
plot_df.plot(kind="scatter", x="Combined Total Doses", y="Confirmed Cases",figsize=(6,6),
             title="Total Doses Allocated vs Total Confirmed Cases")
plt.xlabel('Doses (Millions)')
plt.ylabel('Confirmed Cases (Millions)')

#regression
x_values = plot_df['Combined Total Doses']
y_values = plot_df['Confirmed Cases']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")




#correlation
correlation = st.pearsonr(plot_df['Combined Total Doses'], plot_df['Confirmed Cases'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Doses_v_Cases.png')
plt.show()

<IPython.core.display.Javascript object>

The correlation between both factors is 0.97
y = 0.8x + -12450.87


In [6]:
#Plotting Doses vs Pop Density
plot_df.plot(kind="scatter", x="Combined Total Doses", y="Population Density",figsize=(6,6),
              title="Total Doses Allocated vs Population Density")
plt.xlabel('Doses (Millions)')
plt.ylabel('People Per Square Mile')

#regression
x_values = plot_df['Combined Total Doses']
y_values = plot_df['Population Density']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")


#correlation
correlation = st.pearsonr(plot_df['Combined Total Doses'], plot_df['Population Density'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Doses_v_Density.png')
plt.show()

<IPython.core.display.Javascript object>

The correlation between both factors is 0.17
y = 0.0x + 163.55


In [7]:
# Plotting Doses vs Population
plot_df.plot(kind="scatter", x="Combined Total Doses", y="Population",figsize=(6,6), 
              title="Total Doses Allocated vs Population")
plt.xlabel('Doses (Millions)')
plt.ylabel('Population (Millions)')

#regression
x_values = plot_df['Combined Total Doses']
y_values = plot_df['Population']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")


#correlation
correlation = st.pearsonr(plot_df['Combined Total Doses'], plot_df['Population'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Doses_v_Population.png')
plt.show()

<IPython.core.display.Javascript object>

The correlation between both factors is 0.99
y = 10.43x + -41225.43


In [8]:
# Plotting Cases vs Population
plot_df.plot(kind="scatter", x="Confirmed Cases", y="Population",figsize=(6,6), 
              title="Confirmed Cases vs Population")
plt.xlabel('Confirmed (Millions)')
plt.ylabel('Population (Millions)')

#regression
x_values = plot_df['Confirmed Cases']
y_values = plot_df['Population']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")


#correlation
correlation = st.pearsonr(plot_df['Confirmed Cases'], plot_df['Population'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Cases_v_Population.png')
plt.show()

<IPython.core.display.Javascript object>

The correlation between both factors is 0.98
y = 12.62x + 315198.3


In [9]:
# cases vs population density
plot_df.plot(kind="scatter", x="Confirmed Cases", y="Population Density",figsize=(6,6),
              title="Confirmed Cases vs Population Density")
plt.xlabel('Confirmed Cases (Millions)')
plt.ylabel('People Per Square Mile')

#regression
x_values = plot_df['Confirmed Cases']
y_values = plot_df['Population Density']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

#correlation
correlation = st.pearsonr(plot_df['Confirmed Cases'], plot_df['Population Density'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Cases_v_Density.png')
plt.show()





<IPython.core.display.Javascript object>

The correlation between both factors is 0.14
y = 0.0x + 171.56


In [16]:
#scatter plot
plot_df.plot(kind="scatter", x="Total Two Dose Regimen Moderna", y="Confirmed Cases",figsize=(6,6),
             title="Total Moderna Allocated vs Total Confirmed Cases")
plt.xlabel('Moderna (Millions)')
plt.ylabel('Confirmed Cases (Millions)')

#regression
x_values = plot_df['Total Two Dose Regimen Moderna']
y_values = plot_df['Confirmed Cases']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")




#correlation
correlation = st.pearsonr(plot_df['Total Two Dose Regimen Moderna'], plot_df['Confirmed Cases'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Moderna_v_Cases.png')
plt.show()




<IPython.core.display.Javascript object>

The correlation between both factors is 0.97
y = 1.47x + -10360.38


In [17]:
#scatter plot
plot_df.plot(kind="scatter", x="Total Two Dose Regimen Pfizer", y="Confirmed Cases",figsize=(6,6),
             title="Total Pfizer Allocated vs Total Confirmed Cases")
plt.xlabel('Pfizer (Millions)')
plt.ylabel('Confirmed Cases (Millions)')

#regression
x_values = plot_df['Total Two Dose Regimen Pfizer']
y_values = plot_df['Confirmed Cases']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")




#correlation
correlation = st.pearsonr(plot_df['Total Two Dose Regimen Pfizer'], plot_df['Confirmed Cases'])
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(line_eq)
plt.savefig('Cleaned_Data/Pfizer_v_Cases.png')
plt.show()


<IPython.core.display.Javascript object>

The correlation between both factors is 0.97
y = 1.77x + -14953.04
