In [3]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from scipy import stats

In [None]:
#Preparing data to answer the following questions:
#What are the trends in types of Hate Crimes throughout 2019?
#How does demographic makeup of a state affect types of Hate Crimes committed?
#How does demographic makeup of a state affect # of Hate Crimes committed?

In [4]:
#Called in Hate Crime dataset
HateCrime2019 = pd.read_csv("Data Files/Hate_Crime_Incidents_per_Bias_Motivation_and_Quarter_by_State_Federal_and_Agency_2019.csv")
HateCrime2019.head()

Unnamed: 0,State,Agency Type,Agency,Race/\nEthnicity/\nAncestry,Religion,Sexual\norientation,Disability,Gender,Gender\nidentity,1st\nquarter,2nd\nquarter,3rd\nquarter,4th\nquarter,Population
0,Alaska,Cities,Anchorage,4.0,0,2.0,0.0,0.0,0.0,1.0,2.0,3.0,0.0,287731.0
1,Alaska,Cities,Fairbanks,1.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,31493.0
2,Alaska,Cities,Kotzebue,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3272.0
3,Alaska,Cities,North Pole,1.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,2111.0
4,Alaska,Cities,Soldotna,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4756.0


In [5]:
#Data Wrangling
#Cleaned up headers (removed spaces and symbols, and simplified/clarified).
HateCrime2019.rename(columns={'Agency Type' : 'AgencyType', 'Race/\nEthnicity/\nAncestry' : 'Race', 
                              'Sexual\norientation' : 'SexualOrientation', 'Gender\nidentity' : 'GenderIdentity', 
                              '1st\nquarter' : '1stQtr', '2nd\nquarter' : '2ndQtr', '3rd\nquarter' : '3rdQtr', 
                              '4th\nquarter' : '4thQtr'}, inplace=True)

In [6]:
#Viewed headers to confirm code worked.
HateCrime2019.head()

Unnamed: 0,State,AgencyType,Agency,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity,1stQtr,2ndQtr,3rdQtr,4thQtr,Population
0,Alaska,Cities,Anchorage,4.0,0,2.0,0.0,0.0,0.0,1.0,2.0,3.0,0.0,287731.0
1,Alaska,Cities,Fairbanks,1.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,31493.0
2,Alaska,Cities,Kotzebue,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,3272.0
3,Alaska,Cities,North Pole,1.0,0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,,2111.0
4,Alaska,Cities,Soldotna,0.0,0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4756.0


In [399]:
#Since questions are focused on the state level, breakdown according to local agency (city data) is not needed,
#meaning "AgencyType" and "Agency" columns are unnecessary.
#Questions focus on # and type of Hate Crimes and the interaction with state demographics, 
#so "1stQtr","2ndQtr","3rdQtr","4thQtr" columns are unnecessary.
HateCrime2019A = HateCrime2019[['State', 'Race', 'Religion', 'SexualOrientation', 'Disability', 'Gender', 
                                'GenderIdentity', 'Population']]

In [400]:
HateCrime2019A.head()

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity,Population
0,Alaska,4.0,0,2.0,0.0,0.0,0.0,287731.0
1,Alaska,1.0,0,0.0,0.0,0.0,0.0,31493.0
2,Alaska,1.0,0,0.0,0.0,0.0,0.0,3272.0
3,Alaska,1.0,0,0.0,0.0,0.0,0.0,2111.0
4,Alaska,0.0,0,2.0,0.0,0.0,0.0,4756.0


In [401]:
#Need to see if there are any unique values in the State column that need to be dealt with.
HateCrime2019A["State"].unique()
#This shows "Federal" and "NaN" as values in the "State" column

array(['Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
       'Connecticut', 'Delaware', 'District of Columbia', 'Florida', nan,
       'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
       'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming', 'Federal'], dtype=object)

In [402]:
#Since questions are focused on the state level only, cannot use Federal data.
#Removed any rows from the "HateCrime2019" dataset where the State value is "Federal".
HateCrime2019B = HateCrime2019A[HateCrime2019A.State != 'Federal']

In [403]:
#Also appear to have NaN values in the State column - can't use those rows.
#Dropped rows missing "State" data
HateCrime2019NoNA = HateCrime2019B[HateCrime2019B['State'].notna()]

In [404]:
#Looked again for unique values in State column to confirm code worked.
HateCrime2019NoNA["State"].unique()

array(['Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
       'Connecticut', 'Delaware', 'District of Columbia', 'Florida',
       'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
       'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
       'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
       'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
       'New Jersey', 'New Mexico', 'New York', 'North Carolina',
       'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
       'West Virginia', 'Wisconsin', 'Wyoming'], dtype=object)

In [405]:
#Viewed tail to see how many records in dataset
HateCrime2019NoNA.tail()
#Total of 2125 records/rows

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity,Population
2120,Wyoming,1.0,0,0.0,0.0,0.0,0.0,9865.0
2121,Wyoming,1.0,0,0.0,0.0,0.0,0.0,11624.0
2122,Wyoming,0.0,0,1.0,0.0,0.0,0.0,32669.0
2123,Wyoming,1.0,0,0.0,0.0,0.0,0.0,17895.0
2124,Wyoming,1.0,0,0.0,0.0,0.0,0.0,13963.0


In [406]:
#Want to see if there are any other nulls in the HateCrime2019 dataset.
HateCrime2019NoNA[HateCrime2019NoNA.isnull().any(1)]

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity,Population
26,Arkansas,1.0,0.0,0.0,0.0,0.0,0.0,
27,Arkansas,0.0,0.0,1.0,0.0,0.0,0.0,
200,California,10.0,1.0,3.0,0.0,1.0,0.0,
201,California,2.0,2.0,0.0,0.0,0.0,0.0,
202,California,1.0,0.0,0.0,0.0,0.0,0.0,
203,California,1.0,0.0,0.0,0.0,0.0,0.0,
204,California,1.0,0.0,1.0,0.0,0.0,0.0,
205,California,6.0,1.0,1.0,1.0,0.0,0.0,
206,California,3.0,1.0,1.0,0.0,0.0,0.0,
207,California,2.0,0.0,0.0,0.0,0.0,2.0,


In [407]:
#There appear to be a fair number of NaN values, especially in "Population" column. How many nulls still in dataset?
HateCrime2019NoNA.isnull().sum().sum()

523

In [408]:
#523 out of 2125 records is approximately a quarter of the data, so I need to either source the data from elsewhere
#or transform the data here somehow. Since I've already decided to build a demographics dataset and the source includes
#state population data, I will include that in the dataset build and drop this population data.
del HateCrime2019NoNA['Population']
HateCrime2019NoNA.head()

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity
0,Alaska,4.0,0,2.0,0.0,0.0,0.0
1,Alaska,1.0,0,0.0,0.0,0.0,0.0
2,Alaska,1.0,0,0.0,0.0,0.0,0.0
3,Alaska,1.0,0,0.0,0.0,0.0,0.0
4,Alaska,0.0,0,2.0,0.0,0.0,0.0


In [409]:
#Viewed any nulls to confirm code worked.
HateCrime2019NoNA[HateCrime2019NoNA.isnull().any(1)]

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity
1232,New Jersey,,,,,,


In [410]:
#This row contains no useful data, so we need to drop it. Also a good time to give this cleaned up set a name.
HateCrime2019NoNA = HateCrime2019NoNA.dropna(how='any',axis=0)

In [411]:
#Now check to see if code worked.
HateCrime2019NoNA[HateCrime2019NoNA.isnull().any(1)]

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity


In [412]:
#Just to be sure, check a different way to make sure there are no other nulls.
HateCrime2019NoNA.isnull().sum().sum()

0

In [413]:
HateCrime2019NoNA.head()

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity
0,Alaska,4.0,0,2.0,0.0,0.0,0.0
1,Alaska,1.0,0,0.0,0.0,0.0,0.0
2,Alaska,1.0,0,0.0,0.0,0.0,0.0
3,Alaska,1.0,0,0.0,0.0,0.0,0.0
4,Alaska,0.0,0,2.0,0.0,0.0,0.0


In [414]:
HateCrime2019NoNA.tail()

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity
2120,Wyoming,1.0,0,0.0,0.0,0.0,0.0
2121,Wyoming,1.0,0,0.0,0.0,0.0,0.0
2122,Wyoming,0.0,0,1.0,0.0,0.0,0.0
2123,Wyoming,1.0,0,0.0,0.0,0.0,0.0
2124,Wyoming,1.0,0,0.0,0.0,0.0,0.0


In [169]:
#Now need to collapse all rows of "HateCrime2019_NoNA" dataset for the same state into one row for ease of working with dataset.
#Since questions focused on the state level, do not need data broken out by local agency.
#This will also make it possible to combine with demographics dataset from census site which is one row per state.

In [387]:
#Aggregated Race column by State
HateCrime2019NoNA1 = HateCrime2019NoNA.groupby(['State']).agg({'Race': ['sum']}).reset_index()

In [388]:
#Verified aggregation occurred.
HateCrime2019NoNA1.head()

Unnamed: 0_level_0,State,Race
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,7.0
1,Arizona,143.0
2,Arkansas,8.0
3,California,524.0
4,Colorado,117.0


In [338]:
#Aggregated all other columns into their own tables
HateCrime2019NoNA2 = HateCrime2019NoNA.groupby(['State']).agg({'Religion': ['sum']}).reset_index()
HateCrime2019NoNA3 = HateCrime2019NoNA.groupby(['State']).agg({'SexualOrientation': ['sum']}).reset_index()
HateCrime2019NoNA4 = HateCrime2019NoNA.groupby(['State']).agg({'Disability': ['sum']}).reset_index()
HateCrime2019NoNA5 = HateCrime2019NoNA.groupby(['State']).agg({'Gender': ['sum']}).reset_index()
HateCrime2019NoNA6 = HateCrime2019NoNA.groupby(['State']).agg({'GenderIdentity': ['sum']}).reset_index()

In [339]:
#Verified code worked
HateCrime2019NoNA2.head()

Unnamed: 0_level_0,State,Religion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,00000
1,Arizona,100120002001010604
2,Arkansas,000000
3,California,0101000000026014000010010120010000000010000100...
4,Colorado,0110100220151000200100000111000100100002010110...


In [340]:
HateCrime2019NoNA3.head()

Unnamed: 0_level_0,State,SexualOrientation
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,4.0
1,Arizona,30.0
2,Arkansas,1.0
3,California,235.0
4,Colorado,47.0


In [341]:
HateCrime2019NoNA4.head()

Unnamed: 0_level_0,State,Disability
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,0.0
1,Arizona,3.0
2,Arkansas,0.0
3,California,10.0
4,Colorado,7.0


In [342]:
HateCrime2019NoNA5.head()

Unnamed: 0_level_0,State,Gender
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,0.0
1,Arizona,2.0
2,Arkansas,0.0
3,California,7.0
4,Colorado,0.0


In [343]:
HateCrime2019NoNA6.head()

Unnamed: 0_level_0,State,GenderIdentity
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,0.0
1,Arizona,3.0
2,Arkansas,0.0
3,California,33.0
4,Colorado,5.0


In [356]:
#Religion column data did not sum correctly - data type is likely not correct.
#Revealed data type in dataset of one of the columns that summed correctly compared to Religion data.
HateCrime2019NoNA0 = HateCrime2019NoNA['Race']
HateCrime2019NoNA0.head()

0    4.0
1    1.0
2    1.0
3    1.0
4    0.0
Name: Race, dtype: float64

In [357]:
HateCrime2019NoNA0 = HateCrime2019NoNA['Religion']
HateCrime2019NoNA0.head()

0    0
1    0
2    0
3    0
4    0
Name: Religion, dtype: object

In [358]:
#Religion data needs to be converted from object to float; results placed in new dataframe.
modelFeatures = ['Religion']
HateCrime2019NoNA0 = HateCrime2019NoNA[modelFeatures].astype(float) 
HateCrime2019NoNA0.head()

Unnamed: 0,Religion
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [359]:
#To aggregate the Religion data to match the rest, I need the State data alongside the Religion data.
#Placed State data in new dataframe.
HateCrime2019NoNA00 = HateCrime2019NoNA['State']
HateCrime2019NoNA00.head()

0    Alaska
1    Alaska
2    Alaska
3    Alaska
4    Alaska
Name: State, dtype: object

In [363]:
#Combined State and Religion dataframes into a third dataframe.
HateCrime2019NoNA000 = pd.concat([HateCrime2019NoNA00, HateCrime2019NoNA0],axis=1)
HateCrime2019NoNA000.head()

Unnamed: 0,State,Religion
0,Alaska,0.0
1,Alaska,0.0
2,Alaska,0.0
3,Alaska,0.0
4,Alaska,0.0


In [364]:
#Aggregated Religion data.
HateCrime2019NoNA2 = HateCrime2019NoNA000.groupby(['State']).agg({'Religion': ['sum']}).reset_index()

In [365]:
#Verifying conversion and aggregate worked.
HateCrime2019NoNA2.head()

Unnamed: 0_level_0,State,Religion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum
0,Alaska,0.0
1,Arizona,36.0
2,Arkansas,0.0
3,California,208.0
4,Colorado,36.0


In [372]:
#Now that everything is aggregated into one row per state, will join all the separate tables into one dataset
#using the State column. Will test with the first 2
HateCrime2019NoNA1and2 = pd.merge(HateCrime2019NoNA1, HateCrime2019NoNA2)
HateCrime2019NoNA1and2.head()

Unnamed: 0_level_0,State,Race,Religion
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum
0,Alaska,7.0,0.0
1,Arizona,143.0,36.0
2,Arkansas,8.0,0.0
3,California,524.0,208.0
4,Colorado,117.0,36.0


In [373]:
#Works! Now to join all the other tables created from original HateCrime2019 dataset
HateCrime2019NoNA123 = pd.merge(HateCrime2019NoNA1and2, HateCrime2019NoNA3)
HateCrime2019NoNA1to4 = pd.merge(HateCrime2019NoNA123, HateCrime2019NoNA4)
HateCrime2019NoNA1to5 = pd.merge(HateCrime2019NoNA1to4, HateCrime2019NoNA5)
HateCrime2019NoNATotal = pd.merge(HateCrime2019NoNA1to5, HateCrime2019NoNA6)
HateCrime2019NoNATotal.head()

Unnamed: 0_level_0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum,sum,sum,sum,sum
0,Alaska,7.0,0.0,4.0,0.0,0.0,0.0
1,Arizona,143.0,36.0,30.0,3.0,2.0,3.0
2,Arkansas,8.0,0.0,1.0,0.0,0.0,0.0
3,California,524.0,208.0,235.0,10.0,7.0,33.0
4,Colorado,117.0,36.0,47.0,7.0,0.0,5.0


In [374]:
#Now can combine with Demographics dataset.
#Called in Demographics dataset. Original race data was percentages; had spreadsheet calculate actual number for each race
#using percentage and population data.
USCensus2020 = pd.read_csv("Data Files/2020 US Census - State Racial Demographics.csv")
USCensus2020.head()

Unnamed: 0,State,Population,WhitePct,WhitePop,BlackPct,BlackPop,AmerIndianAlaskaNativePct,AmerIndianAlaskaNativePop,AsianPct,AsianPop,NativeHawaiianOtherPacificIslanderPct,NativeHawaiianOtherPacificIslanderPop,OtherRacePct,OtherRacePop,TwoOrMoreRacePct,TwoOrMoreRacePop,HispanicPct,HispanicPop
0,Alaska,733391,59.4,435634,3.0,22002,15.2,111475,6.0,44003,1.7,12468,2.5,18335,12.2,89474,6.8,49871
1,Arizona,7151502,60.4,4319507,4.7,336121,4.5,321818,3.6,257454,0.2,14303,12.6,901089,13.9,994059,30.7,2195511
2,Arkansas,3011524,70.2,2114090,15.1,454740,0.9,27104,1.7,51196,0.5,15058,4.5,135519,7.1,213818,8.5,255980
3,California,39538223,41.2,16289748,5.7,2253679,1.6,632612,15.4,6088886,0.4,158153,21.2,8382103,14.6,5772581,39.4,15578060
4,Colorado,5773714,70.7,4082016,4.1,236722,1.3,75058,3.5,202080,0.2,11547,8.0,461897,12.3,710167,21.9,1264443


In [375]:
#Merge the Hate Crimes and Demographics datasets
HateCrime2019_USCensus2020 = pd.merge(HateCrime2019NoNATotal, USCensus2020, on='State')
HateCrime2019_USCensus2020.head()



Unnamed: 0,State,"(State, )","(Race, sum)","(Religion, sum)","(SexualOrientation, sum)","(Disability, sum)","(Gender, sum)","(GenderIdentity, sum)",Population,WhitePct,...,AsianPct,AsianPop,NativeHawaiianOtherPacificIslanderPct,NativeHawaiianOtherPacificIslanderPop,OtherRacePct,OtherRacePop,TwoOrMoreRacePct,TwoOrMoreRacePop,HispanicPct,HispanicPop
0,Alaska,Alaska,7.0,0.0,4.0,0.0,0.0,0.0,733391,59.4,...,6.0,44003,1.7,12468,2.5,18335,12.2,89474,6.8,49871
1,Arizona,Arizona,143.0,36.0,30.0,3.0,2.0,3.0,7151502,60.4,...,3.6,257454,0.2,14303,12.6,901089,13.9,994059,30.7,2195511
2,Arkansas,Arkansas,8.0,0.0,1.0,0.0,0.0,0.0,3011524,70.2,...,1.7,51196,0.5,15058,4.5,135519,7.1,213818,8.5,255980
3,California,California,524.0,208.0,235.0,10.0,7.0,33.0,39538223,41.2,...,15.4,6088886,0.4,158153,21.2,8382103,14.6,5772581,39.4,15578060
4,Colorado,Colorado,117.0,36.0,47.0,7.0,0.0,5.0,5773714,70.7,...,3.5,202080,0.2,11547,8.0,461897,12.3,710167,21.9,1264443


In [389]:
#Drop the excess State column
HC2019_USC2020 = HateCrime2019_USCensus2020.drop(HateCrime2019_USCensus2020.columns[1], axis=1)
HC2019_USC2020.head()

Unnamed: 0,State,"(Race, sum)","(Religion, sum)","(SexualOrientation, sum)","(Disability, sum)","(Gender, sum)","(GenderIdentity, sum)",Population,WhitePct,WhitePop,...,AsianPct,AsianPop,NativeHawaiianOtherPacificIslanderPct,NativeHawaiianOtherPacificIslanderPop,OtherRacePct,OtherRacePop,TwoOrMoreRacePct,TwoOrMoreRacePop,HispanicPct,HispanicPop
0,Alaska,7.0,0.0,4.0,0.0,0.0,0.0,733391,59.4,435634,...,6.0,44003,1.7,12468,2.5,18335,12.2,89474,6.8,49871
1,Arizona,143.0,36.0,30.0,3.0,2.0,3.0,7151502,60.4,4319507,...,3.6,257454,0.2,14303,12.6,901089,13.9,994059,30.7,2195511
2,Arkansas,8.0,0.0,1.0,0.0,0.0,0.0,3011524,70.2,2114090,...,1.7,51196,0.5,15058,4.5,135519,7.1,213818,8.5,255980
3,California,524.0,208.0,235.0,10.0,7.0,33.0,39538223,41.2,16289748,...,15.4,6088886,0.4,158153,21.2,8382103,14.6,5772581,39.4,15578060
4,Colorado,117.0,36.0,47.0,7.0,0.0,5.0,5773714,70.7,4082016,...,3.5,202080,0.2,11547,8.0,461897,12.3,710167,21.9,1264443


In [390]:
#Now that Data Wrangling is completed, will export dataframe to GitHub
HC2019_USC2020.to_csv(r'Data Files/HC2019_USC2020.csv')

In [11]:
#Exploratory Analyses
#Which of variables are highly related to each other? Correlation Matrix
#To create Correlation Matrix, need to drop Non-Continuous Variables
HC2019_USC2020 = pd.read_csv("Data Files/HC2019_USC2020.csv")
HC2019_USC2020.head()

Unnamed: 0,State,Race,Religion,SexualOrientation,Disability,Gender,GenderIdentity,Population,WhitePct,WhitePop,...,AsianPct,AsianPop,NativeHawaiianOtherPacificIslanderPct,NativeHawaiianOtherPacificIslanderPop,OtherRacePct,OtherRacePop,TwoOrMoreRacePct,TwoOrMoreRacePop,HispanicPct,HispanicPop
0,Alaska,7,0,4,0,0,0,733391,59.4,435634,...,6.0,44003,1.7,12468,2.5,18335,12.2,89474,6.8,49871
1,Arizona,143,36,30,3,2,3,7151502,60.4,4319507,...,3.6,257454,0.2,14303,12.6,901089,13.9,994059,30.7,2195511
2,Arkansas,8,0,1,0,0,0,3011524,70.2,2114090,...,1.7,51196,0.5,15058,4.5,135519,7.1,213818,8.5,255980
3,California,524,208,235,10,7,33,39538223,41.2,16289748,...,15.4,6088886,0.4,158153,21.2,8382103,14.6,5772581,39.4,15578060
4,Colorado,117,36,47,7,0,5,5773714,70.7,4082016,...,3.5,202080,0.2,11547,8.0,461897,12.3,710167,21.9,1264443


In [10]:
del HC2019_USC2020 [0]

KeyError: 0

In [None]:
#Descriptive Stats - mean of the data, histograms for continuous variables, frequencies and box plots for categorical variables


In [None]:
#Tableau - some quick graphs

In [None]:
#Data Analysis