### Data download and imports

In [None]:
# Fetch the four input CSVs from Google Drive by file ID; -O sets the local file name.
!gdown 128UP6X4kbWVjjOKt4vB9bqqPVeT16cwL -O "covid.csv"
!gdown 1yj5Pa_Zck6VNf1JgkdCuErUKl5FLuoAd -O "hatecrime.csv"
!gdown 1yigT-1eM5Ki-uJA4FGpnt5bQDM0PtlKr -O "15m_cleaned_tweets.csv"
!gdown 19WLK_YzFvPnaEko-WllwClS0ZMVRdjHk -O  "stringency.csv"

Downloading...
From: https://drive.google.com/uc?id=128UP6X4kbWVjjOKt4vB9bqqPVeT16cwL
To: /content/covid.csv
100% 5.10M/5.10M [00:00<00:00, 154MB/s]
Downloading...
From: https://drive.google.com/uc?id=1yj5Pa_Zck6VNf1JgkdCuErUKl5FLuoAd
To: /content/hatecrime.csv
100% 54.6M/54.6M [00:01<00:00, 53.6MB/s]
Downloading...
From: https://drive.google.com/uc?id=1yigT-1eM5Ki-uJA4FGpnt5bQDM0PtlKr
To: /content/15m_cleaned_tweets.csv
100% 86.5M/86.5M [00:01<00:00, 60.3MB/s]
Downloading...
From: https://drive.google.com/uc?id=19WLK_YzFvPnaEko-WllwClS0ZMVRdjHk
To: /content/stringency.csv
100% 43.4k/43.4k [00:00<00:00, 23.6MB/s]


In [None]:
import pandas as pd

### Crime

This section prepares the anti-Asian hate crimes dataframe. The dataset was extracted from XXX (**TODO:** replace this placeholder with the actual data source).

The first step was to drop the columns that were not relevant to the analysis. The dataframe was then filtered to keep only the rows whose bias description (`bias_desc`) was exactly "Anti-Asian". The incident date column was converted to a datetime type, and the data was grouped by state abbreviation and incident date, summed, and sorted by incident date. Next, the dataframe was pivoted so that each row is an incident date and each column is a state abbreviation, using the total individual victims as the values and filling missing values with 0. The index was then reset and the rows were restricted to the same date range as the COVID cases dataset (2020-01-01 to 2021-03-31). The data was resampled to monthly sums per state, and any location that was not one of the 50 US states was dropped. Finally, the data was melted into long format with `id_vars="date"`, `var_name="state"`, and `value_name="hate_crimes"`. Note that `hate_crimes` is therefore the monthly sum of `total_individual_victims`, not a count of incidents.



In [None]:
# Load hatecrime.csv (downloaded above) into a DataFrame and preview the first rows.
crime = pd.read_csv("hatecrime.csv")
crime.head()

Unnamed: 0,incident_id,data_year,ori,pug_agency_name,pub_agency_unit,agency_type_name,state_abbr,state_name,division_name,region_name,...,offender_race,offender_ethnicity,victim_count,offense_name,total_individual_victims,location_name,bias_desc,victim_types,multiple_offense,multiple_bias
0,43,1991,AR0350100,Pine Bluff,,City,AR,Arkansas,West South Central,South,...,Black or African American,Not Specified,1,Aggravated Assault,1.0,Residence/Home,Anti-Black or African American,Individual,S,S
1,44,1991,AR0350100,Pine Bluff,,City,AR,Arkansas,West South Central,South,...,Black or African American,Not Specified,2,Aggravated Assault;Destruction/Damage/Vandalis...,1.0,Highway/Road/Alley/Street/Sidewalk,Anti-White,Individual,M,S
2,45,1991,AR0600300,North Little Rock,,City,AR,Arkansas,West South Central,South,...,Black or African American,Not Specified,2,Aggravated Assault;Murder and Nonnegligent Man...,2.0,Residence/Home,Anti-White,Individual,M,S
3,46,1991,AR0600300,North Little Rock,,City,AR,Arkansas,West South Central,South,...,Black or African American,Not Specified,1,Intimidation,1.0,Residence/Home,Anti-White,Individual,S,S
4,47,1991,AR0670000,Sevier,,County,AR,Arkansas,West South Central,South,...,White,Not Specified,1,Intimidation,1.0,School/College,Anti-Black or African American,Individual,S,S


In [None]:
# Discard the identifier, agency, geography and victim/offender detail columns
# that the per-state aggregation further down does not use.
crime = crime.drop(
    columns=[
        "incident_id",
        "ori",
        "pug_agency_name",  # spelled this way in the file's header
        "pub_agency_unit",
        "agency_type_name",
        "state_name",
        "division_name",
        "region_name",
        "population_group_code",
        "population_group_description",
        "adult_victim_count",
        "juvenile_victim_count",
        "adult_offender_count",
        "juvenile_offender_count",
        "offender_ethnicity",
        "offense_name",
        "location_name",
        "victim_types",
        "multiple_offense",
        "multiple_bias",
    ]
)

In [None]:
# Keep only the incidents whose bias description is exactly "Anti-Asian".
crime = crime.loc[crime["bias_desc"].eq("Anti-Asian")]

In [None]:
# bias_desc has done its job as a filter; the remaining tallies and data_year
# are not needed for the per-state daily totals either.
crime = crime.drop(
    columns=[
        "total_offender_count",
        "offender_race",
        "bias_desc",
        "data_year",
        "victim_count",
    ]
)

In [None]:
# Parse incident_date into datetimes so it can be compared, grouped and resampled.
crime = crime.assign(incident_date=pd.to_datetime(crime["incident_date"]))

In [None]:
# Sum the remaining columns per (state, day), flatten the group keys back into
# columns, and order the rows chronologically.
crime = (
    crime
    .groupby(["state_abbr", "incident_date"])
    .sum()
    .reset_index()
    .sort_values(by="incident_date")
)

In [None]:
# Reshape to one row per incident date and one column per state, holding the
# total_individual_victims value; state/date pairs with no record become 0.
crime = crime.pivot(
    index="incident_date",
    columns="state_abbr",
    values="total_individual_victims",
).fillna(0)

In [None]:
# Turn incident_date back into an ordinary column, then clear the name that
# the pivot left on the column axis.
crime = crime.reset_index()
crime.columns.name = None

In [None]:
# Keep only rows dated 2020-01-01 or later, then preview the result.
crime = crime.loc[crime["incident_date"].ge('2020-01-01')]
crime.head()

Unnamed: 0,incident_date,AK,AL,AR,AZ,CA,CO,CT,DC,DE,...,SD,TN,TX,UT,VA,VT,WA,WI,WV,WY
4459,2020-01-01,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4460,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4461,2020-01-03,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4462,2020-01-04,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
4463,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0


In [None]:
# Renumber the rows from 0; the old row labels left over from filtering are discarded.
crime = crime.reset_index(drop=True)

In [None]:
# Preview the frame to confirm the row labels now start at 0.
crime.head()

Unnamed: 0,incident_date,AK,AL,AR,AZ,CA,CO,CT,DC,DE,...,SD,TN,TX,UT,VA,VT,WA,WI,WV,WY
0,2020-01-01,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,2020-01-03,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2020-01-04,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
4,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0


In [None]:
# Use the shorter column name "date" from here on.
crime = crime.rename(columns={"incident_date": "date"})

In [None]:
# Restrict to 2020-01-01 through 2021-03-31, both endpoints included.
crime = crime.loc[crime["date"].between("2020-01-01", "2021-03-31")]

In [None]:
# Index by date so the frame can be resampled, then total each state's values
# per calendar month (rows are labelled with the month-end date).
crime = crime.set_index("date")
crime_by_month = crime.resample("M").sum()

In [None]:
# Relabel the "NB" column as "NE".
crime_by_month = crime_by_month.rename(columns={"NB": "NE"})

*Note: the `NB` column is renamed to `NE` because, in 1969, the Canadian postal administration requested that the abbreviation for the state of Nebraska, which was originally NB, be changed to NE to prevent confusion with the Canadian province of New Brunswick.*

In [None]:
# Keep only the 50 US states. DC, FS and GM are the non-state codes that were
# previously dropped by name; filtering against an allow-list removes them as
# before, also removes any other non-state code that may appear, and does not
# raise KeyError when the cell is run a second time.
# Nebraska must already be labelled "NE" (see the rename cell above).
US_STATE_ABBRS = frozenset([
    "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA",
    "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
    "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
    "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
    "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY",
])
non_state_columns = [
    col
    for col in crime_by_month.columns
    # Never treat a "date" column as a location, in case the index was already reset.
    if col not in US_STATE_ABBRS and col != "date"
]
crime_by_month = crime_by_month.drop(columns=non_state_columns)

In [None]:
# Move the date index back into a regular column so it can serve as the
# identifier when the table is melted.
crime_by_month = crime_by_month.reset_index()

In [None]:
# Reshape wide -> long: one row per (date, state), with the monthly total
# stored in the hate_crimes column.
crime_final = pd.melt(
    crime_by_month,
    id_vars="date",
    var_name="state",
    value_name="hate_crimes",
)

In [None]:
# Preview the long-format table (date, state, hate_crimes).
crime_final.head()

Unnamed: 0,date,state,hate_crimes
0,2020-01-31,AK,0.0
1,2020-02-29,AK,0.0
2,2020-03-31,AK,0.0
3,2020-04-30,AK,0.0
4,2020-05-31,AK,0.0


In [None]:
# Sanity check: (rows, columns) of the long-format table.
crime_final.shape

(750, 3)

In [None]:
# Frequency of each monthly per-state value in hate_crimes.
crime_final["hate_crimes"].value_counts()

0.0     583
1.0      80
2.0      43
3.0      13
4.0       9
5.0       8
6.0       3
7.0       3
11.0      2
10.0      2
18.0      1
16.0      1
8.0       1
15.0      1
Name: hate_crimes, dtype: int64