In [None]:
import pandas as pd
import plotly

# Reading a CSV file

In [4]:
# Load the hate-crimes export from a CSV file; the path is relative, so the
# file must sit in the notebook's working directory.
file = 'NYPD_Hate_Crimes_20250213.csv'
data_csv = pd.read_csv(file)

# Print the (rows, columns) shape of the loaded frame.
print(data_csv.shape)

# Last expression of the cell: displays the first five rows as a preview.
data_csv.head()

(3255, 14)


Unnamed: 0,Full Complaint ID,Complaint Year Number,Month Number,Record Create Date,Complaint Precinct Code,Patrol Borough Name,County,Law Code Category Description,Offense Description,PD Code Description,Bias Motive Description,Offense Category,Arrest Date,Arrest Id
0,202105012245817,2021,5,05/01/2021,50,PATROL BORO BRONX,BRONX,FELONY,BURGLARY,"BURGLARY,UNCLASSIFIED,NIGHT",ANTI-JEWISH,Religion/Religious Practice,05/01/2021,B33683676
1,202105012668317,2021,12,12/28/2021,50,PATROL BORO BRONX,BRONX,FELONY,MISCELLANEOUS PENAL LAW,AGGRAVATED HARASSMENT 1,ANTI-JEWISH,Religion/Religious Practice,09/28/2022,B34705870
2,202204912792117,2022,10,10/11/2022,49,PATROL BORO BRONX,BRONX,FELONY,FELONY ASSAULT,"ASSAULT 2,1,UNCLASSIFIED",ANTI-MALE HOMOSEXUAL (GAY),Sexual Orientation,10/11/2022,B34707656
3,201906112101017,2019,1,01/15/2019,61,PATROL BORO BKLYN SOUTH,KINGS,FELONY,MURDER & NON-NEGL. MANSLAUGHTE,"MURDER,UNCLASSIFIED",ANTI-ASIAN,Race/Color,01/16/2019,K31675023
4,201907112148117,2019,2,02/08/2019,71,PATROL BORO BKLYN SOUTH,KINGS,MISDEMEANOR,OFF. AGNST PUB ORD SENSBLTY &,AGGRAVATED HARASSMENT 2,ANTI-JEWISH,Religion/Religious Practice,02/08/2019,K31679592


## Plotting a summary statistic

In [23]:
def add_date(data,year_col,month_col):
    """Return a copy of ``data`` with a first-of-month ``date`` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a year column and a month-number column.
    year_col, month_col : str
        Names of those columns. Values are cast to ``str`` before parsing,
        so integers and numeric strings are both accepted.

    Returns
    -------
    pandas.DataFrame
        A new frame with an added (or replaced) ``date`` column holding the
        first day of each row's month. The input frame is left unmodified.
    """
    # Build 'M/01/YYYY' strings; the explicit format keeps parsing
    # deterministic instead of depending on format inference.
    month_start = pd.to_datetime(
        data[month_col].astype(str) + '/01/' + data[year_col].astype(str),
        format='%m/%d/%Y',
    )
    # assign() returns a new frame, so the caller's DataFrame is not mutated
    # (re-running a cell no longer changes a frame displayed earlier).
    return data.assign(date=month_start)


def plot_hate_crimes(data_csv,year_col,month_col,id_col):
    """Show a plotly line chart of the number of records per month.

    Parameters
    ----------
    data_csv : pandas.DataFrame
        Frame with one row per record to count.
    year_col, month_col : str
        Columns handed to ``add_date`` to build the monthly ``date`` key.
    id_col : str
        Column selected after grouping; rows are counted per ``date``.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # A bare `import plotly` is not guaranteed to load the `express`
    # submodule, so `plotly.express.line` can raise AttributeError on a
    # fresh kernel. Importing it here makes the function self-sufficient.
    import plotly.express as px

    data_csv = add_date(data_csv,year_col,month_col)

    count_label = 'Number of monthly hate crimes'
    # One row per month: the group size, named directly via reset_index,
    # then ordered chronologically for the line plot.
    data_agg = (
        data_csv.groupby('date')[id_col]
        .size()
        .reset_index(name=count_label)
        .sort_values('date')
    )

    # plotting results using plotly
    fig = px.line(data_agg,x='date',y=count_label)
    fig.show()



# Column names as they appear in the CSV export (title-cased, with spaces).
year_col = 'Complaint Year Number'
month_col = 'Month Number'
id_col = 'Full Complaint ID'

# Monthly counts for the CSV-loaded frame.
plot_hate_crimes(
    data_csv,
    year_col=year_col,
    month_col=month_col,
    id_col=id_col,
)

## Using the API

In [11]:
import requests
# JSON endpoint on data.cityofnewyork.us used by the requests in the cells below.
url = 'https://data.cityofnewyork.us/resource/bqiq-cu78.json'

In [13]:
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of parsing an error body
# as if it were data.
r = requests.get(url, timeout=30)
r.raise_for_status()

# Flatten the JSON records into a DataFrame and report its (rows, columns).
data = pd.json_normalize(r.json())
print(data.shape)

(1000, 14)


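We only got 1,000 rows, although the CSV file has 3,255: a single request without paging parameters returns only the first 1,000 records.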

## Using pagination

In [18]:
# Page through the endpoint `limit` rows at a time until a short page arrives.
offset = 0
limit = 1000
n_rows = limit
data_frames = []
# A full page means more rows may follow; a short or empty page is the last
# one, so no extra request is made after it.
while n_rows >= limit:
    # $order makes the row order stable across pages, so rows are neither
    # skipped nor duplicated while paging.
    params_ = {'$offset': offset, '$limit': limit, '$order': ':id'}
    r = requests.get(url, params=params_, timeout=30)
    # Fail loudly on HTTP errors: an error payload would otherwise be
    # normalized into a non-empty frame and keep the loop running.
    r.raise_for_status()
    data = pd.json_normalize(r.json())
    n_rows = data.shape[0]
    if n_rows > 0:
        # Skip empty pages so pd.concat never receives an empty frame.
        data_frames.append(data)
    offset += limit

# ignore_index gives one continuous 0..N-1 index instead of repeating each
# page's 0..limit-1 index; fall back to an empty frame if nothing came back.
whole_dataset = (
    pd.concat(data_frames, ignore_index=True) if data_frames else pd.DataFrame()
)
print(whole_dataset.shape)
whole_dataset.head()

(3255, 14)


Unnamed: 0,full_complaint_id,complaint_year_number,month_number,record_create_date,complaint_precinct_code,patrol_borough_name,county,law_code_category_description,offense_description,pd_code_description,bias_motive_description,offense_category,arrest_date,arrest_id
0,202105012245817,2021,5,2021-05-01T00:00:00.000,50,PATROL BORO BRONX,BRONX,FELONY,BURGLARY,"BURGLARY,UNCLASSIFIED,NIGHT",ANTI-JEWISH,Religion/Religious Practice,2021-05-01T00:00:00.000,B33683676
1,202105012668317,2021,12,2021-12-28T00:00:00.000,50,PATROL BORO BRONX,BRONX,FELONY,MISCELLANEOUS PENAL LAW,AGGRAVATED HARASSMENT 1,ANTI-JEWISH,Religion/Religious Practice,2022-09-28T00:00:00.000,B34705870
2,202204912792117,2022,10,2022-10-11T00:00:00.000,49,PATROL BORO BRONX,BRONX,FELONY,FELONY ASSAULT,"ASSAULT 2,1,UNCLASSIFIED",ANTI-MALE HOMOSEXUAL (GAY),Sexual Orientation,2022-10-11T00:00:00.000,B34707656
3,201906112101017,2019,1,2019-01-15T00:00:00.000,61,PATROL BORO BKLYN SOUTH,KINGS,FELONY,MURDER & NON-NEGL. MANSLAUGHTE,"MURDER,UNCLASSIFIED",ANTI-ASIAN,Race/Color,2019-01-16T00:00:00.000,K31675023
4,201907112148117,2019,2,2019-02-08T00:00:00.000,71,PATROL BORO BKLYN SOUTH,KINGS,MISDEMEANOR,OFF. AGNST PUB ORD SENSBLTY &,AGGRAVATED HARASSMENT 2,ANTI-JEWISH,Religion/Religious Practice,2019-02-08T00:00:00.000,K31679592


We got the whole dataset: 3,255 rows, matching the CSV file.

In [26]:
# The API returns snake_case field names, so the column names differ from the CSV's.
year_col = 'complaint_year_number'
month_col = 'month_number'
id_col = 'full_complaint_id'

# Same monthly-count plot, now on the frame assembled from the paginated API calls.
plot_hate_crimes(
    whole_dataset,
    year_col=year_col,
    month_col=month_col,
    id_col=id_col,
)