In [25]:
import sys

# Put the parent directory on the import path (presumably so the project-local
# `helpers` package used by this notebook resolves -- confirm the repo layout).
# The membership check keeps the cell idempotent: re-running it does not keep
# appending duplicate ".." entries to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [26]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import when, col, count
from datetime import date
from helpers import SnowflakeHelper
import json
import os

In [27]:
# Location of the JSON config file handed to the helper; the path is relative,
# so it is resolved against the notebook's working directory.
snowflake_config = './../helpers/snowflake_config.json'

# Build the project helper for the target schema and ask it for a Snowpark session.
schema_name = "SAFEGUARDING_NYC_SCHEMA_SILVER"
snowflake_helper = SnowflakeHelper(schema_name)
session = snowflake_helper.create_snowpark_session(snowflake_config)

[INFO] Using the schema SAFEGUARDING_NYC_SCHEMA_SILVER for the session
[SUCCESS] Config file loaded successfully!
[SUCCESS] Snowspark Session created successfully on schema SAFEGUARDING_NYC_SCHEMA_SILVER!


## Extracting Data

In [28]:
# Reference the two source tables through the active session.
# SQF is the table that the join cells match against the shooting incidents.
sqf_data = session.table('SQF')
shooting_incidents = session.table('shooting_incidents')

In [29]:
# Print a sample of rows from the shooting-incident table to inspect its columns and values.
shooting_incidents.show()

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"OCCUR_DATE"  |"OCCUR_TIME"  |"INCIDENT_KEY"  |"LATITUDE"   |"STATISTICAL_MURDER_FLAG"  |"LONGITUDE"   |"BORO"    |"LOCATION_DESC"          |"PRECINCT"  |"PERP_AGE_GROUP"  |"VIC_AGE_GROUP"  |"VIC_RACE"      |"PERP_RACE"  |"PERP_SEX"  |"VIC_SEX"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|2021-05-27    |21:30:00      |228798151       |40.66296462  |FALSE                      |-73.73083869  |QUEENS    |NULL                     |105         |NULL              |18-24            |BLACK           |NULL         |NULL        |MALE       |
|201

In [23]:
# Columns of the shooting-incident table whose distinct values are listed below,
# one table of values per column.
check_columns = [
    "VIC_RACE",
    "VIC_AGE_GROUP",
    "PERP_RACE",
    "STATISTICAL_MURDER_FLAG",
    "VIC_SEX",
    "BORO",
    "PERP_SEX",
    "LOCATION_DESC",
    "PERP_AGE_GROUP",
]

for column_name in check_columns:
    # Build the de-duplicated single-column projection, announce it, then display it.
    unique_rows = shooting_incidents.select(column_name).distinct()
    print(f"Distinct values in {column_name}:")
    unique_rows.show()

Distinct values in VIC_RACE:
----------------------------------
|"VIC_RACE"                      |
----------------------------------
|WHITE HISPANIC                  |
|BLACK HISPANIC                  |
|AMERICAN INDIAN/ALASKAN NATIVE  |
|BLACK                           |
|WHITE                           |
|ASIAN/PACIFIC ISLANDER          |
|NULL                            |
----------------------------------

Distinct values in VIC_AGE_GROUP:
-------------------
|"VIC_AGE_GROUP"  |
-------------------
|25-44            |
|45-64            |
|<18              |
|18-24            |
|65+              |
|NULL             |
-------------------

Distinct values in PERP_RACE:
----------------------------------
|"PERP_RACE"                     |
----------------------------------
|BLACK HISPANIC                  |
|WHITE HISPANIC                  |
|AMERICAN INDIAN/ALASKAN NATIVE  |
|BLACK                           |
|NULL                            |
|ASIAN/PACIFIC ISLANDER          |
|WHIT

In [33]:
# Print a sample of rows from the SQF table. The rendered table is very wide,
# so each output line is extremely long.
sqf_data.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

## Joining Shooting Incidents with SQF

In [30]:
# Inner-join shootings to stops wherever the shooting's OCCUR_DATE equals the
# stop's STOP_FRISK_DATE.
# NOTE(review): this is a many-to-many match -- the count cell in this notebook
# reports 378,340 joined rows from 27,312 shootings and 101,989 stops.
date_condition = shooting_incidents["OCCUR_DATE"] == sqf_data["STOP_FRISK_DATE"]
shooting_sqf_on_date = shooting_incidents.join(sqf_data, date_condition, how="inner")
shooting_sqf_on_date.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [34]:
# Inner-join shootings to stops wherever the shooting's BORO equals the stop's
# STOP_LOCATION_BORO_NAME.
# NOTE(review): matching on borough alone pairs every shooting with every stop in
# the same borough -- the count cell in this notebook reports 699,267,344 rows
# from 27,312 shootings and 101,989 stops. Confirm this fan-out is intended.
boro_condition = shooting_incidents["BORO"] == sqf_data["STOP_LOCATION_BORO_NAME"]
shooting_sqf_on_boro = shooting_incidents.join(sqf_data, boro_condition, how="inner")
shooting_sqf_on_boro.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [35]:
# Inner-join shootings to stops wherever the shooting's PRECINCT equals the
# stop's STOP_LOCATION_PRECINCT.
# NOTE(review): this is a many-to-many match -- the count cell in this notebook
# reports 45,732,875 joined rows from 27,312 shootings and 101,989 stops.
precinct_condition = shooting_incidents["PRECINCT"] == sqf_data["STOP_LOCATION_PRECINCT"]
shooting_sqf_on_prec = shooting_incidents.join(sqf_data, precinct_condition, how="inner")
shooting_sqf_on_prec.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [48]:
# Each entry pairs the text to print with the DataFrame whose rows are counted.
# The first label also carries the "\tData Counts" heading line.
count_report = (
    ("\tData Counts\nOriginal Shooting:", shooting_incidents),
    ("Original SQF:", sqf_data),
    ("Join On Date:", shooting_sqf_on_date),
    ("Join On Boro:", shooting_sqf_on_boro),
    ("Join On Prec:", shooting_sqf_on_prec),
)

for report_label, report_frame in count_report:
    # One .count() call per DataFrame, issued in listing order.
    print(report_label, report_frame.count())

	Data Counts
Original Shooting: 27312
Original SQF: 101989
Join On Date: 378340
Join On Boro: 699267344
Join On Prec: 45732875
