## Imports

In [54]:
import sys
sys.path.append("..")

In [55]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import when, col, count, upper
from datetime import date
from helpers import SnowflakeHelper
import json
import os

In [56]:
# Relative path to the JSON file holding the Snowflake connection settings.
snowflake_config = './../helpers/snowflake_config.json'

# Open the Snowpark session through the project helper. No schema is passed here;
# the recorded run logged SAFEGUARDING_NYC_SCHEMA_BRONZE as the session default,
# and the tables read below are addressed with explicit schema prefixes.
snowflake_helper = SnowflakeHelper()
session = snowflake_helper.create_snowpark_session(snowflake_config)

[INFO] No schema passed, using default schema SAFEGUARDING_NYC_SCHEMA_BRONZE for the session
[SUCCESS] Config file loaded successfully!
[SUCCESS] Snowspark Session created successfully on schema SAFEGUARDING_NYC_SCHEMA_BRONZE!


## Extracting Data

In [57]:
# Combined use-of-force records, read from the gold schema.
use_of_force_combined = session.table("SAFEGUARDING_NYC_SCHEMA_GOLD.use_of_force_combined")

# SQF records (presumably stop-question-and-frisk; confirm), read from the silver schema.
sqf = session.table("SAFEGUARDING_NYC_SCHEMA_SILVER.SQF")

In [58]:
# Print a sample of rows from the use-of-force table to inspect its columns and values.
use_of_force_combined.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FORCETYPE"        |"INCIDENT PCT"  |"TRI INCIDENT NUMBER"  |"PATROL BOROUGH"  |"OCCURRENCE DATE"  |"FORCE AGAINST MOS"  |"AGE"  |"SUBJECT GENDER"  |"SUBJECT RACE"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Physical Force     |1               |2020020962067          |MANHATTAN         |2020-05-06         |Physical Force       |NULL   |MALE              |BLACK           |
|Physical Force     |1               |2020020962143          |MANHATTAN         |2020-10-11         |Physical Force       |NULL   |MALE              |BLACK           |
|Physical Force     |5               |2020024962080          |MANHATTAN         |2020-06-30         |Physical Force       |NULL   |MALE              |BLACK     

In [59]:
# Print a sample of rows from the SQF table to inspect its columns and values.
sqf.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [60]:
# Row count of the use-of-force table; a baseline for judging how much the joins below fan out.
use_of_force_combined.count()

29723

In [61]:
# Row count of the SQF table; a baseline for judging how much the joins below fan out.
sqf.count()

101989

In [78]:
# Use-of-force incidents per precinct: one output row per distinct "INCIDENT PCT".
agg_use_of_force = (
    use_of_force_combined
    .groupBy("INCIDENT PCT")
    .agg(count("*").alias("incident_count"))
)

# Stops per precinct: one output row per distinct STOP_LOCATION_PRECINCT.
agg_sqf = (
    sqf
    .groupBy("STOP_LOCATION_PRECINCT")
    .agg(count("*").alias("stop_count"))
)

# Each side now has one row per precinct key, so the inner join produces at most
# one row per precinct; precincts that appear in only one table are dropped.
precinct_keys_match = agg_use_of_force["INCIDENT PCT"] == agg_sqf["STOP_LOCATION_PRECINCT"]
use_of_force_and_sqf_agg = agg_use_of_force.join(agg_sqf, precinct_keys_match, how="inner")

# Print a sample of the per-precinct incident and stop counts side by side.
use_of_force_and_sqf_agg.show()

-------------------------------------------------------------------------------
|"INCIDENT PCT"  |"INCIDENT_COUNT"  |"STOP_LOCATION_PRECINCT"  |"STOP_COUNT"  |
-------------------------------------------------------------------------------
|5               |320               |5                         |704           |
|6               |329               |6                         |847           |
|10              |213               |10                        |656           |
|14              |877               |14                        |1342          |
|17              |139               |17                        |363           |
|18              |384               |18                        |972           |
|19              |280               |19                        |1544          |
|33              |327               |33                        |958           |
|34              |366               |34                        |1394          |
|41              |354               |41 

In [77]:
# Number of rows in the per-precinct aggregate, i.e. how many distinct "INCIDENT PCT" groups exist.
agg_use_of_force.count()

77

## Joining Use of Force and SQF Tables

### On Precinct

In [73]:
# Row-level inner join: every use-of-force incident is paired with every SQF stop
# recorded for the same precinct.
# NOTE(review): the precinct key repeats on both sides, so this is a many-to-many
# join. The recorded run produced 45,736,869 rows from inputs of 29,723 and 101,989
# rows. Confirm that row-level pairing is intended rather than per-precinct counts.
precinct_match = use_of_force_combined['INCIDENT PCT'] == sqf['STOP_LOCATION_PRECINCT']
use_of_force_and_sqf_on_precinct = use_of_force_combined.join(sqf, precinct_match, how="inner")

# Print a sample of the joined rows.
use_of_force_and_sqf_on_precinct.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [74]:
# Size of the precinct-level row join; compare with the input table counts to gauge fan-out.
use_of_force_and_sqf_on_precinct.count()

45736869

### On Borough

In [64]:
# Row-level inner join: every use-of-force incident is paired with every SQF stop
# recorded for the same patrol borough.
# NOTE(review): the borough key repeats heavily on both sides, so this is a
# many-to-many join. The recorded run produced 725,171,410 rows from inputs of
# 29,723 and 101,989 rows. Confirm that row-level pairing is intended.
borough_match = use_of_force_combined['PATROL BOROUGH'] == sqf['STOP_LOCATION_PATROL_BORO_NAME']
use_of_force_and_sqf_on_borough = use_of_force_combined.join(sqf, borough_match, how="inner")

# Print a sample of the joined rows.
use_of_force_and_sqf_on_borough.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [65]:
# Size of the borough-level row join; compare with the input table counts to gauge fan-out.
use_of_force_and_sqf_on_borough.count()

725171410

### On Date

In [66]:
# Row-level inner join: every use-of-force incident is paired with every SQF stop
# that carries the same date.
# NOTE(review): dates repeat on both sides, so this is a many-to-many join. The
# recorded run produced 1,206,145 rows from inputs of 29,723 and 101,989 rows.
# Confirm that row-level pairing is intended rather than per-date counts.
date_match = use_of_force_combined['OCCURRENCE DATE'] == sqf['STOP_FRISK_DATE']
use_of_force_and_sqf_on_date = use_of_force_combined.join(sqf, date_match, how="inner")

# Print a sample of the joined rows.
use_of_force_and_sqf_on_date.show()

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [68]:
# Size of the date-level row join; compare with the input table counts to gauge fan-out.
use_of_force_and_sqf_on_date.count()

1206145

## Exporting Data to Gold Medallion

In [23]:
# Write `use_of_force_combined` to the gold schema with mode="overwrite", replacing
# the existing table of that name.
# NOTE(review): in the cells visible in this notebook, `use_of_force_combined` is
# itself read from SAFEGUARDING_NYC_SCHEMA_GOLD.use_of_force_combined, so this writes
# the table back onto its own source, and none of the joined/aggregated DataFrames
# are exported. The cell's execution count also predates every other cell. Confirm
# the intended DataFrame and target table before re-running.
table_name = "SAFEGUARDING_NYC_SCHEMA_GOLD.use_of_force_combined"
use_of_force_combined.write.saveAsTable(table_name, mode="overwrite")