## Imports

In [11]:
import sys
# Add the parent directory to the import search path. Presumably this is what
# lets the project-local `helpers` package (imported in the next cell) resolve
# when the notebook runs from its own folder — confirm against the repo layout.
sys.path.append("..")

In [12]:
from snowflake.snowpark import Session
from snowflake.snowpark.functions import when, col, count
from datetime import date
from helpers import SnowflakeHelper
import json
import os

In [13]:
# Connection settings file, given relative to the notebook's working directory.
snowflake_config = "./../helpers/snowflake_config.json"

# Open the Snowpark session through the project helper, then switch it to the
# silver schema; the tables below are referenced without a schema qualifier.
snowflake_helper = SnowflakeHelper()
session = snowflake_helper.create_snowpark_session(snowflake_config)
session.use_schema("SAFEGUARDING_NYC_SCHEMA_SILVER")

[INFO] No schema passed, using default schema SAFEGUARDING_NYC_SCHEMA_BRONZE for the session
[SUCCESS] Config file loaded successfully!
[SUCCESS] Snowspark Session created successfully on schema SAFEGUARDING_NYC_SCHEMA_BRONZE!


## Extracting Data

In [14]:
# Reference the two use-of-force tables by their unqualified names.
use_of_force_incidents, use_of_force_subjects = (
    session.table(source_table)
    for source_table in ("use_of_force_incidents", "use_of_force_subjects")
)

In [15]:
# Preview the incidents table; 10 rows is the default preview size of show().
use_of_force_incidents.show(n=10)

--------------------------------------------------------------------------------------------------
|"FORCETYPE"     |"INCIDENT PCT"  |"TRI INCIDENT NUMBER"  |"PATROL BOROUGH"  |"OCCURRENCE DATE"  |
--------------------------------------------------------------------------------------------------
|Physical Force  |75              |2023094962399          |BROOKLYN          |2023-09-21         |
|Physical Force  |40              |2023059962344          |BRONX             |2023-09-13         |
|Physical Force  |45              |2023064962120          |BRONX             |2023-09-13         |
|Physical Force  |121             |2023140962087          |STATEN ISLAND     |2023-09-08         |
|Physical Force  |34              |2023053962127          |MANHATTAN         |2023-08-27         |
|Physical Force  |48              |2023067962183          |BRONX             |2023-08-24         |
|Physical Force  |42              |2023061962210          |BRONX             |2023-08-23         |
|Physical 

In [16]:
# Preview the subjects table; 10 rows is the default preview size of show().
use_of_force_subjects.show(n=10)

-------------------------------------------------------------------------------------------
|"FORCE AGAINST MOS"  |"TRI INCIDENT NUMBER"  |"AGE"  |"SUBJECT GENDER"  |"SUBJECT RACE"  |
-------------------------------------------------------------------------------------------
|Physical Force       |2020020962067          |NULL   |MALE              |BLACK           |
|Physical Force       |2020020962143          |NULL   |MALE              |BLACK           |
|Physical Force       |2020024962080          |NULL   |MALE              |BLACK           |
|Physical Force       |2020025962038          |NULL   |MALE              |BLACK           |
|Physical Force       |2020025962049          |NULL   |MALE              |BLACK           |
|Physical Force       |2020025962140          |NULL   |MALE              |BLACK           |
|Physical Force       |2020026962081          |NULL   |MALE              |BLACK           |
|Physical Force       |2020032962033          |NULL   |MALE              |BLACK 

## Joining Use of Force Tables

In [17]:
# Inner-join incidents to subjects on the incident identifier. Both inputs have
# a "TRI INCIDENT NUMBER" column, so the joined result carries two prefixed
# copies of it (one from each side).
use_of_force_combined = use_of_force_incidents.join(
    use_of_force_subjects,
    on=(
        use_of_force_incidents["TRI INCIDENT NUMBER"]
        == use_of_force_subjects["TRI INCIDENT NUMBER"]
    ),
    how="inner",
)
use_of_force_combined.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FORCETYPE"        |"INCIDENT PCT"  |"l_d90k_TRI INCIDENT NUMBER"  |"PATROL BOROUGH"  |"OCCURRENCE DATE"  |"FORCE AGAINST MOS"  |"r_cgks_TRI INCIDENT NUMBER"  |"AGE"  |"SUBJECT GENDER"  |"SUBJECT RACE"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Physical Force     |1               |2020020962067                 |MANHATTAN         |2020-05-06         |Physical Force       |2020020962067                 |NULL   |MALE              |BLACK           |
|Physical Force     |1               |2020020962143                 |MANHATTAN         |2020-10-11         |Physical Force       |2020020962143                 |NULL   |MALE   

In [19]:
# Collapse the two prefixed copies of the join key into a single column.
#
# The join prefixes each overlapping column name with a generated tag (e.g.
# "l_d90k_" / "r_cgks_"). Those tags are not stable between runs, so the
# prefixed names are looked up from the DataFrame instead of being hard-coded.
JOIN_KEY = "TRI INCIDENT NUMBER"

# `.columns` may return names wrapped in double quotes; strip them to compare.
prefixed_key_columns = [
    column_name
    for column_name in use_of_force_combined.columns
    if column_name.strip('"').endswith(f"_{JOIN_KEY}")
]

# An empty list means this cell already ran against the current DataFrame;
# skipping the rewrite keeps the cell safe to re-run.
if prefixed_key_columns:
    if len(prefixed_key_columns) != 2:
        raise ValueError(
            f"Expected two prefixed '{JOIN_KEY}' columns after the join, "
            f"found: {prefixed_key_columns}"
        )
    # Columns keep join order: the left (incidents) copy precedes the right one.
    left_key_column, right_key_column = prefixed_key_columns
    use_of_force_combined = (
        use_of_force_combined
        .drop(right_key_column)
        .withColumnRenamed(left_key_column, JOIN_KEY)
    )

use_of_force_combined.show()

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FORCETYPE"        |"INCIDENT PCT"  |"TRI INCIDENT NUMBER"  |"PATROL BOROUGH"  |"OCCURRENCE DATE"  |"FORCE AGAINST MOS"  |"AGE"  |"SUBJECT GENDER"  |"SUBJECT RACE"  |
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
|Physical Force     |1               |2020020962067          |MANHATTAN         |2020-05-06         |Physical Force       |NULL   |MALE              |BLACK           |
|Physical Force     |1               |2020020962143          |MANHATTAN         |2020-10-11         |Physical Force       |NULL   |MALE              |BLACK           |
|Physical Force     |5               |2020024962080          |MANHATTAN         |2020-06-30         |Physical Force       |NULL   |MALE              |BLACK     

In [20]:
# Number of rows in the joined DataFrame (displayed as the cell's output).
use_of_force_combined.count()

29723

## Exporting Data to Gold Medallion

In [10]:
# Persist the joined result under the gold schema, replacing any existing table.
table_name = "SAFEGUARDING_NYC_SCHEMA_GOLD.use_of_force_combined"
use_of_force_combined.write.mode("overwrite").save_as_table(table_name)