In [1]:
import os
import sys

try:
    import pyspark
except ImportError:
    # pyspark is not directly importable in this environment: let findspark
    # locate the Spark installation and patch sys.path, then retry the import.
    import findspark
    findspark.init()
    import pyspark

from pyspark.sql import SparkSession


# Give the notebook access to the crymepipelines app modules.
# The checkout location can be overridden with the CRYMEPIPELINES_PATH
# environment variable; the default is the original development path.
CRYMEPIPELINES_PATH = os.environ.get(
    'CRYMEPIPELINES_PATH',
    '/home/ben/.envs/cc/CrymeClarity/crymepipelines/src',
)
# Keep the app source first on sys.path, but do not stack a duplicate entry
# each time this cell is re-run.
if sys.path[:1] != [CRYMEPIPELINES_PATH]:
    sys.path.insert(0, CRYMEPIPELINES_PATH)

# Build the Spark session.
# Going through the builder's getOrCreate() (instead of constructing a
# SparkContext by hand) makes this cell safe to re-run: an existing session is
# reused rather than raising because a SparkContext is already running. It also
# lets APP_NAME be applied when the underlying context is first created.
APP_NAME = 'CRYME_PIPELINE_DEV'
spark = SparkSession.builder.appName(APP_NAME).getOrCreate()
# Expose the underlying SparkContext under the name the notebook already uses.
sc = spark.sparkContext


In [2]:
# import standard-library helpers and crymepipelines app modules
import csv
from datetime import datetime, timedelta
import os
import pickle as p
import shutil

from shared.objects.samples import SamplesManager
from shared.settings import CF_TRUST_DELAY, START_DATE, cf_conn, cp_conn, TMP_DIR, BIN_DIR
from tasks.base import SparkCrymeTask, NativeCrymeTask
from utils import crime_occ_udf, ts_to_minutes_in_day_udf, ts_to_hour_of_day_udf, ts_to_day_of_week_udf, ts_conv, safety_rel_crimes
from tasks.mixins import SearchForCrimesMixin

In [3]:
# Read the cleaned crime incidents parquet from the app's tmp directory.
incidents_parquet = CRYMEPIPELINES_PATH + '/tmp/clean_crime_incidents.parquet'
crime_incidents = spark.read.parquet(incidents_parquet)

# Keep only incidents whose date_occ is later than 30 days before today.
cutoff_date = datetime.now().date() - timedelta(days=30)
crime_incidents = crime_incidents.where(crime_incidents['date_occ'] > cutoff_date)

In [6]:
# Count incidents (_id) per premise description, most frequent first.
by_type = (
    crime_incidents
    .groupBy('premis_desc')
    .agg({'_id': 'count'})
    .sort("count(_id)", ascending=False)
)

# Display every row without truncating long premise descriptions;
# count() is evaluated first to learn how many rows to show.
total_rows = by_type.count()
by_type.show(n=total_rows, truncate=False)

+---------------------------------------------------------+----------+
|premis_desc                                              |count(_id)|
+---------------------------------------------------------+----------+
|STREET                                                   |2011      |
|SINGLE FAMILY DWELLING                                   |1039      |
|MULTI-UNIT DWELLING (APARTMENT, DUPLEX, ETC)             |772       |
|PARKING LOT                                              |600       |
|SIDEWALK                                                 |433       |
|VEHICLE, PASSENGER/TRUCK                                 |314       |
|OTHER BUSINESS                                           |293       |
|DRIVEWAY                                                 |154       |
|GARAGE/CARPORT                                           |140       |
|RESTAURANT/FAST FOOD                                     |118       |
|PARKING UNDERGROUND/BUILDING                             |75        |
|MARKE