In [1]:
## imports
import pandas as pd
import numpy as np
import plotnine
from plotnine import *
import random

## print multiple things from same cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from datetime import datetime, timedelta

## Load data

In [3]:
## load data on 2020 crimes in DC
df = dc_crim_2020 = pd.read_csv("https://opendata.arcgis.com/datasets/f516e0dd7b614b088ad781b0c4002331_2.csv")

## create report_dt column
df['report_dt'] = pd.to_datetime(df.REPORT_DAT)

QUESTIONS FROM CLASS

In [18]:
for i, sub_df in df.groupby("WARD"):
    break

In [22]:
sub_df["WARD"] #all equal to 1 bc grouped by this

13       1
28       1
34       1
46       1
48       1
        ..
27872    1
27873    1
27888    1
27891    1
27908    1
Name: WARD, Length: 3542, dtype: int64

In [30]:
results = []

for i, sub_df in df.groupby("WARD"):
    mean_sum = sub_df.X.mean() + sub_df.Y.mean()
    results.append([i+1, mean_sum])

results

## so after doing it in the for loop-->you see what you have to put in the function

df.groupby("WARD").apply( lambda sub_df:  sub_df.X.mean() + sub_df.Y.mean() )

[[2, -38.1065400923547],
 [3, -38.133017308246316],
 [4, -38.133762748409644],
 [5, -38.06723019223189],
 [6, -38.06920232851738],
 [7, -38.10806279976277],
 [8, -38.05873302914667],
 [9, -38.1384608523579]]



WARD
1   -38.106540
2   -38.133017
3   -38.133763
4   -38.067230
5   -38.069202
6   -38.108063
7   -38.058733
8   -38.138461
dtype: float64

## Warm-up Demo

In [8]:
%%time
for i in range(df.shape[0]):
    r = df.iloc[i]
    r.X + r.Y

CPU times: user 470 ms, sys: 3.46 ms, total: 473 ms
Wall time: 471 ms


In [10]:
%%time
for i,r in df.iterrows():
    r.X + r.Y

CPU times: user 290 ms, sys: 10.9 ms, total: 300 ms
Wall time: 298 ms


In [12]:
%%time
df.apply(lambda r: r.X + r.Y, axis = 1)

CPU times: user 122 ms, sys: 10.3 ms, total: 132 ms
Wall time: 130 ms


0       -38.083572
1       -38.171101
2       -38.091828
3       -38.066249
4       -38.062885
           ...    
27926   -38.176379
27927   -38.108333
27928   -38.073717
27929   -38.132576
27930   -38.123181
Length: 27931, dtype: float64

In [14]:
%%time
## Super fast, but only works with built-in numpy functions.
df.X + df.Y

CPU times: user 674 μs, sys: 327 μs, total: 1 ms
Wall time: 936 μs


0       -38.083572
1       -38.171101
2       -38.091828
3       -38.066249
4       -38.062885
           ...    
27926   -38.176379
27927   -38.108333
27928   -38.073717
27929   -38.132576
27930   -38.123181
Length: 27931, dtype: float64

# Practice

In [32]:
## define crimes to look for and crimes to look within
## CCN is Central Complaint Number: https://go.mpdconline.com/GO/GO_401_01.pdf
CCN_examples = ['20165648', '20123250']
C_Tar = C_Target = crimes_lookfor = df[df.CCN.astype(str).isin(CCN_examples)][['CCN', 'WARD', 'OFFENSE', 'report_dt']]
C_Oth = C_Other  = other_crimes = df[~df.CCN.astype(str).isin(CCN_examples)]

## print crimes_lookfor
C_Tar.head()
# other_crimes.head()
C_Oth.head()

## using these two data frames to search for these two rows in tehse rows of others and use it to extract all the overlapping matches

Unnamed: 0,CCN,WARD,OFFENSE,report_dt
6347,20165648,6,MOTOR VEHICLE THEFT,2020-11-20 02:25:50+00:00
17857,20123250,2,MOTOR VEHICLE THEFT,2020-08-29 05:00:25+00:00


Unnamed: 0,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
0,-76.98754,38.903968,20008120,2020/01/14 15:49:55+00,DAY,OTHERS,THEFT F/AUTO,1200 - 1299 BLOCK OF NEAL STREET NE,401081.0,137345.0,...,8802.0,Precinct 77,38.90396,-76.987537,,2020/01/14 15:04:04+00,,673713315,,2020-01-14 15:49:55+00:00
1,-77.00484,38.833739,20008192,2020/01/14 19:43:04+00,DAY,GUN,ASSAULT W/DANGEROUS WEAPON,100 - 165 BLOCK OF MISSISSIPPI AVENUE SE,399580.0,129549.0,...,9803.0,Precinct 124,38.833731,-77.004837,,2020/01/14 17:17:37+00,,673713316,,2020-01-14 19:43:04+00:00
2,-76.999472,38.907644,20008246,2020/01/14 20:09:52+00,EVENING,OTHERS,THEFT/OTHER,400 - 499 BLOCK OF MORSE STREET NE,400046.0,137753.0,...,8803.0,Precinct 76,38.907636,-76.99947,,2020/01/10 23:00:00+00,2020/01/14 18:00:44+00,673713317,,2020-01-14 20:09:52+00:00
3,-76.982729,38.91648,20008501,2020/01/15 03:28:53+00,EVENING,OTHERS,THEFT/OTHER,1500 - 1599 BLOCK OF NEW YORK AVENUE NE,401498.0,138734.0,...,8803.0,Precinct 76,38.916472,-76.982727,,2020/01/15 02:31:24+00,,673713318,,2020-01-15 03:28:53+00:00
4,-76.980077,38.917191,20009148,2020/01/16 04:12:47+00,MIDNIGHT,OTHERS,THEFT/OTHER,1600 - 1779 BLOCK OF NEW YORK AVENUE NE,401728.0,138813.0,...,8803.0,Precinct 76,38.917184,-76.980074,,2020/01/16 01:15:36+00,2020/01/16 01:19:54+00,673713319,,2020-01-16 04:12:47+00:00


**Task**: we have two crimes we want to look for. We want to look in the remaining crime reports for crime reports that are:

- Located in the same ward as the two focal crimes
- Reported at the same time as the focal crime or up to 1000 minutes later (changed from slides which stated 20 mins since crime ids changed since last time so this long bandwidth helps us find matches!)

Solutions compare two ways to solve:

- Using a for loop
- Using a function

## 1. Loop approach

In [34]:
## create empty container to store results 
store_matches = {}

## loop through two example crimes
for i in range(C_Tar.shape[0]): # same as shape
    
    ## extract row
    r = one_row = C_Tar.iloc[i]

    ## first, subset to crimes in same ward
    same_wards = C_Oth[C_Oth.WARD == r.WARD]
    
    ## second, with those same-ward crimes, construct indicator for reported within 20 minutes
    ## (interpreting as after but could do either)
    ### substep: get time cutoff
    CUTOFF = r.report_dt +  timedelta(minutes=1200)
    
    ### substep: use that to subset
    same_wards_sametime = same_wards[(same_wards.report_dt >= r.report_dt) & 
                                    (same_wards.report_dt <= CUTOFF)].copy()
    
    ## third, store the results
    store_matches[str(one_row.CCN)] = same_wards_sametime
    
## finally, concatenate results into one df
all_matches = pd.concat(store_matches)
all_matches.head()

Unnamed: 0,Unnamed: 1,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
20165648,3826,-76.997316,38.904969,20165805,2020/11/20 15:06:04+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 22:30:39+00,2020/11/20 03:00:43+00,673927169,,2020-11-20 15:06:04+00:00
20165648,4348,-76.999518,38.891483,20165709,2020/11/20 04:27:36+00,MIDNIGHT,OTHERS,MOTOR VEHICLE THEFT,100 - 199 BLOCK OF 5TH STREET NE,400042.0,135959.0,...,8200.0,Precinct 89,38.891475,-76.999516,,2020/11/20 03:02:27+00,,673927939,,2020-11-20 04:27:36+00:00
20165648,5265,-76.994365,38.900203,20165859,2020/11/20 15:37:59+00,DAY,OTHERS,THEFT/OTHER,800 - 899 BLOCK OF H STREET NE,400489.0,136927.0,...,8402.0,Precinct 82,38.900195,-76.994363,,2020/11/13 22:00:23+00,2020/11/14 00:00:13+00,673929709,,2020-11-20 15:37:59+00:00
20165648,7027,-77.001316,38.898915,20165932,2020/11/20 18:56:18+00,DAY,OTHERS,THEFT F/AUTO,300 - 399 BLOCK OF G STREET NE,399886.0,136784.0,...,8301.0,Precinct 83,38.898907,-77.001314,,2020/11/20 15:30:02+00,2020/11/20 18:25:35+00,673959939,,2020-11-20 18:56:18+00:00
20165648,7179,-76.997328,38.885141,20165798,2020/11/20 12:46:32+00,DAY,OTHERS,THEFT/OTHER,600 - 669 BLOCK OF PENNSYLVANIA AVENUE SE,400232.0,135255.0,...,6500.0,Precinct 89,38.885133,-76.997326,CAPITOL HILL,2020/11/19 23:43:15+00,,673959075,,2020-11-20 12:46:32+00:00


# 1.5 Iterrow Approach

In [36]:
## create empty container to store results 
store_matches = {}

## loop through two example crimes
for i, r in C_Tar.iterrows(): # same as 

    ## subset to crimes in same ward
    same_wards = C_Oth[C_Oth.WARD == r.WARD]
    
    ## second, with those same-ward crimes, construct indicator for reported within 20 minutes
    ## (interpreting as after but could do either)
    ### substep: get time cutoff
    CUTOFF = r.report_dt +  timedelta(minutes=1200)
    
    ### substep: use that to subset
    same_wards_sametime = same_wards[(same_wards.report_dt >= r.report_dt) & 
                                    (same_wards.report_dt <= CUTOFF)].copy()
    
    ## third, store the results
    store_matches[str(one_row.CCN)] = same_wards_sametime
    
## finally, concatenate results into one df
all_matches = pd.concat(store_matches)
all_matches.head()

Unnamed: 0,Unnamed: 1,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
20123250,3449,-77.050528,38.913354,20123422,2020/08/29 16:45:57+00,DAY,OTHERS,THEFT F/AUTO,2200 - 2399 BLOCK OF DECATUR PLACE NW,395618.0,138388.0,...,4100.0,Precinct 13,38.913346,-77.050526,,2020/08/26 22:00:29+00,2020/08/27 12:00:51+00,673926478,,2020-08-29 16:45:57+00:00
20123250,4075,-77.039824,38.905656,20123507,2020/08/29 22:04:46+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1700 - 1779 BLOCK OF M STREET NW,396546.0,137533.0,...,10700.0,Precinct 17,38.905648,-77.039822,GOLDEN TRIANGLE,2020/08/27 19:01:24+00,2020/08/29 19:00:05+00,673927464,,2020-08-29 22:04:46+00:00
20123250,21849,-77.021929,38.899129,20123419,2020/08/29 17:15:19+00,DAY,OTHERS,THEFT/OTHER,700 - 799 BLOCK OF 7TH STREET NW,398098.0,136808.0,...,5801.0,Precinct 129,38.899121,-77.021926,DOWNTOWN,2020/08/29 16:05:40+00,2020/08/29 16:08:33+00,674197732,,2020-08-29 17:15:19+00:00
20123250,22244,-77.038491,38.913727,20401318,2020/08/29 14:29:59+00,DAY,OTHERS,THEFT/OTHER,1724 - 1799 BLOCK OF 17TH STREET NW,396662.0,138429.0,...,5302.0,Precinct 15,38.91372,-77.038489,,2020/08/28 20:55:00+00,2020/08/28 21:05:00+00,674200456,,2020-08-29 14:29:59+00:00
20123250,23901,-77.040091,38.909646,20123389,2020/08/29 16:05:18+00,DAY,OTHERS,THEFT F/AUTO,1700 - 1799 BLOCK OF P STREET NW,396523.0,137976.0,...,5303.0,Precinct 15,38.909638,-77.040089,,2020/08/28 22:00:23+00,2020/08/29 08:00:27+00,674222454,,2020-08-29 16:05:18+00:00


In [83]:
for i, r in C_Tar.iterrows():
    break

i #to see the index
r #all the columns associated with this data (can treat r as a dictionary where these are the keys)
r["CCN"]

## treat all the columbns as dictionary keys and extract from there like using r 

## looking at the sub things you are wokring on and printing them out is helpful

6347

CCN                           20165648
WARD                                 6
OFFENSE            MOTOR VEHICLE THEFT
report_dt    2020-11-20 02:25:50+00:00
Name: 6347, dtype: object

20165648

In [85]:
C_Tar  #two rows but you can see how i and r are in it

Unnamed: 0,CCN,WARD,OFFENSE,report_dt
6347,20165648,6,MOTOR VEHICLE THEFT,2020-11-20 02:25:50+00:00
17857,20123250,2,MOTOR VEHICLE THEFT,2020-08-29 05:00:25+00:00


## 2. Function approach

Practice rewriting the above loop as a function

### 2.1 define the function

In [173]:
store_matches_2 = {}

def find_related_crimes(r): # imagine the function taking in one row as its sole variable
    ## subset C-Other on the ward of C_target
    same_wards = C_Oth[C_Oth.WARD == r.WARD]

    ## create time cutoof of time of crime + 1200 minutes
    CUTOFF = r.report_dt +  timedelta(minutes=1200)

    ## filter the subset by time as well (between time of crie and cutoff)
    same_wards_sametime = same_wards[(same_wards.report_dt >= r.report_dt) & 
                                    (same_wards.report_dt <= CUTOFF)].copy()

    ## store data in dictionary
    store_matches_2[str(r.CCN)] = same_wards_sametime
    #--> this means that the data frame of the same ward of same time is going into the dictionary where the CCN is the key

    #return store_matches_2 don't have to return anything here --> isntead later ou are just printing the store matches
    #bc here otherwise you've been adding all the times that you are returning and then concatenating --> so the way we are doing it takes less time




#YOU DONT WANT TO return pd.concat(store_matches) --> liek doing this after the function call instead of within the function bc if you did t everytime it would take so long bc do it for every row in the C_Target 
#also bc i think wiht the return within the numbers icnrease maybe expoentianlly and tehn adds it all up at the end??

### 2.2 apply it to one of the focal crimes

In [205]:


find_related_crimes(C_Tar.iloc[1])

store_matches_2

pd.concat(store_matches_2)

{'20165648':                X          Y       CCN              REPORT_DAT     SHIFT  \
 3826  -76.997316  38.904969  20165805  2020/11/20 15:06:04+00       DAY   
 4348  -76.999518  38.891483  20165709  2020/11/20 04:27:36+00  MIDNIGHT   
 5265  -76.994365  38.900203  20165859  2020/11/20 15:37:59+00       DAY   
 7027  -77.001316  38.898915  20165932  2020/11/20 18:56:18+00       DAY   
 7179  -76.997328  38.885141  20165798  2020/11/20 12:46:32+00       DAY   
 7180  -76.997316  38.904969  20165803  2020/11/20 14:45:06+00       DAY   
 15446 -77.015554  38.899950  20166039  2020/11/20 22:07:10+00   EVENING   
 15613 -77.005894  38.905167  20165986  2020/11/20 22:17:27+00   EVENING   
 
        METHOD              OFFENSE  \
 3826   OTHERS         THEFT F/AUTO   
 4348   OTHERS  MOTOR VEHICLE THEFT   
 5265   OTHERS          THEFT/OTHER   
 7027   OTHERS         THEFT F/AUTO   
 7179   OTHERS          THEFT/OTHER   
 7180   OTHERS         THEFT F/AUTO   
 15446  OTHERS          THEFT

Unnamed: 0,Unnamed: 1,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
20165648,3826,-76.997316,38.904969,20165805,2020/11/20 15:06:04+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 22:30:39+00,2020/11/20 03:00:43+00,673927169,,2020-11-20 15:06:04+00:00
20165648,4348,-76.999518,38.891483,20165709,2020/11/20 04:27:36+00,MIDNIGHT,OTHERS,MOTOR VEHICLE THEFT,100 - 199 BLOCK OF 5TH STREET NE,400042.0,135959.0,...,8200.0,Precinct 89,38.891475,-76.999516,,2020/11/20 03:02:27+00,,673927939,,2020-11-20 04:27:36+00:00
20165648,5265,-76.994365,38.900203,20165859,2020/11/20 15:37:59+00,DAY,OTHERS,THEFT/OTHER,800 - 899 BLOCK OF H STREET NE,400489.0,136927.0,...,8402.0,Precinct 82,38.900195,-76.994363,,2020/11/13 22:00:23+00,2020/11/14 00:00:13+00,673929709,,2020-11-20 15:37:59+00:00
20165648,7027,-77.001316,38.898915,20165932,2020/11/20 18:56:18+00,DAY,OTHERS,THEFT F/AUTO,300 - 399 BLOCK OF G STREET NE,399886.0,136784.0,...,8301.0,Precinct 83,38.898907,-77.001314,,2020/11/20 15:30:02+00,2020/11/20 18:25:35+00,673959939,,2020-11-20 18:56:18+00:00
20165648,7179,-76.997328,38.885141,20165798,2020/11/20 12:46:32+00,DAY,OTHERS,THEFT/OTHER,600 - 669 BLOCK OF PENNSYLVANIA AVENUE SE,400232.0,135255.0,...,6500.0,Precinct 89,38.885133,-76.997326,CAPITOL HILL,2020/11/19 23:43:15+00,,673959075,,2020-11-20 12:46:32+00:00
20165648,7180,-76.997316,38.904969,20165803,2020/11/20 14:45:06+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 23:45:48+00,2020/11/20 03:00:00+00,673959076,,2020-11-20 14:45:06+00:00
20165648,15446,-77.015554,38.89995,20166039,2020/11/20 22:07:10+00,EVENING,OTHERS,THEFT/OTHER,300 - 363 BLOCK OF MASSACHUSETTS AVENUE NW,398651.0,136899.0,...,5900.0,Precinct 143,38.899942,-77.015552,DOWNTOWN,2020/11/20 17:30:16+00,2020/11/20 22:08:28+00,674035345,,2020-11-20 22:07:10+00:00
20165648,15613,-77.005894,38.905167,20165986,2020/11/20 22:17:27+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1151 - 1199 BLOCK OF 1ST STREET NE,399489.0,137478.0,...,10603.0,Precinct 144,38.905159,-77.005891,NOMA,2020/11/20 20:15:26+00,2020/11/20 21:46:24+00,674037731,,2020-11-20 22:17:27+00:00
20123250,3449,-77.050528,38.913354,20123422,2020/08/29 16:45:57+00,DAY,OTHERS,THEFT F/AUTO,2200 - 2399 BLOCK OF DECATUR PLACE NW,395618.0,138388.0,...,4100.0,Precinct 13,38.913346,-77.050526,,2020/08/26 22:00:29+00,2020/08/27 12:00:51+00,673926478,,2020-08-29 16:45:57+00:00
20123250,4075,-77.039824,38.905656,20123507,2020/08/29 22:04:46+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1700 - 1779 BLOCK OF M STREET NW,396546.0,137533.0,...,10700.0,Precinct 17,38.905648,-77.039822,GOLDEN TRIANGLE,2020/08/27 19:01:24+00,2020/08/29 19:00:05+00,673927464,,2020-08-29 22:04:46+00:00


In [207]:

find_related_crimes(C_Tar.iloc[0])

store_matches_2

pd.concat(store_matches_2) ## just to change look of output

{'20165648':                X          Y       CCN              REPORT_DAT     SHIFT  \
 3826  -76.997316  38.904969  20165805  2020/11/20 15:06:04+00       DAY   
 4348  -76.999518  38.891483  20165709  2020/11/20 04:27:36+00  MIDNIGHT   
 5265  -76.994365  38.900203  20165859  2020/11/20 15:37:59+00       DAY   
 7027  -77.001316  38.898915  20165932  2020/11/20 18:56:18+00       DAY   
 7179  -76.997328  38.885141  20165798  2020/11/20 12:46:32+00       DAY   
 7180  -76.997316  38.904969  20165803  2020/11/20 14:45:06+00       DAY   
 15446 -77.015554  38.899950  20166039  2020/11/20 22:07:10+00   EVENING   
 15613 -77.005894  38.905167  20165986  2020/11/20 22:17:27+00   EVENING   
 
        METHOD              OFFENSE  \
 3826   OTHERS         THEFT F/AUTO   
 4348   OTHERS  MOTOR VEHICLE THEFT   
 5265   OTHERS          THEFT/OTHER   
 7027   OTHERS         THEFT F/AUTO   
 7179   OTHERS          THEFT/OTHER   
 7180   OTHERS         THEFT F/AUTO   
 15446  OTHERS          THEFT

Unnamed: 0,Unnamed: 1,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
20165648,3826,-76.997316,38.904969,20165805,2020/11/20 15:06:04+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 22:30:39+00,2020/11/20 03:00:43+00,673927169,,2020-11-20 15:06:04+00:00
20165648,4348,-76.999518,38.891483,20165709,2020/11/20 04:27:36+00,MIDNIGHT,OTHERS,MOTOR VEHICLE THEFT,100 - 199 BLOCK OF 5TH STREET NE,400042.0,135959.0,...,8200.0,Precinct 89,38.891475,-76.999516,,2020/11/20 03:02:27+00,,673927939,,2020-11-20 04:27:36+00:00
20165648,5265,-76.994365,38.900203,20165859,2020/11/20 15:37:59+00,DAY,OTHERS,THEFT/OTHER,800 - 899 BLOCK OF H STREET NE,400489.0,136927.0,...,8402.0,Precinct 82,38.900195,-76.994363,,2020/11/13 22:00:23+00,2020/11/14 00:00:13+00,673929709,,2020-11-20 15:37:59+00:00
20165648,7027,-77.001316,38.898915,20165932,2020/11/20 18:56:18+00,DAY,OTHERS,THEFT F/AUTO,300 - 399 BLOCK OF G STREET NE,399886.0,136784.0,...,8301.0,Precinct 83,38.898907,-77.001314,,2020/11/20 15:30:02+00,2020/11/20 18:25:35+00,673959939,,2020-11-20 18:56:18+00:00
20165648,7179,-76.997328,38.885141,20165798,2020/11/20 12:46:32+00,DAY,OTHERS,THEFT/OTHER,600 - 669 BLOCK OF PENNSYLVANIA AVENUE SE,400232.0,135255.0,...,6500.0,Precinct 89,38.885133,-76.997326,CAPITOL HILL,2020/11/19 23:43:15+00,,673959075,,2020-11-20 12:46:32+00:00
20165648,7180,-76.997316,38.904969,20165803,2020/11/20 14:45:06+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 23:45:48+00,2020/11/20 03:00:00+00,673959076,,2020-11-20 14:45:06+00:00
20165648,15446,-77.015554,38.89995,20166039,2020/11/20 22:07:10+00,EVENING,OTHERS,THEFT/OTHER,300 - 363 BLOCK OF MASSACHUSETTS AVENUE NW,398651.0,136899.0,...,5900.0,Precinct 143,38.899942,-77.015552,DOWNTOWN,2020/11/20 17:30:16+00,2020/11/20 22:08:28+00,674035345,,2020-11-20 22:07:10+00:00
20165648,15613,-77.005894,38.905167,20165986,2020/11/20 22:17:27+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1151 - 1199 BLOCK OF 1ST STREET NE,399489.0,137478.0,...,10603.0,Precinct 144,38.905159,-77.005891,NOMA,2020/11/20 20:15:26+00,2020/11/20 21:46:24+00,674037731,,2020-11-20 22:17:27+00:00
20123250,3449,-77.050528,38.913354,20123422,2020/08/29 16:45:57+00,DAY,OTHERS,THEFT F/AUTO,2200 - 2399 BLOCK OF DECATUR PLACE NW,395618.0,138388.0,...,4100.0,Precinct 13,38.913346,-77.050526,,2020/08/26 22:00:29+00,2020/08/27 12:00:51+00,673926478,,2020-08-29 16:45:57+00:00
20123250,4075,-77.039824,38.905656,20123507,2020/08/29 22:04:46+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1700 - 1779 BLOCK OF M STREET NW,396546.0,137533.0,...,10700.0,Precinct 17,38.905648,-77.039822,GOLDEN TRIANGLE,2020/08/27 19:01:24+00,2020/08/29 19:00:05+00,673927464,,2020-08-29 22:04:46+00:00


### 2.3 Use apply to cover all the other focal crimes

In [209]:


C_Tar.apply(find_related_crimes, axis = 1)

store_matches_2

pd.concat(store_matches_2) ## do this at the end, not within the function bc otherwise 

6347     None
17857    None
dtype: object

{'20165648':                X          Y       CCN              REPORT_DAT     SHIFT  \
 3826  -76.997316  38.904969  20165805  2020/11/20 15:06:04+00       DAY   
 4348  -76.999518  38.891483  20165709  2020/11/20 04:27:36+00  MIDNIGHT   
 5265  -76.994365  38.900203  20165859  2020/11/20 15:37:59+00       DAY   
 7027  -77.001316  38.898915  20165932  2020/11/20 18:56:18+00       DAY   
 7179  -76.997328  38.885141  20165798  2020/11/20 12:46:32+00       DAY   
 7180  -76.997316  38.904969  20165803  2020/11/20 14:45:06+00       DAY   
 15446 -77.015554  38.899950  20166039  2020/11/20 22:07:10+00   EVENING   
 15613 -77.005894  38.905167  20165986  2020/11/20 22:17:27+00   EVENING   
 
        METHOD              OFFENSE  \
 3826   OTHERS         THEFT F/AUTO   
 4348   OTHERS  MOTOR VEHICLE THEFT   
 5265   OTHERS          THEFT/OTHER   
 7027   OTHERS         THEFT F/AUTO   
 7179   OTHERS          THEFT/OTHER   
 7180   OTHERS         THEFT F/AUTO   
 15446  OTHERS          THEFT

Unnamed: 0,Unnamed: 1,X,Y,CCN,REPORT_DAT,SHIFT,METHOD,OFFENSE,BLOCK,XBLOCK,YBLOCK,...,CENSUS_TRACT,VOTING_PRECINCT,LATITUDE,LONGITUDE,BID,START_DATE,END_DATE,OBJECTID,OCTO_RECORD_ID,report_dt
20165648,3826,-76.997316,38.904969,20165805,2020/11/20 15:06:04+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 22:30:39+00,2020/11/20 03:00:43+00,673927169,,2020-11-20 15:06:04+00:00
20165648,4348,-76.999518,38.891483,20165709,2020/11/20 04:27:36+00,MIDNIGHT,OTHERS,MOTOR VEHICLE THEFT,100 - 199 BLOCK OF 5TH STREET NE,400042.0,135959.0,...,8200.0,Precinct 89,38.891475,-76.999516,,2020/11/20 03:02:27+00,,673927939,,2020-11-20 04:27:36+00:00
20165648,5265,-76.994365,38.900203,20165859,2020/11/20 15:37:59+00,DAY,OTHERS,THEFT/OTHER,800 - 899 BLOCK OF H STREET NE,400489.0,136927.0,...,8402.0,Precinct 82,38.900195,-76.994363,,2020/11/13 22:00:23+00,2020/11/14 00:00:13+00,673929709,,2020-11-20 15:37:59+00:00
20165648,7027,-77.001316,38.898915,20165932,2020/11/20 18:56:18+00,DAY,OTHERS,THEFT F/AUTO,300 - 399 BLOCK OF G STREET NE,399886.0,136784.0,...,8301.0,Precinct 83,38.898907,-77.001314,,2020/11/20 15:30:02+00,2020/11/20 18:25:35+00,673959939,,2020-11-20 18:56:18+00:00
20165648,7179,-76.997328,38.885141,20165798,2020/11/20 12:46:32+00,DAY,OTHERS,THEFT/OTHER,600 - 669 BLOCK OF PENNSYLVANIA AVENUE SE,400232.0,135255.0,...,6500.0,Precinct 89,38.885133,-76.997326,CAPITOL HILL,2020/11/19 23:43:15+00,,673959075,,2020-11-20 12:46:32+00:00
20165648,7180,-76.997316,38.904969,20165803,2020/11/20 14:45:06+00,DAY,OTHERS,THEFT F/AUTO,600 - 699 BLOCK OF ORLEANS PLACE NE,400233.0,137456.0,...,10602.0,Precinct 83,38.904961,-76.997314,,2020/11/19 23:45:48+00,2020/11/20 03:00:00+00,673959076,,2020-11-20 14:45:06+00:00
20165648,15446,-77.015554,38.89995,20166039,2020/11/20 22:07:10+00,EVENING,OTHERS,THEFT/OTHER,300 - 363 BLOCK OF MASSACHUSETTS AVENUE NW,398651.0,136899.0,...,5900.0,Precinct 143,38.899942,-77.015552,DOWNTOWN,2020/11/20 17:30:16+00,2020/11/20 22:08:28+00,674035345,,2020-11-20 22:07:10+00:00
20165648,15613,-77.005894,38.905167,20165986,2020/11/20 22:17:27+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1151 - 1199 BLOCK OF 1ST STREET NE,399489.0,137478.0,...,10603.0,Precinct 144,38.905159,-77.005891,NOMA,2020/11/20 20:15:26+00,2020/11/20 21:46:24+00,674037731,,2020-11-20 22:17:27+00:00
20123250,3449,-77.050528,38.913354,20123422,2020/08/29 16:45:57+00,DAY,OTHERS,THEFT F/AUTO,2200 - 2399 BLOCK OF DECATUR PLACE NW,395618.0,138388.0,...,4100.0,Precinct 13,38.913346,-77.050526,,2020/08/26 22:00:29+00,2020/08/27 12:00:51+00,673926478,,2020-08-29 16:45:57+00:00
20123250,4075,-77.039824,38.905656,20123507,2020/08/29 22:04:46+00,EVENING,OTHERS,MOTOR VEHICLE THEFT,1700 - 1779 BLOCK OF M STREET NW,396546.0,137533.0,...,10700.0,Precinct 17,38.905648,-77.039822,GOLDEN TRIANGLE,2020/08/27 19:01:24+00,2020/08/29 19:00:05+00,673927464,,2020-08-29 22:04:46+00:00


In [211]:
## another way
C_Tar.apply(lambda r: find_related_crimes(r), axis = 1)

## there are None s here bc 

6347     None
17857    None
dtype: object

Oddities ( i suppose that make it unique for data science):

- no return
- many global variables

**depending on ur workflow (bc data science is not software engineering), you may just want to explore --> so just using functions as way sto build oher objects

in the data science wrokflow, don't have to respect things but there are flexible way you can use other objects