## Course Project

In [1]:
import pandas as pd
from apyori import apriori, load_transactions
from datetime import datetime

In [2]:
df = pd.read_csv('crime.csv')

## Data Preprocessing
### Data Cleaning


In [3]:
# Only focus on IS_CRIME
df = df.loc[df['IS_CRIME'] == True]

### Data Reduction

In [4]:
# For  Apriori: OFFENSE_CATEGORY_ID, NEIGHBORHOOD_ID
df = df[["OFFENSE_CATEGORY_ID", "NEIGHBORHOOD_ID" ]]

## Frequent Patterns
###  Apriori Algorithm 

In [5]:
association_rules = apriori(df.values[0:10],min_support = 0.02, min_confidence = 0.1, min_lift = 3)
association_results = list(association_rules)

In [6]:
results = []
for item in association_results:
    
    # first index of the inner list
    # Contains base item and add item
    pair = item[0] 
    items = [x for x in pair]
    
    value0 = str(items[0])
    value1 = str(items[1])

    #second index of the inner list
    value2 = str(item[1])[:7]

    #third index of the list located at 0th
    #of the third index of the inner list

    value3 = str(item[2][0][2])[:7]
    value4 = str(item[2][0][3])[:7]
    
    rows = (value0, value1,value2,value3,value4)
    results.append(rows)
    
labels = ['Crime','Neighborhood','Support','Confidence','Lift']
pretty_print = pd.DataFrame.from_records(results, columns = labels)

print(pretty_print)

                        Crime      Neighborhood Support Confidence     Lift
0              city-park-west  all-other-crimes     0.1    0.33333  3.33333
1                   montbello  all-other-crimes     0.1    0.33333  3.33333
2                       speer  all-other-crimes     0.1    0.33333  3.33333
3                     larceny           belcaro     0.1        1.0  3.33333
4                     larceny      cherry-creek     0.1        1.0  3.33333
5                   goldsmith      drug-alcohol     0.1        1.0     10.0
6  gateway-green-valley-ranch           larceny     0.1        1.0  3.33333
7    theft-from-motor-vehicle          highland     0.1        1.0  3.33333
8    theft-from-motor-vehicle        villa-park     0.1    0.33333  3.33333
9    theft-from-motor-vehicle         wellshire     0.1    0.33333  3.33333


### Apriori Algorithm Version 2
https://travis-ci.org/asaini/Apriori.svg?branch=master

#### Data Reduction

In [19]:
df2 = df[["OFFENSE_CATEGORY_ID", "NEIGHBORHOOD_ID","FIRST_OCCURRENCE_DATE"]]
df2 = df2[df2.OFFENSE_CATEGORY_ID != "all-other-crimes"]

df2['DATE'], df2['TIME'] = df2['FIRST_OCCURRENCE_DATE'].str.split(' ', 1).str
df2['DATE'] = pd.to_datetime(df2['DATE'])
df2['DAY_OF_WEEK'] = df2['DATE'].dt.day_name()


In [20]:
test2 = df2[['DAY_OF_WEEK', 'NEIGHBORHOOD_ID','OFFENSE_CATEGORY_ID']]
test2 = test2.sample(1000)
test2.to_csv('test2.csv')

In [22]:
!python apriori.py -f 'test2.csv' -s 0.0012

item: ('Monday', 'regis') , 0.002
item: ('Thursday', 'university-park') , 0.002
item: ('Sunday', 'capitol-hill') , 0.002
item: ('sloan-lake', 'Friday') , 0.002
item: ('university-hills', 'Sunday') , 0.002
item: ('public-disorder', 'union-station') , 0.002
item: ('auto-theft', 'gateway-green-valley-ranch') , 0.002
item: ('cherry-creek', 'Wednesday') , 0.002
item: ('east-colfax', 'Sunday') , 0.002
item: ('cbd', 'aggravated-assault') , 0.002
item: ('overland', 'other-crimes-against-persons') , 0.002
item: ('Monday', 'north-capitol-hill') , 0.002
item: ('highland', 'Sunday') , 0.002
item: ('drug-alcohol', 'south-park-hill') , 0.002
item: ('university-park', 'larceny') , 0.002
item: ('northeast-park-hill', 'Tuesday') , 0.002
item: ('Sunday', 'west-colfax') , 0.002
item: ('robbery', 'capitol-hill') , 0.002
item: ('public-disorder', 'goldsmith') , 0.002
item: ('Monday', 'union-station') , 0.002
item: ('ruby-hill', 'Tuesday') , 0.002
item: ('Sunday', 'virginia-village') , 