# Frequent Itemset for Top 10 Areas

In [None]:
import sys
import cmath as math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Though the following import is not directly being used, it is required
# for 3D projection to work
import sklearn
from mpl_toolkits.mplot3d import Axes3D
from sklearn.cluster import KMeans
from sklearn import datasets
import sklearn.metrics as sm
from sklearn.metrics import confusion_matrix, classification_report

## Read Data for All 77 Areas

In [17]:
# Load the pickup records from a CSV path relative to the working directory.
pickup_location = pd.read_csv('pickup_latitude_longitude.csv')
# Print the column dtypes and memory usage, then preview the first rows.
pickup_location.info()
pickup_location.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9305703 entries, 0 to 9305702
Data columns (total 3 columns):
pickup_community_area    float64
pickup_latitude          float64
pickup_longitude         float64
dtypes: float64(3)
memory usage: 213.0 MB


Unnamed: 0,pickup_community_area,pickup_latitude,pickup_longitude
0,24.0,199.0,510.0
1,8.0,294.0,113.0
2,6.0,686.0,500.0
3,28.0,411.0,545.0
4,56.0,230.0,394.0


In [12]:
# Distinct community-area codes present in the full data set (NaN included).
pd.unique(pickup_location['pickup_community_area'])

array([24.,  8.,  6., 28., 56., 32., 76.,  7., 77., 34., 22., 33., 59.,
       25., 41., 69.,  4., 60.,  3., 21., 11., 44., 57.,  5., 39., 23.,
       15., 18., 14., 31., 16.,  2., 35., 20., 10., 40., 17.,  1., 13.,
       49., 58., 71., 61., 30., 63., 19., 12., 29., 62., 64., 36., 68.,
       43., 51., 48., 38., 42., 26., 46., 45., 27., 66., 37., 73., 65.,
       52., 50.,  9., 67., 47., 70., nan, 72., 75., 54., 53., 55., 74.])

## Pull Data only for Top 10 Areas

In [9]:
# Community-area codes to keep.
# NOTE(review): this list holds 30 codes although the variable is named
# "top10" -- confirm which one is intended.
selected_areas = [
    8, 32, 28, 76, 6, 7, 24, 33, 56, 3,
    77, 22, 5, 4, 41, 1, 16, 2, 14, 21,
    11, 34, 39, 35, 31, 15, 10, 23, 49, 13,
]
# Boolean mask: True for rows whose pickup area is one of the selected codes.
in_selected_area = pickup_location['pickup_community_area'].isin(selected_areas)
top10_pickup_loc = pickup_location[in_selected_area]
top10_pickup_loc.info()
top10_pickup_loc.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9249431 entries, 0 to 9305702
Data columns (total 3 columns):
pickup_community_area    float64
pickup_latitude          float64
pickup_longitude         float64
dtypes: float64(3)
memory usage: 282.3 MB


Unnamed: 0,pickup_community_area,pickup_latitude,pickup_longitude
0,24.0,199.0,510.0
1,8.0,294.0,113.0
2,6.0,686.0,500.0
3,28.0,411.0,545.0
4,56.0,230.0,394.0


In [10]:
# Distinct community-area codes that survived the filter.
pd.unique(top10_pickup_loc['pickup_community_area'])

array([24.,  8.,  6., 28., 56., 32., 76.,  7., 77., 34., 22., 33., 41.,
        4.,  3., 21., 11.,  5., 39., 23., 15., 14., 31., 16.,  2., 35.,
       10.,  1., 13., 49.])

## Convert each coordinate column into an array and build a tuple of (latitude, longitude) pairs

In [13]:
# Pull both coordinate columns out as arrays in a single unpacking step.
g1, g2 = (
    top10_pickup_loc[column].values
    for column in ('pickup_latitude', 'pickup_longitude')
)
# Pair the values row by row: one (latitude, longitude) tuple per pickup.
location = tuple(zip(g1, g2))

## Compute Frequencies of the Frequent Itemsets (pickup latitude, pickup longitude)

In [14]:
# Mine frequent itemsets with pymining's relim, passing `location` (the tuple
# of (latitude, longitude) pairs) as the collection of transactions.
from pymining import itemmining
relim_input = itemmining.get_relim_input(location)
# min_support=2: presumably the minimum number of transactions an itemset must
# appear in to be reported -- confirm against the pymining documentation.
report = itemmining.relim(relim_input, min_support=2)
# NOTE(review): the report keys are unordered frozensets, so a latitude and a
# longitude with the same numeric value cannot be told apart -- confirm this
# is acceptable for the analysis.
report

{frozenset({8.0}): 2,
 frozenset({8.0, 220.0}): 2,
 frozenset({20.0}): 2,
 frozenset({20.0, 378.0}): 2,
 frozenset({66.0}): 2,
 frozenset({66.0, 405.0}): 2,
 frozenset({194.0}): 2,
 frozenset({194.0, 493.0}): 2,
 frozenset({296.0}): 2,
 frozenset({296.0, 569.0}): 2,
 frozenset({298.0}): 2,
 frozenset({298.0, 395.0}): 2,
 frozenset({357.0}): 2,
 frozenset({189.0, 357.0}): 2,
 frozenset({378.0}): 2,
 frozenset({396.0}): 2,
 frozenset({396.0, 430.0}): 2,
 frozenset({446.0}): 2,
 frozenset({137.0, 446.0}): 2,
 frozenset({493.0}): 2,
 frozenset({528.0}): 2,
 frozenset({528.0, 711.0}): 2,
 frozenset({550.0}): 2,
 frozenset({397.0, 550.0}): 2,
 frozenset({569.0}): 2,
 frozenset({579.0}): 2,
 frozenset({333.0, 579.0}): 2,
 frozenset({624.0}): 2,
 frozenset({180.0, 624.0}): 2,
 frozenset({625.0}): 2,
 frozenset({541.0, 625.0}): 2,
 frozenset({703.0}): 2,
 frozenset({465.0, 703.0}): 2,
 frozenset({714.0}): 2,
 frozenset({533.0, 714.0}): 2,
 frozenset({740.0}): 2,
 frozenset({5.0, 740.0}): 2,
 fr