# Frequent Itemset for Near North

In [4]:
%matplotlib inline
import math
import pandas as pd
import numpy as np
import scipy as sci
from scipy import stats
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from dateutil.parser import parse

##  Read and Pull  2016 Pickup Latitude and Longitude for Near North(8)

In [5]:
pd.set_option('mode.chained_assignment', None)
# Read the data from each month in 2016 and save them into monthly dataframe called jan, feb, mar,...
jan = pd.read_csv('chicago_taxi_trips_2016_01.csv')
feb = pd.read_csv('chicago_taxi_trips_2016_02.csv')
mar = pd.read_csv('chicago_taxi_trips_2016_03.csv')
apr = pd.read_csv('chicago_taxi_trips_2016_04.csv')
may = pd.read_csv('chicago_taxi_trips_2016_05.csv')
june = pd.read_csv('chicago_taxi_trips_2016_06.csv')
july = pd.read_csv('chicago_taxi_trips_2016_07.csv')
aug = pd.read_csv('chicago_taxi_trips_2016_08.csv')
sept = pd.read_csv('chicago_taxi_trips_2016_09.csv')
octo = pd.read_csv('chicago_taxi_trips_2016_10.csv')
nov = pd.read_csv('chicago_taxi_trips_2016_11.csv')
dec = pd.read_csv('chicago_taxi_trips_2016_12.csv')

# Clean (Remove) the columns and rows that are not applicable, unknown or missing values from each month
near_north_pickup16 = pd.DataFrame()
top_pickup = []
for mo in jan, feb, mar, apr, may, june, july, aug, sept, octo, nov, dec:
    mo = mo[mo['pickup_community_area'].isin([8.0])]  # 8: Near North, 32: Loop, 28: Near West
    mo = mo.drop(['fare','trip_miles','trip_seconds','pickup_community_area','dropoff_community_area','taxi_id','trip_end_timestamp','dropoff_latitude','dropoff_longitude','tips','pickup_census_tract', 'dropoff_census_tract','tolls','extras','trip_total','payment_type','company'], axis=1)
    mo = mo[(mo['pickup_latitude'].notnull()) & (jan['pickup_longitude'].notnull()) ]

    top_pickup.append(mo)

near_north_pickup16 = pd.concat(top_pickup, axis=0, join='outer', ignore_index=True)
near_north_pickup16 = near_north_pickup16.drop('Unnamed: 0', 1)

near_north_pickup16.info()
near_north_pickup16.head()


Boolean Series key will be reindexed to match DataFrame index.



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3215696 entries, 0 to 3215695
Data columns (total 3 columns):
pickup_latitude         float64
pickup_longitude        float64
trip_start_timestamp    object
dtypes: float64(2), object(1)
memory usage: 73.6+ MB


Unnamed: 0,pickup_latitude,pickup_longitude,trip_start_timestamp
0,294.0,113.0,1/13/16 13:30
1,754.0,410.0,1/15/16 23:30
2,161.0,649.0,1/15/16 18:15
3,599.0,346.0,1/15/16 2:45
4,170.0,351.0,1/23/16 17:00


## Convert each column into a list and Create a tuple

In [6]:
g1 = near_north_pickup16['pickup_latitude'].values
g2 = near_north_pickup16['pickup_longitude'].values
location = tuple(zip(g1, g2))
# print(g1)
# print(g2)
# print(location)

## Populate Frequency for the Freq-Itemset (pickup latitude, longitude)

In [7]:
from pymining import itemmining
relim_input = itemmining.get_relim_input(location)
report = itemmining.relim(relim_input, min_support=2)
report

{frozenset({271.0}): 1563,
 frozenset({516.0}): 1563,
 frozenset({271.0, 516.0}): 1563,
 frozenset({344.0}): 1638,
 frozenset({667.0}): 1638,
 frozenset({344.0, 667.0}): 1638,
 frozenset({146.0}): 5413,
 frozenset({414.0}): 5413,
 frozenset({146.0, 414.0}): 5413,
 frozenset({218.0}): 15311,
 frozenset({437.0}): 15311,
 frozenset({218.0, 437.0}): 15311,
 frozenset({207.0}): 44602,
 frozenset({239.0}): 44602,
 frozenset({207.0, 239.0}): 44602,
 frozenset({130.0}): 47961,
 frozenset({532.0}): 47961,
 frozenset({130.0, 532.0}): 47961,
 frozenset({204.0}): 50205,
 frozenset({474.0}): 50205,
 frozenset({204.0, 474.0}): 50205,
 frozenset({733.0}): 50873,
 frozenset({767.0}): 50873,
 frozenset({733.0, 767.0}): 50873,
 frozenset({366.0}): 63784,
 frozenset({695.0}): 63784,
 frozenset({366.0, 695.0}): 63784,
 frozenset({511.0}): 66715,
 frozenset({526.0}): 66715,
 frozenset({511.0, 526.0}): 66715,
 frozenset({453.0}): 69469,
 frozenset({454.0}): 69469,
 frozenset({453.0, 454.0}): 69469,
 frozens

In [1]:
import plotly.plotly as py
from plotly.graph_objs import *
py.sign_in('jmcd', 'uQQnXI4uR6ZLhnDLXvhu')

trace1 = {
  "x": ["555344"], 
  "y": ["(346, 599)"], 
  "marker": {"color": "rgb(241, 194, 50)"}, 
  "name": "555.3k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d556a8"
}
trace2 = {
  "x": ["456092"], 
  "y": ["(210, 470)"], 
  "marker": {"color": "rgb(147, 196, 125)"}, 
  "name": "456.0k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "961bc8"
}
trace3 = {
  "x": ["378455"], 
  "y": ["(410, 754)"], 
  "marker": {"color": "rgb(246, 178, 107)"}, 
  "name": "378.5k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "a5c691"
}
trace4 = {
  "x": ["282897"], 
  "y": ["(206, 688)"], 
  "marker": {"color": "rgb(204, 65, 37)"}, 
  "name": "282.9k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d0deb7"
}
trace5 = {
  "x": ["239768"], 
  "y": ["(113, 294)"], 
  "marker": {"color": "rgb(74, 134, 232)"}, 
  "name": "239.8k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "19ff92"
}
trace6 = {
  "x": ["238512"], 
  "y": ["(419, 615)"], 
  "marker": {"color": "rgb(102, 102, 102)"}, 
  "name": "238.5k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "776091"
}
trace7 = {
  "x": ["188739"], 
  "y": ["(161, 649)"], 
  "marker": {"color": "rgb(166, 77, 121)"}, 
  "name": "188.7k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace8 = {
  "x": ["176773"], 
  "y": ["(167, 754)"], 
  "marker": {"color": "rgb(101, 71, 132)"}, 
  "name": "176.8k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace9 = {
  "x": ["176125"], 
  "y": ["(170, 351)"], 
  "marker": {"color": "rgb(66, 97, 129)"}, 
  "name": "176.1k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace10 = {
  "x": ["105457"], 
  "y": ["(413, 636)"], 
  "marker": {"color": "rgb(111, 60, 197)"}, 
  "name": "105.5k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace11 = {
  "x": ["69469"], 
  "y": ["(453, 454)"], 
  "marker": {"color": "rgb(123, 52, 98)"}, 
  "name": "69.5k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "776091"
}
trace12 = {
  "x": ["66715"], 
  "y": ["(511, 526)"], 
  "marker": {"color": "rgb(152, 27, 99)"}, 
  "name": "66.7k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace13 = {
  "x": ["63784"], 
  "y": ["(366, 695)"], 
  "marker": {"color": "rgb(56, 46, 134)"}, 
  "name": "63.8k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace14 = {
  "x": ["50873"], 
  "y": ["(733, 767)"], 
  "marker": {"color": "rgb(88, 30, 47)"}, 
  "name": "50.8k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
trace15 = {
  "x": ["50205"], 
  "y": ["(204, 474)"], 
  "marker": {"color": "rgb(201, 124, 60)"}, 
  "name": "50.2k", 
  "orientation": "h", 
  "type": "bar", 
  "uid": "d8b4fd"
}
data = Data([trace15, trace14, trace13, trace12, trace11, trace10, trace9, trace8, trace7, trace6, trace5, trace4, trace3, trace2, trace1])
layout = {
  "annotations": [
    {
      "x": 0.1, 
      "y": 0.3, 
      "align": "center", 
      "arrowcolor": "rgba(68, 68, 68, 0)", 
      "arrowhead": 1, 
      "arrowsize": 1, 
      "arrowwidth": 0, 
      "ax": -147, 
      "ay": 159.5, 
      "bgcolor": "rgba(0,0,0,0)", 
      "bordercolor": "", 
      "borderpad": 1, 
      "borderwidth": 1, 
      "font": {
        "color": "", 
        "family": "", 
        "size": 0
      }, 
      "opacity": 1, 
      "showarrow": True, 
      "textangle": 0, 
      "xanchor": "auto", 
      "xref": "paper", 
      "yanchor": "auto", 
      "yref": "paper"
    }
  ], 
  "autosize": False, 
  "bargap": 0.2, 
  "bargroupgap": 0, 
  "barmode": "group", 
  "boxgap": 0.3, 
  "boxgroupgap": 0.3, 
  "boxmode": "overlay", 
  "dragmode": "zoom", 
  "font": {
    "color": "#444", 
    "family": "\"Open sans\", verdana, arial, sans-serif", 
    "size": 12
  }, 
  "height": 600, 
  "hidesources": False, 
  "hovermode": "x", 
  "legend": {
    "x": 0.767953667954, 
    "y": 0.746666666667, 
    "bgcolor": "rgba(255, 255, 255, 0)", 
    "bordercolor": "#444", 
    "borderwidth": 0, 
    "font": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "traceorder": "reversed", 
    "xanchor": "left", 
    "yanchor": "top"
  }, 
  "margin": {
    "r": 80, 
    "t": 100, 
    "autoexpand": True, 
    "b": 125, 
    "l": 200, 
    "pad": 0
  }, 
  "paper_bgcolor": "rgb(240, 240, 240)", 
  "plot_bgcolor": "rgb(240, 240, 240)", 
  "separators": ".,", 
  "showlegend": True, 
  "title": "<br>2016 Frequency of Top 15 Pickup Locations in Near North Side", 
  "titlefont": {
    "color": "", 
    "family": "", 
    "size": 0
  }, 
  "width": 800, 
  "xaxis": {
    "anchor": "y", 
    "autorange": True, 
    "autotick": True, 
    "domain": [0, 1], 
    "dtick": 5, 
    "exponentformat": "B", 
    "gridcolor": "#eee", 
    "gridwidth": 1, 
    "linecolor": "#444", 
    "linewidth": 1, 
    "mirror": False, 
    "nticks": 0, 
    "overlaying": False, 
    "position": 0, 
    "range": [0, 17.7894736842], 
    "rangemode": "normal", 
    "showexponent": "all", 
    "showgrid": False, 
    "showline": False, 
    "showticklabels": True, 
    "tick0": 0, 
    "tickangle": "auto", 
    "tickcolor": "#444", 
    "tickfont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "ticklen": 5, 
    "ticks": "", 
    "tickwidth": 1, 
    "title": "Frequency of Pickup Latitude and Longitide", 
    "titlefont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "type": "linear", 
    "zeroline": False, 
    "zerolinecolor": "#444", 
    "zerolinewidth": 1
  }, 
  "yaxis": {
    "anchor": "x", 
    "autorange": True, 
    "autotick": True, 
    "domain": [0, 1], 
    "dtick": 1, 
    "exponentformat": "B", 
    "gridcolor": "#eee", 
    "gridwidth": 1, 
    "linecolor": "#444", 
    "linewidth": 1, 
    "mirror": False, 
    "nticks": 0, 
    "overlaying": False, 
    "position": 0, 
    "range": [-0.5, 19.5], 
    "rangemode": "normal", 
    "showexponent": "all", 
    "showgrid": False, 
    "showline": False, 
    "showticklabels": True, 
    "tick0": 0, 
    "tickangle": "auto", 
    "tickcolor": "#444", 
    "tickfont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "ticklen": 5, 
    "ticks": "", 
    "tickwidth": 1, 
    "title": "", 
    "titlefont": {
      "color": "", 
      "family": "", 
      "size": 0
    }, 
    "type": "category", 
    "zeroline": False, 
    "zerolinecolor": "#444", 
    "zerolinewidth": 1
  }
}
fig = Figure(data=data, layout=layout)
plot_url = py.plot(fig)

## A set of unique pickup latitude

In [8]:
# my_list = near_north_pickup16["pickup_latitude"].values
# uniqueVals = np.unique(my_list)

# uniqueVals.dtype
uniqueVals = near_north_pickup16["pickup_latitude"].unique()
print(uniqueVals)


[294. 754. 161. 599. 170. 688. 210. 454. 419. 207. 167. 695. 474. 767.
 413. 218. 130. 526. 414. 516. 667.]


## A set of unique pickup longitude

In [9]:
my_list1 = near_north_pickup16["pickup_longitude"].values
uniqueVals1 = np.unique(my_list1)
print(uniqueVals1)

[113. 146. 204. 206. 239. 271. 344. 346. 351. 366. 410. 437. 453. 470.
 511. 532. 615. 636. 649. 733. 754.]
