In [25]:
import seaborn as sn
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv
from pprint import pprint
import random
from sklearn import preprocessing
from __future__ import division
import pickle
from math import sqrt 
from pprint import pprint
import lxml

# Seed Python's `random` module with a fixed value, then record the path of the
# "uncorrupted" extract of the parking-citations dataset used by the cells below.
random.seed(42)
filename = '../data/parking_citations_uncorrupted.csv'

In [3]:
# Get headers and data types.
# Only the header row of the CSV is read here. Spaces in the header labels are
# replaced by underscores to give the column names (`names`) that the
# `pd.read_csv(..., names=names, dtype=dtypes)` call assigns to the frame.
columns = []
with open(filename) as f:
    reader = csv.reader(f)
    # `next(reader)` behaves like `reader.next()` on Python 2.6+ and keeps
    # working on Python 3, where the `.next()` method no longer exists.
    columns = next(reader)

names = [col.replace(" ", "_") for col in columns]

# Per-column dtypes for `pd.read_csv`. Every key must be spelled exactly like
# the corresponding entry of `names`. The keys 'Issue_Time', 'Body Style' and
# 'Violation_Code' matched no column (the loaded frame's columns are
# 'Issue_time', 'Body_Style' and 'Violation_code'), so those three entries
# could not take effect; they are spelled to match here.
dtypes = {
    'Ticket_number': 'unicode',
    'Issue_Date': 'unicode',
    'Issue_time': 'unicode',
    'Meter_Id': 'unicode',
    'Marked_Time': 'unicode',
    'RP_State_Plate': 'unicode',
    'Plate_Expiry_Date': 'unicode',
    'VIN': 'unicode',
    'Make': 'unicode',
    'Body_Style': 'unicode',
    'Color': 'unicode',
    'Location': 'unicode',
    'Route': 'unicode',
    'Agency': 'unicode',
    'Violation_code': 'unicode',
    'Violation_Description': 'unicode',
    'Fine_amount': np.float64,
    'Latitude': np.float64,
    'Longitude': np.float64,
}

# Fail loudly if a dtype key does not correspond to a real column, instead of
# letting a misspelled key go unnoticed.
unmatched = sorted(set(dtypes) - set(names))
assert not unmatched, "dtype keys with no matching column: %s" % unmatched

In [75]:
# Load data from file: parse the CSV into a DataFrame. `header=0` marks the
# file's first row as the header row, whose labels are replaced by `names`;
# `dtypes` supplies the per-column types.
citations = pd.read_csv(filename, header=0, names=names, dtype=dtypes)

In [76]:
# Start the HTML document. `E` is lxml's element factory; `doc` is the root
# <html> element that later cells append the <head> and <body> elements to.
from lxml import etree, html
from lxml.builder import E

doc = E.html()


In [78]:
# Create picklists for categorical variables: for each column in `cat_vars`,
# build a <datalist> whose id is the column name and which holds one <option>
# per distinct non-null value found in `citations`. All lists go into <head>.
head = E.head()

cat_vars = [
    'Meter_Id',
    'RP_State_Plate',
    'Violation_code',
    'Violation_Description',
    'Body_Style',
    'Color',
    'Location',
    'Route',
    'Agency',
]

for col_name in cat_vars:
    # Distinct values in first-seen order, with missing entries removed.
    distinct_values = citations[col_name].dropna().unique().tolist()
    picklist = E.datalist(id=col_name)
    picklist.extend(E.option(value=v) for v in distinct_values)
    head.append(picklist)

doc.append(head)

In [79]:
# Create input features: a POST form targeting /predict with one labelled
# <input> per model variable, followed by a submit button.
body = E.body()
form = E.form(action='/predict', method='post')

# (column name, input kind). Columns listed in `cat_vars` are rendered as text
# boxes backed by the <datalist> that carries the same id as the column name;
# the remaining kinds are used directly as the <input type="...">.
variables = [
    (u'Issue_Date', 'date'),
    (u'Issue_time', 'time'),
    (u'Meter_Id', 'list'),
    (u'Marked_Time', 'time'),
    (u'RP_State_Plate', 'list'),
    (u'Plate_Expiry_Date', 'date'),
    (u'Body_Style', 'list'),
    (u'Color', 'list'),
    (u'Location', 'list'),
    (u'Route', 'list'),
    (u'Agency', 'list'),
    (u'Violation_code', 'list'),
    (u'Violation_Description', 'list'),
    (u'Fine_amount', 'number'),
    (u'Latitude', 'number'),
    (u'Longitude', 'number'),
]

for var_name, input_kind in variables:
    form.append(E.label(var_name))

    if var_name in cat_vars:
        # `name` is required here as well: a form control without a name is
        # left out of the submitted data, so the categorical fields would
        # never reach /predict.
        field = E.input(list=var_name, name=var_name)
    else:
        field = E.input(type=input_kind, name=var_name)
    form.append(field)

# The submit button has to be attached to the form; creating the element alone
# leaves the page without any way to post it.
submit = E.input(type='submit', value='Probability Top 25 Make')
form.append(submit)

body.append(form)
doc.append(body)


In [80]:
# Serialise the element tree as indented, UTF-8 encoded markup that starts with
# an XML declaration followed by the HTML5 doctype.
result = etree.tostring(
    doc,
    encoding='utf-8',
    doctype='<!DOCTYPE html>',
    xml_declaration=True,
    pretty_print=True,
)

In [81]:
# Write the serialised page to ../predict.html.
# `result` is produced by `etree.tostring(..., encoding='utf-8')`, i.e. it is an
# already-encoded byte string. Binary mode writes those bytes unchanged on every
# platform, and also works on Python 3, where a text-mode file rejects bytes.
with open('../predict.html', 'wb') as f:
    f.write(result)

In [139]:
# Display one citation (the row at position 3) to inspect the column names and
# the value each column holds for a single record.
citations.iloc[3]

Ticket_number                     1106506435
Issue_Date               2015-12-22T00:00:00
Issue_time                              1105
Meter_Id                                 NaN
Marked_Time                              NaN
RP_State_Plate                            CA
Plate_Expiry_Date                   201701.0
VIN                                      NaN
Make                                    CHRY
Body_Style                                PA
Color                                     GO
Location                     1159 HUNTLEY DR
Route                                   2A75
Agency                                   1.0
Violation_code                        8069AA
Violation_Description       NO STOP/STAND AM
Fine_amount                               93
Latitude                               99999
Longitude                              99999
Name: 3, dtype: object

In [92]:
# Hand-typed sample citation record, keyed by column name, that is serialised
# to JSON with `json.dumps(js)` further down.
js = {
    'Issue_Date': '2015-09-15T00:00:00',
    'Issue_time': 115,
    'Meter_Id': 'WF55',
    'Marked_Time': 1145.0,
    'RP_State_Plate': 'CA',
    'Plate_Expiry_Date': 200316.0,
    'VIN': 9817,
    'Make': 'CHEV',
    'Body_Style': 'PA',
    'Color': 'BK',
    'Location': 'GEORGIA ST/OLYMPIC',
    'Route': '1FB70',
    'Agency': '1.0',
    'Violation_code': '8069A',
    'Violation_Description': 'NO STOPPING/STANDING',
    'Fine_amount': 93,
    'Latitude': 99999,
    'Longitude': 99999,
}

In [93]:
# Serialise the sample record `js` to a JSON string; the string is shown as the
# cell's output.
import json
json.dumps(js)

'{"Location": "GEORGIA ST/OLYMPIC", "Issue_Date": "2015-09-15T00:00:00", "Body_Style": "PA", "Violation_code": "8069A", "Plate_Expiry_Date": 200316.0, "Make": "CHEV", "Agency": 1.0, "VIN": 9817, "Violation_Description": "NO STOPPING/STANDING", "Longitude": 99999, "Color": "BK", "Issue_time": 115, "Marked_Time": 1145.0, "Latitude": 99999, "Fine_amount": 93, "RP_State_Plate": "CA", "Route": "1FB70", "Meter_Id": "WF55"}'

In [94]:
# Load the pickled encoder for the Color column from the app's models directory.
# NOTE(review): unpickling can execute arbitrary code contained in the file, so
# only load pickles that were produced by this project.
# Presumably a fitted scikit-learn label encoder (the file name and the
# `.transform` call below suggest so) -- TODO confirm.
import pickle
with open('../app/models/Color_label_encoder.pkl','rb') as f:
    enc = pickle.load(f)

In [97]:
# Spot check: run the colour code 'BK' through the loaded encoder and display
# the encoded value it returns.
enc.transform(['BK'])

array([6])

In [22]:
# Convert the date-like fields of a request into "whole days since 2000-01-01".
from datetime import datetime

# Reference date the day counts are measured from. Written as (2000, 1, 1):
# zero-padded literals such as `01` are octal in Python 2 and a syntax error in
# Python 3.
start_epoch = datetime(2000, 1, 1)

# strptime layouts of the two incoming fields. The time part is hour:minute:
# second (%H:%M:%S); with the directives swapped (%M:%H:%S) only midnight-like
# values parse, and a time such as 13:45:00 raises because 45 is not an hour.
ISSUE_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"   # e.g. "2003-09-15T00:00:00"
PLATE_EXPIRY_FORMAT = "%Y%m"              # e.g. "200301"


def days_since_epoch(text, fmt, epoch=start_epoch):
    """Return the number of whole days between `epoch` and the parsed `text`.

    text  -- date string to parse.
    fmt   -- strptime layout describing `text`.
    epoch -- datetime to measure from (defaults to `start_epoch`).

    Raises ValueError (from `datetime.strptime`) when `text` does not match `fmt`.
    """
    return (datetime.strptime(text, fmt) - epoch).days


data = {
    "Issue_Date": "2003-09-15T00:00:00",
    "Plate_Expiry_Date": "200301",
}

# Each field is replaced in place by its day count; `issue_dt` / `exp_dt` keep
# the converted values available under their own names.
issue_dt = days_since_epoch(data['Issue_Date'], ISSUE_DATE_FORMAT)
data['Issue_Date'] = issue_dt

exp_dt = days_since_epoch(data['Plate_Expiry_Date'], PLATE_EXPIRY_FORMAT)
data['Plate_Expiry_Date'] = exp_dt

print(data)


{'Plate_Expiry_Date': 1096, 'Issue_Date': 1353}


In [118]:
# Experiment: pass a bare year string to pd.to_timedelta.
# NOTE(review): to_timedelta parses durations rather than calendar dates, and
# the result recorded below this cell is not date-like -- it does not look
# usable for date arithmetic; confirm before relying on it.
pd.to_timedelta('2018')

Timedelta('-1 days +23:59:59.999798')

In [145]:
# Experiment: expand a "YYYYMM" plate-expiry value into a full timestamp string.
from datetime import datetime
# %m is the month directive. %M means minutes: using it for both parsing and
# formatting read the month digits as minutes and printed them back, so the
# final string looked right while the intermediate datetime was always in
# January.
dt = datetime.strptime('200301', '%Y%m')
dt = dt.strftime("%Y%m%dT00:00:00")
# Function-call form of print works on both Python 2 and Python 3.
print(dt)
# NOTE(review): the result recorded for this call (13909 days 19:01:00) equals
# 20,030,101 minutes, i.e. the digits before the 'T' were taken as a minute
# count, not as a date -- to_timedelta is not the right tool for this string.
pd.to_timedelta(dt)

20030101T00:00:00


Timedelta('13909 days 19:01:00')

In [149]:
# Experiment: pass a compact timestamp string to pd.to_timedelta. The recorded
# result (13910 days 08:35:00) equals 20,030,915 minutes, i.e. the digits before
# the 'T' were read as a minute count rather than as a calendar date.
pd.to_timedelta('20030915T00:00:00')

Timedelta('13910 days 08:35:00')

In [152]:
# Same experiment with another date string. The recorded result
# (13909 days 19:12:00) equals 20,030,112 minutes, again showing the digits
# being read as a minute count rather than as a calendar date.
pd.to_timedelta('20030112T00:00:00')

Timedelta('13909 days 19:12:00')

In [157]:
# Day offset via real date parsing: pd.to_datetime turns the string into a
# timestamp, subtracting a reference datetime gives a time difference, and
# `.days` is its whole-day part (1 for the values below).
# The reference is written as (2016, 1, 1): zero-padded literals such as `01`
# are octal in Python 2 and a syntax error in Python 3.
(pd.to_datetime('20160102') - datetime(2016, 1, 1)).days

1