In [1]:
import csv, sqlite3

In [5]:
%%time

# Create the sqlite table in memory and bulk-load the citations CSV into it.
# `con` and `cur` are left in the namespace for use after this cell.
con = sqlite3.connect(":memory:")
cur = con.cursor()

cur.execute("DROP TABLE IF EXISTS parking_citations ;")

cur.execute("""
    CREATE TABLE parking_citations (
        ticket_number VARCHAR(11)  ,
        issue_date DATE ,
        issue_time FLOAT ,
        meter_id VARCHAR(9) ,
        marked_time FLOAT ,
        rp_state_plate  VARCHAR(2) ,
        plate_expiry_date  DATE ,
        VIN  VARCHAR(17) ,
        make VARCHAR(5) ,
        body_style VARCHAR(2) ,
        color VARCHAR(2) ,
        location VARCHAR(35) ,
        route VARCHAR(5) ,
        agency VARCHAR(4),
        violation_code VARCHAR(9) ,
        violation_description VARCHAR(35) ,
        fine_amount FLOAT ,
        latitude FLOAT ,
        longitude FLOAT
    ) ;
""")

# The parameterized INSERT is built once and reused for every CSV row.
# It has 19 placeholders, one per column, so each CSV row must have
# exactly 19 fields or sqlite3 raises.
insert_sql = """INSERT INTO parking_citations (
            ticket_number,
            issue_date ,
            issue_time ,
            meter_id ,
            marked_time ,
            rp_state_plate ,
            plate_expiry_date ,
            VIN ,
            make ,
            body_style ,
            color ,
            location ,
            route ,
            agency ,
            violation_code ,
            violation_description ,
            fine_amount ,
            latitude ,
            longitude
        ) VALUES (
            ? , ? , ? , ? , ? ,
            ? , ? , ? , ? , ? ,
            ? , ? , ? , ? , ? ,
            ? , ? , ? , ?
        )"""

# newline='' is what the csv module asks for when opening files, so that
# line breaks embedded in quoted fields are parsed correctly.
#
# NOTE(review): every row the reader yields is inserted, so if the file
# starts with a header line it ends up in the table as data -- confirm
# against the file and skip it with next(reader, None) if so.
# NOTE(review): csv.reader yields strings only, so empty fields are stored
# as '' rather than NULL; aggregates such as MIN()/AVG() will see them.
with open('../data/parking_citations_uncorrupted.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    # executemany consumes the reader row by row, so the file is still
    # streamed, but the statement is submitted once instead of per row.
    cur.executemany(insert_sql, reader)

con.commit()

CPU times: user 32.9 s, sys: 236 ms, total: 33.1 s
Wall time: 33.1 s


In [28]:
%%time 

# Top 25 most common makes.
# Counts the non-NULL ticket_number values per make, sorts the makes by that
# count (largest first) and keeps the first 25 rows.
top_makes_sql = """
    SELECT
        make
        ,COUNT(ticket_number) as count
    FROM
        parking_citations
    GROUP BY make
    ORDER BY count DESC
    LIMIT 25
"""

# The cursor returned by execute() is the value of this cell; the rows
# themselves are not fetched here.
cur.execute(top_makes_sql)

CPU times: user 9.8 s, sys: 396 ms, total: 10.2 s
Wall time: 10.2 s


<sqlite3.Cursor at 0x7f138b13b650>

In [29]:
%%time

# Most common color for each make.
#
# Inner query: one row per (make, color) with the number of tickets for
# that pair.
# Outer query: one row per make, keeping the largest car_count. Because the
# outer query contains exactly one MAX() aggregate, SQLite fills the bare
# `color` column from the same row that supplied the maximum, so `color` is
# the color with the highest count for that make. When two colors tie for
# the maximum, SQLite picks one of them arbitrarily.
#
# Two defects of the earlier version are fixed here:
#   * the inner `HAVING car_count = 3` kept only pairs with exactly three
#     tickets, discarding almost all of the data;
#   * the outer `GROUP BY make, color` re-grouped rows that were already
#     unique per (make, color), so `HAVING car_count = MAX(car_count)` was
#     true for every row and selected nothing.
#
# The cursor returned by execute() is the value of this cell; the rows
# themselves are not fetched here.
cur.execute("""
    SELECT
        make,
        color,
        MAX(car_count) AS car_count
    FROM
    (
    SELECT
        make,
        color,
        COUNT(ticket_number) AS car_count
    FROM
        parking_citations
    GROUP BY make, color
    ) a
    GROUP BY make
""")

CPU times: user 11.7 s, sys: 512 ms, total: 12.2 s
Wall time: 12.2 s


<sqlite3.Cursor at 0x7f138b13b650>

In [31]:
%%time

# First ticket issued for each make.
# Groups the citations by make and keeps a group only when its issue_date
# equals the group's minimum issue_date.
#
# NOTE(review): `ticket_number` and `issue_date` are bare (non-aggregated)
# columns here. This appears to rely on SQLite's rule that, with a single
# min()/max() aggregate in the query, bare columns are taken from the row
# holding that minimum -- confirm before porting to another database.
# NOTE(review): min() compares issue_date as stored; this is only the
# chronological minimum if the stored format sorts chronologically
# (e.g. ISO-8601 text) -- verify against the data.
first_ticket_sql = """
    SELECT
        make
        ,ticket_number
    FROM 
        parking_citations
    GROUP BY make 
    HAVING issue_date = min(issue_date)
"""

# The cursor returned by execute() is the value of this cell; the rows
# themselves are not fetched here.
cur.execute(first_ticket_sql)

CPU times: user 7.86 s, sys: 676 ms, total: 8.54 s
Wall time: 8.54 s


<sqlite3.Cursor at 0x7f138b13b650>