https://www.kaggle.com/code/donyoe/exploring-42-3m-nyc-parking-tickets

In [1]:
def init_ddb_from_csv(db_filename, tablename, csv_filename, **kwargs):
    """
    Load from the csv file into a DuckDB database.

    db_filename: Name of the database
    tablename: Table to load to
    csv_filename: CSV file to load from
    **kwargs: Options for DuckDB's read_csv function, see https://duckdb.org/docs/data/csv/overview
              Each option is rendered verbatim as "KEY = value" inside the SQL call.

    Returns a schema dict of the form
        {tablename: {"fact": tablename + "_fact",
                     "dimension_tables": {},
                     "col_to_table_map": {column: tablename + "_fact", ...}}}

    The generated SQL statement is printed before it is executed. The database
    connection is closed before returning, including when loading fails.
    """
    import duckdb
    duckdb_con = duckdb.connect(db_filename)
    try:
        # Double embedded single quotes so a path such as "o'hare.csv" remains
        # a single SQL string literal instead of terminating it early.
        quoted_csv = "'{}'".format(str(csv_filename).replace("'", "''"))
        read_csv_args_list = [quoted_csv]
        schema = {tablename : {
            "fact" : tablename + "_fact",
            "dimension_tables" : {},
            "col_to_table_map" : {}
        }}
        for key, value in kwargs.items():
            read_csv_args_list.append("{0} = {1}".format(key, value))
        read_csv_args = ','.join(read_csv_args_list)
        sql_stmt = "CREATE TABLE {} AS SELECT * FROM read_csv({}, AUTO_DETECT=TRUE)".format(tablename, read_csv_args)
        print(sql_stmt)
        duckdb_con.sql(sql_stmt)
        # Every column of a single-file load is attributed to the "<tablename>_fact" entry.
        table = duckdb_con.table(tablename)
        for col in table.columns:
            schema[tablename]["col_to_table_map"][col] = schema[tablename]["fact"]
    finally:
        # Always release the connection: a failed load must not leave the
        # database file held open by this process.
        duckdb_con.close()
    return schema

def init_ddb_from_split_csv(db_filename, tablename, split_csv_foldername, **kwargs):
    """
    Load the split csv files into a DuckDB database and expose a view with tablename

    Every file found under split_csv_foldername is loaded into its own table
    named "<tablename>_<file name up to '.csv'>". Files whose name contains
    "dim" are treated as dimension tables (assumed to be named dimx.csv) and
    are joined to the "<tablename>_fact" table on column px when the view is built.

    db_filename: Name of the database
    tablename: View to expose giving the impression of a table
    split_csv_foldername: Folder containing the split CSV files
    **kwargs: Options for DuckDB's read_csv function, see https://duckdb.org/docs/data/csv/overview

    Returns a schema dict of the form
        {tablename: {"fact": tablename + "_fact",
                     "dimension_tables": {dimension table name: key column, ...},
                     "col_to_table_map": {column: table holding it, ...}}}

    Raises ValueError if no files are found under split_csv_foldername.
    The database connection is closed before returning, including on failure.
    """
    import duckdb
    import os
    import re
    duckdb_con = duckdb.connect(db_filename)
    try:
        schema = {tablename : {
            "fact" : tablename + "_fact",
            "dimension_tables" : {},
            "col_to_table_map" : {}
        }}
        read_csv_args_list = ["AUTO_DETECT = TRUE"]
        for key, value in kwargs.items():
            read_csv_args_list.append("{0} = {1}".format(key, value))
        read_csv_args = ','.join(read_csv_args_list)
        # Join keys are the columns named p0, p1, ... (see the join clauses
        # below). Only those are hidden from the view; ordinary columns that
        # merely start with the letter 'p' are kept.
        key_column = re.compile(r"p\d+")
        num_dims = 0
        cols = []
        sub_tablenames = []
        for root, dirs, files in os.walk(split_csv_foldername):
            # os.walk yields entries in arbitrary order; sort so that the
            # view's column order is the same on every run and machine.
            dirs.sort()
            for file in sorted(files):
                file_stem = file.split(".csv")[0]
                sub_tablename = tablename + "_" + file_stem
                sub_tablenames.append(sub_tablename)
                if 'dim' in file_stem:
                    num_dims += 1
                    # Assuming that dimension tables are named dimx.csv.
                    # Parse the file name rather than sub_tablename, so a
                    # tablename that itself contains "dim" cannot corrupt x.
                    dim_no = file_stem.split("dim")[1]
                    schema[tablename]["dimension_tables"][sub_tablename] = 'p' + dim_no
                # Double embedded single quotes so the path stays one SQL string literal.
                full_filename = (root + '/' + file).replace("'", "''")
                sql_stmt = "CREATE TABLE {} AS SELECT * FROM read_csv('{}', {})".format(sub_tablename, full_filename, read_csv_args)
                print(sql_stmt)
                duckdb_con.sql(sql_stmt)
                table = duckdb_con.table(sub_tablename)
                for col in table.columns:
                    if key_column.fullmatch(col):
                        continue
                    cols.append('"' + col + '"')
                    schema[tablename]['col_to_table_map'][col] = sub_tablename

        if not sub_tablenames:
            # Without this check the view statement would be syntactically
            # invalid and fail with an unhelpful parser error.
            raise ValueError("No CSV files found under {!r}".format(split_csv_foldername))

        # Now create a view corresponding to a single original csv file
        join_clauses = []
        for i in range(num_dims):
            join_clause = "{}_fact.p{} = {}_dim{}.p{}".format(tablename, i, tablename, i, i)
            join_clauses.append(join_clause)

        sql_stmt = "CREATE VIEW {} AS SELECT ".format(tablename) + ",".join(cols) + \
            " FROM " + ",".join(sub_tablenames)
        # Only emit WHERE when there is something to join on; a bare trailing
        # "WHERE" is not valid SQL.
        if join_clauses:
            sql_stmt += " WHERE " + (" AND ").join(join_clauses)
        print(sql_stmt)
        duckdb_con.sql(sql_stmt)
    finally:
        # Always release the connection, even when a load or the view creation fails.
        duckdb_con.close()
    return schema

In [2]:
dbname = "nyc_parking_tickets.db"

# Default format: one CSV file per fiscal year, each loaded into its own table.
fiscal_year_csvs = [
    ("tickets_2014", "nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2014__August_2013___June_2014_.csv"),
    ("tickets_2015", "nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2015.csv"),
    ("tickets_2016", "nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2016.csv"),
]

loaded_schemas = []
for tablename, input_file in fiscal_year_csvs:
    # SAMPLE_SIZE=-1 is passed through to DuckDB's read_csv as an option.
    loaded_schemas.append(init_ddb_from_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1))

# Keep the individual names: the ibis setup cell registers schema1..schema3.
schema1, schema2, schema3 = loaded_schemas

# # Currently a bug in DuckDB
# # See issue https://github.com/duckdb/duckdb/issues/8649
# # tablename = "tickets_2017"
# # input_file = "nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2017.csv"
# # schema4 = init_ddb_from_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1)

# # Split format
# tablename = "tickets_2014"
# input_file = "nyc_parking_tickets_split/Parking_Violations_Issued_-_Fiscal_Year_2014__August_2013___June_2014_"
# schema1 = init_ddb_from_split_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1)

# tablename = "tickets_2015"
# input_file = "nyc_parking_tickets_split/Parking_Violations_Issued_-_Fiscal_Year_2015"
# schema2 = init_ddb_from_split_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1)

# tablename = "tickets_2016"
# input_file = "nyc_parking_tickets_split/Parking_Violations_Issued_-_Fiscal_Year_2016"
# schema3 = init_ddb_from_split_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1)

# # Currently a bug in DuckDB
# # See issue https://github.com/duckdb/duckdb/issues/8649
# # tablename = "tickets_2017"
# # input_file = "nyc_parking_tickets_split/Parking_Violations_Issued_-_Fiscal_Year_2017"
# # schema4 = init_ddb_from_split_csv(dbname, tablename, input_file, SAMPLE_SIZE=-1)

CREATE TABLE tickets_2014 AS SELECT * FROM read_csv('nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2014__August_2013___June_2014_.csv',SAMPLE_SIZE = -1, AUTO_DETECT=TRUE)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CREATE TABLE tickets_2015 AS SELECT * FROM read_csv('nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2015.csv',SAMPLE_SIZE = -1, AUTO_DETECT=TRUE)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CREATE TABLE tickets_2016 AS SELECT * FROM read_csv('nyc_parking_tickets/Parking_Violations_Issued_-_Fiscal_Year_2016.csv',SAMPLE_SIZE = -1, AUTO_DETECT=TRUE)


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [3]:
import numpy as np
import pandas as pd
import ibis
import os

# ibis expressions are lazy by default: nothing runs until an expression is
# explicitly materialised, e.g. with to_pandas(). Interactive mode makes ibis
# execute an expression whenever it is displayed or printed, so the cells
# below do not have to call to_pandas() every time.
ibis.options.interactive = True

# Open the DuckDB file that the loading cell wrote to.
con = ibis.duckdb.connect(dbname)
# Hand each schema dict returned by the loaders to the connection.
# NOTE(review): register_schema does not appear to be part of upstream ibis —
# presumably a project-specific extension; confirm which ibis build is in use.
for schema in [schema1, schema2, schema3]:
    con.register_schema(schema)
# Last expression of the cell: display the tables known to the connection.
con.tables

Tables
------
- tickets_2014
- tickets_2015
- tickets_2016

In [4]:
# Bind one ibis table expression per fiscal-year table.
tickets_2014, tickets_2015, tickets_2016 = (
    con.table(table_name)
    for table_name in ('tickets_2014', 'tickets_2015', 'tickets_2016')
)

In [7]:
def _add_issue_month_and_year(tickets):
    """Return `tickets` with `month` and `year` columns derived from 'Issue Date'."""
    with_month = tickets.mutate(month=tickets['Issue Date'].month())
    return with_month.mutate(year=with_month['Issue Date'].year())


tickets_2014 = _add_issue_month_and_year(tickets_2014)
tickets_2015 = _add_issue_month_and_year(tickets_2015)
tickets_2016 = _add_issue_month_and_year(tickets_2016)

In [11]:
def _monthly_ticket_counts(tickets):
    """Keep rows with 2012 < year < 2018 and count them per (year, month).

    Both bounds are exclusive, so the retained issue years are 2013 through 2017.
    Returns the filtered table together with the grouped counts.
    """
    after_2012 = tickets[tickets.year > 2012]
    in_window = after_2012[after_2012.year < 2018]
    monthly = in_window.group_by(['year', 'month']).aggregate(in_window.count())
    return in_window, monthly


tickets_2014_2012_to_2018, groups = _monthly_ticket_counts(tickets_2014)
print(groups)

tickets_2015_2012_to_2018, groups = _monthly_ticket_counts(tickets_2015)
print(groups)

tickets_2016_2012_to_2018, groups = _monthly_ticket_counts(tickets_2016)
print(groups)

┏━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mmonth[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint32[0m │ [2mint64[0m       │
├───────┼───────┼─────────────┤
│  [1;36m2013[0m │     [1;36m1[0m │        [1;36m1590[0m │
│  [1;36m2013[0m │     [1;36m2[0m │         [1;36m375[0m │
│  [1;36m2013[0m │     [1;36m3[0m │         [1;36m461[0m │
│  [1;36m2013[0m │     [1;36m4[0m │         [1;36m574[0m │
│  [1;36m2013[0m │     [1;36m5[0m │         [1;36m276[0m │
│  [1;36m2013[0m │     [1;36m6[0m │        [1;36m3272[0m │
│  [1;36m2013[0m │     [1;36m7[0m │      [1;36m172711[0m │
│  [1;36m2013[0m │     [1;36m8[0m │      [1;36m838280[0m │
│  [1;36m2013[0m │     [1;36m9[0m │      [1;36m826688[0m │
│  [1;36m2013[0m │    [1;36m10[0m │      [1;36m952833[0m │
│     [2m…[0m │     [2m…[0m │           [2m…[0m │
└───────┴───────┴─────────────

┏━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mmonth[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint32[0m │ [2mint64[0m       │
├───────┼───────┼─────────────┤
│  [1;36m2013[0m │     [1;36m1[0m │          [1;36m29[0m │
│  [1;36m2013[0m │     [1;36m2[0m │          [1;36m15[0m │
│  [1;36m2013[0m │     [1;36m3[0m │          [1;36m24[0m │
│  [1;36m2013[0m │     [1;36m4[0m │          [1;36m11[0m │
│  [1;36m2013[0m │     [1;36m5[0m │          [1;36m10[0m │
│  [1;36m2013[0m │     [1;36m6[0m │          [1;36m87[0m │
│  [1;36m2013[0m │     [1;36m7[0m │          [1;36m89[0m │
│  [1;36m2013[0m │     [1;36m8[0m │          [1;36m90[0m │
│  [1;36m2013[0m │     [1;36m9[0m │         [1;36m104[0m │
│  [1;36m2013[0m │    [1;36m10[0m │          [1;36m52[0m │
│     [2m…[0m │     [2m…[0m │           [2m…[0m │
└───────┴───────┴─────────────

┏━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mmonth[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint32[0m │ [2mint64[0m       │
├───────┼───────┼─────────────┤
│  [1;36m2013[0m │     [1;36m1[0m │           [1;36m5[0m │
│  [1;36m2013[0m │     [1;36m2[0m │           [1;36m4[0m │
│  [1;36m2013[0m │     [1;36m3[0m │          [1;36m11[0m │
│  [1;36m2013[0m │     [1;36m4[0m │          [1;36m11[0m │
│  [1;36m2013[0m │     [1;36m5[0m │           [1;36m4[0m │
│  [1;36m2013[0m │     [1;36m6[0m │           [1;36m6[0m │
│  [1;36m2013[0m │     [1;36m7[0m │           [1;36m6[0m │
│  [1;36m2013[0m │     [1;36m8[0m │          [1;36m13[0m │
│  [1;36m2013[0m │     [1;36m9[0m │          [1;36m14[0m │
│  [1;36m2013[0m │    [1;36m10[0m │          [1;36m18[0m │
│     [2m…[0m │     [2m…[0m │           [2m…[0m │
└───────┴───────┴─────────────

In [15]:
# Ticket counts per vehicle make, one result per fiscal-year table.
group_by_make_2014, group_by_make_2015, group_by_make_2016 = (
    tickets.group_by('Vehicle Make').aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(group_by_make_2014, group_by_make_2015, group_by_make_2016, sep="\n")

┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m       │ [2mint64[0m       │
├──────────────┼─────────────┤
│ [32mTOYOT       [0m │      [1;36m878786[0m │
│ [32mNISSA       [0m │      [1;36m655276[0m │
│ [32mHONDA       [0m │      [1;36m810460[0m │
│ [32mGMC         [0m │      [1;36m313671[0m │
│ [32mMITSU       [0m │      [1;36m129208[0m │
│ [32mMERCU       [0m │       [1;36m97696[0m │
│ [32mCADIL       [0m │       [1;36m82261[0m │
│ [32mCHEVR       [0m │      [1;36m743556[0m │
│ [32mSATUR       [0m │       [1;36m26602[0m │
│ [32mME/BE       [0m │      [1;36m299845[0m │
│ [2m…[0m            │           [2m…[0m │
└──────────────┴─────────────┘



┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m       │ [2mint64[0m       │
├──────────────┼─────────────┤
│ [32mFRUEH       [0m │      [1;36m432073[0m │
│ [32mFORD        [0m │     [1;36m1521874[0m │
│ [32mNISSA       [0m │      [1;36m908783[0m │
│ [32mHONDA       [0m │     [1;36m1102614[0m │
│ [32mDODGE       [0m │      [1;36m403307[0m │
│ [32mACURA       [0m │      [1;36m214310[0m │
│ [32mCHRYS       [0m │      [1;36m209736[0m │
│ [32mBMW         [0m │      [1;36m377156[0m │
│ [32mKIA         [0m │      [1;36m101821[0m │
│ [32mCHEVR       [0m │      [1;36m897845[0m │
│ [2m…[0m            │           [2m…[0m │
└──────────────┴─────────────┘



┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m       │ [2mint64[0m       │
├──────────────┼─────────────┤
│ [32mNISSA       [0m │      [1;36m834833[0m │
│ [32mHONDA       [0m │     [1;36m1014074[0m │
│ [32mFORD        [0m │     [1;36m1324774[0m │
│ [32mDODGE       [0m │      [1;36m359203[0m │
│ [32mTOYOT       [0m │     [1;36m1154790[0m │
│ [32mJEEP        [0m │      [1;36m302513[0m │
│ [32mVOLVO       [0m │       [1;36m76708[0m │
│ [32mCHEVR       [0m │      [1;36m759663[0m │
│ [32mME/BE       [0m │      [1;36m362575[0m │
│ [32mBMW         [0m │      [1;36m353303[0m │
│ [2m…[0m            │           [2m…[0m │
└──────────────┴─────────────┘



In [14]:
# Ticket counts per (issue year, vehicle make), one result per fiscal-year table.
group_by_year_make_2014, group_by_year_make_2015, group_by_year_make_2016 = (
    tickets.group_by(['year', 'Vehicle Make']).aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(group_by_year_make_2014, group_by_year_make_2015, group_by_year_make_2016, sep="\n")

┏━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m       │ [2mint64[0m       │
├───────┼──────────────┼─────────────┤
│  [1;36m2013[0m │ [32mTOYOT       [0m │      [1;36m427072[0m │
│  [1;36m2013[0m │ [32mNISSA       [0m │      [1;36m313727[0m │
│  [1;36m2013[0m │ [32mHONDA       [0m │      [1;36m391529[0m │
│  [1;36m2013[0m │ [32mGMC         [0m │      [1;36m150681[0m │
│  [1;36m2013[0m │ [32mMITSU       [0m │       [1;36m64716[0m │
│  [1;36m2013[0m │ [32mMERCU       [0m │       [1;36m49989[0m │
│  [1;36m2013[0m │ [32mCADIL       [0m │       [1;36m39905[0m │
│  [1;36m2013[0m │ [32mCHEVR       [0m │      [1;36m356989[0m │
│  [1;36m2013[0m │ [32mSATUR       [0m │       [1;36m13509[0m │
│  [1;36m2013[0m │ [32mME/BE       [0m │      [1;36m145388[0m │
│

┏━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m       │ [2mint64[0m       │
├───────┼──────────────┼─────────────┤
│  [1;36m2015[0m │ [32mHONDA       [0m │      [1;36m557315[0m │
│  [1;36m2014[0m │ [32mBMW         [0m │      [1;36m183539[0m │
│  [1;36m2014[0m │ [32mCHEVR       [0m │      [1;36m447652[0m │
│  [1;36m2015[0m │ [32mMITSU       [0m │       [1;36m70124[0m │
│  [1;36m2014[0m │ [32mHONDA       [0m │      [1;36m545161[0m │
│  [1;36m2015[0m │ [32mCHEVR       [0m │      [1;36m450117[0m │
│  [1;36m2014[0m │ [32mGMC         [0m │      [1;36m178360[0m │
│  [1;36m2014[0m │ [32mME/BE       [0m │      [1;36m189034[0m │
│  [1;36m2014[0m │ [32mFRUEH       [0m │      [1;36m200994[0m │
│  [1;36m2015[0m │ [32mHYUND       [0m │      [1;36m138198[0m │
│

┏━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Make[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m       │ [2mint64[0m       │
├───────┼──────────────┼─────────────┤
│  [1;36m2015[0m │ [32mTOYOT       [0m │      [1;36m625355[0m │
│  [1;36m2015[0m │ [32mFORD        [0m │      [1;36m712159[0m │
│  [1;36m2015[0m │ [32mLINCO       [0m │       [1;36m58714[0m │
│  [1;36m2015[0m │ [32mCADIL       [0m │       [1;36m47262[0m │
│  [1;36m2015[0m │ [32mGMC         [0m │      [1;36m154406[0m │
│  [1;36m2015[0m │ [32mCHEVR       [0m │      [1;36m420003[0m │
│  [1;36m2015[0m │ [32mHONDA       [0m │      [1;36m554267[0m │
│  [1;36m2015[0m │ [32mNISSA       [0m │      [1;36m452501[0m │
│  [1;36m2015[0m │ [32mFRUEH       [0m │      [1;36m215683[0m │
│  [1;36m2015[0m │ [32mMAZDA       [0m │       [1;36m57252[0m │
│

In [17]:
# Ticket counts per violation code, one result per fiscal-year table.
group_by_violation_code_2014, group_by_violation_code_2015, group_by_violation_code_2016 = (
    tickets.group_by('Violation Code').aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(
    group_by_violation_code_2014,
    group_by_violation_code_2015,
    group_by_violation_code_2016,
    sep="\n",
)

┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint64[0m          │ [2mint64[0m       │
├────────────────┼─────────────┤
│              [1;36m0[0m │          [1;36m21[0m │
│              [1;36m1[0m │          [1;36m18[0m │
│              [1;36m2[0m │          [1;36m14[0m │
│              [1;36m3[0m │          [1;36m15[0m │
│              [1;36m4[0m │         [1;36m748[0m │
│              [1;36m5[0m │      [1;36m131519[0m │
│              [1;36m6[0m │         [1;36m314[0m │
│              [1;36m7[0m │      [1;36m433213[0m │
│              [1;36m8[0m │        [1;36m2800[0m │
│              [1;36m9[0m │        [1;36m9122[0m │
│              [2m…[0m │           [2m…[0m │
└────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint64[0m          │ [2mint64[0m       │
├────────────────┼─────────────┤
│              [1;36m0[0m │          [1;36m17[0m │
│              [1;36m1[0m │        [1;36m5325[0m │
│              [1;36m2[0m │         [1;36m135[0m │
│              [1;36m3[0m │        [1;36m4481[0m │
│              [1;36m4[0m │         [1;36m956[0m │
│              [1;36m5[0m │      [1;36m224560[0m │
│              [1;36m6[0m │         [1;36m245[0m │
│              [1;36m7[0m │      [1;36m719753[0m │
│              [1;36m8[0m │        [1;36m3827[0m │
│              [1;36m9[0m │       [1;36m10972[0m │
│              [2m…[0m │           [2m…[0m │
└────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint64[0m          │ [2mint64[0m       │
├────────────────┼─────────────┤
│              [1;36m0[0m │           [1;36m8[0m │
│              [1;36m1[0m │        [1;36m1186[0m │
│              [1;36m2[0m │         [1;36m106[0m │
│              [1;36m3[0m │         [1;36m676[0m │
│              [1;36m4[0m │         [1;36m824[0m │
│              [1;36m5[0m │      [1;36m112405[0m │
│              [1;36m6[0m │         [1;36m219[0m │
│              [1;36m7[0m │      [1;36m492478[0m │
│              [1;36m8[0m │        [1;36m2965[0m │
│              [1;36m9[0m │       [1;36m13126[0m │
│              [2m…[0m │           [2m…[0m │
└────────────────┴─────────────┘



In [18]:
# Ticket counts per (issue year, violation code), one result per fiscal-year table.
(
    group_by_year_violation_code_2014,
    group_by_year_violation_code_2015,
    group_by_year_violation_code_2016,
) = (
    tickets.group_by(['year', 'Violation Code']).aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(
    group_by_year_violation_code_2014,
    group_by_year_violation_code_2015,
    group_by_year_violation_code_2016,
    sep="\n",
)

┏━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint64[0m          │ [2mint64[0m       │
├───────┼────────────────┼─────────────┤
│  [1;36m2013[0m │              [1;36m5[0m │       [1;36m83408[0m │
│  [1;36m2013[0m │              [1;36m7[0m │      [1;36m263455[0m │
│  [1;36m2013[0m │             [1;36m21[0m │      [1;36m640686[0m │
│  [1;36m2013[0m │             [1;36m71[0m │      [1;36m227488[0m │
│  [1;36m2013[0m │             [1;36m19[0m │      [1;36m135003[0m │
│  [1;36m2013[0m │             [1;36m20[0m │      [1;36m257097[0m │
│  [1;36m2013[0m │             [1;36m14[0m │      [1;36m409225[0m │
│  [1;36m2013[0m │             [1;36m70[0m │      [1;36m100286[0m │
│  [1;36m2013[0m │             [1;36m37[0m │      [1;36m354456[0m │
│  [1;36m2013[0m │     

┏━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint64[0m          │ [2mint64[0m       │
├───────┼────────────────┼─────────────┤
│  [1;36m2015[0m │             [1;36m78[0m │       [1;36m35865[0m │
│  [1;36m2014[0m │             [1;36m21[0m │      [1;36m820819[0m │
│  [1;36m2014[0m │             [1;36m38[0m │      [1;36m672060[0m │
│  [1;36m2015[0m │             [1;36m38[0m │      [1;36m746562[0m │
│  [1;36m2014[0m │             [1;36m71[0m │      [1;36m275786[0m │
│  [1;36m2014[0m │             [1;36m46[0m │      [1;36m288225[0m │
│  [1;36m2015[0m │             [1;36m70[0m │      [1;36m133451[0m │
│  [1;36m2015[0m │             [1;36m14[0m │      [1;36m517733[0m │
│  [1;36m2014[0m │             [1;36m40[0m │      [1;36m258027[0m │
│  [1;36m2014[0m │     

┏━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mViolation Code[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mint64[0m          │ [2mint64[0m       │
├───────┼────────────────┼─────────────┤
│  [1;36m2015[0m │             [1;36m46[0m │      [1;36m308878[0m │
│  [1;36m2015[0m │             [1;36m38[0m │      [1;36m596597[0m │
│  [1;36m2015[0m │             [1;36m37[0m │      [1;36m356107[0m │
│  [1;36m2015[0m │             [1;36m40[0m │      [1;36m249269[0m │
│  [1;36m2015[0m │             [1;36m19[0m │      [1;36m158139[0m │
│  [1;36m2015[0m │             [1;36m20[0m │      [1;36m321352[0m │
│  [1;36m2015[0m │             [1;36m14[0m │      [1;36m469491[0m │
│  [1;36m2015[0m │             [1;36m31[0m │       [1;36m70575[0m │
│  [1;36m2015[0m │             [1;36m71[0m │      [1;36m271906[0m │
│  [1;36m2015[0m │     

In [19]:
# Ticket counts per vehicle body type, one result per fiscal-year table.
(
    group_by_vehicle_body_type_2014,
    group_by_vehicle_body_type_2015,
    group_by_vehicle_body_type_2016,
) = (
    tickets.group_by('Vehicle Body Type').aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(
    group_by_vehicle_body_type_2014,
    group_by_vehicle_body_type_2015,
    group_by_vehicle_body_type_2016,
    sep="\n",
)

┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m            │ [2mint64[0m       │
├───────────────────┼─────────────┤
│ [32mSDN              [0m │      [1;36m441801[0m │
│ [32mSUBN             [0m │     [1;36m2797693[0m │
│ [32mVAN              [0m │     [1;36m1384393[0m │
│ [32mP-U              [0m │       [1;36m24036[0m │
│ [32mDELV             [0m │      [1;36m730526[0m │
│ [2mNULL[0m              │      [1;36m110868[0m │
│ [32mTRLR             [0m │       [1;36m55643[0m │
│ [32mTWOD             [0m │        [1;36m1157[0m │
│ [32mWORK             [0m │          [1;36m48[0m │
│ [32mUTIL             [0m │       [1;36m78454[0m │
│ [2m…[0m                 │           [2m…[0m │
└───────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m            │ [2mint64[0m       │
├───────────────────┼─────────────┤
│ [32m2DSD             [0m │      [1;36m319046[0m │
│ [32mSUBN             [0m │     [1;36m3729346[0m │
│ [32mVAN              [0m │     [1;36m1709091[0m │
│ [32m4DSD             [0m │     [1;36m3340014[0m │
│ [32mDELV             [0m │      [1;36m892781[0m │
│ [32mSDN              [0m │      [1;36m524596[0m │
│ [32mPICK             [0m │      [1;36m297562[0m │
│ [32mT/CR             [0m │       [1;36m14581[0m │
│ [32mREFG             [0m │       [1;36m89643[0m │
│ [32mTK               [0m │       [1;36m31199[0m │
│ [2m…[0m                 │           [2m…[0m │
└───────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mstring[0m            │ [2mint64[0m       │
├───────────────────┼─────────────┤
│ [32mSDN              [0m │      [1;36m424043[0m │
│ [32mSUBN             [0m │     [1;36m3466037[0m │
│ [32mP-U              [0m │       [1;36m23649[0m │
│ [32mTWOD             [0m │         [1;36m827[0m │
│ [32mDELV             [0m │      [1;36m755282[0m │
│ [32mVAN              [0m │     [1;36m1518303[0m │
│ [32mFOUR             [0m │        [1;36m3814[0m │
│ [2mNULL[0m              │       [1;36m39271[0m │
│ [32mTRLR             [0m │       [1;36m33008[0m │
│ [32mTAXI             [0m │       [1;36m63613[0m │
│ [2m…[0m                 │           [2m…[0m │
└───────────────────┴─────────────┘



In [20]:
# Ticket counts per (issue year, vehicle body type), one result per fiscal-year table.
(
    group_by_year_vehicle_body_type_2014,
    group_by_year_vehicle_body_type_2015,
    group_by_year_vehicle_body_type_2016,
) = (
    tickets.group_by(['year', 'Vehicle Body Type']).aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(
    group_by_year_vehicle_body_type_2014,
    group_by_year_vehicle_body_type_2015,
    group_by_year_vehicle_body_type_2016,
    sep="\n",
)

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m            │ [2mint64[0m       │
├───────┼───────────────────┼─────────────┤
│  [1;36m2013[0m │ [32mVAN              [0m │      [1;36m639710[0m │
│  [1;36m2013[0m │ [32mSUBN             [0m │     [1;36m1337137[0m │
│  [1;36m2013[0m │ [32mPICK             [0m │      [1;36m105797[0m │
│  [1;36m2013[0m │ [32m4DSD             [0m │     [1;36m1212518[0m │
│  [1;36m2013[0m │ [32mDELV             [0m │      [1;36m337916[0m │
│  [1;36m2013[0m │ [32m2DSD             [0m │      [1;36m125306[0m │
│  [1;36m2013[0m │ [32mMCY              [0m │       [1;36m16442[0m │
│  [1;36m2013[0m │ [32mTAXI             [0m │       [1;36m25496[0m │
│  [1;36m2013[0m │ [32mREFG             [0m │       [1;36m29118[0m │
│

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m            │ [2mint64[0m       │
├───────┼───────────────────┼─────────────┤
│  [1;36m2014[0m │ [32mSUBN             [0m │     [1;36m1813551[0m │
│  [1;36m2015[0m │ [32mVAN              [0m │      [1;36m879764[0m │
│  [1;36m2015[0m │ [32m4DSD             [0m │     [1;36m1694252[0m │
│  [1;36m2014[0m │ [32m4DSD             [0m │     [1;36m1645761[0m │
│  [1;36m2014[0m │ [32mVAN              [0m │      [1;36m829117[0m │
│  [1;36m2015[0m │ [32mSUBN             [0m │     [1;36m1915458[0m │
│  [1;36m2014[0m │ [32mDELV             [0m │      [1;36m431316[0m │
│  [1;36m2014[0m │ [32mTRAC             [0m │       [1;36m37181[0m │
│  [1;36m2015[0m │ [32mSDN              [0m │      [1;36m237304[0m │
│

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃[1m [0m[1myear[0m[1m [0m[1m [0m┃[1m [0m[1mVehicle Body Type[0m[1m [0m┃[1m [0m[1mCountStar()[0m[1m [0m┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ [2mint32[0m │ [2mstring[0m            │ [2mint64[0m       │
├───────┼───────────────────┼─────────────┤
│  [1;36m2015[0m │ [32mVAN              [0m │      [1;36m795627[0m │
│  [1;36m2015[0m │ [32mSUBN             [0m │     [1;36m1868831[0m │
│  [1;36m2015[0m │ [32m4DSD             [0m │     [1;36m1638106[0m │
│  [1;36m2015[0m │ [32mDELV             [0m │      [1;36m400540[0m │
│  [1;36m2015[0m │ [32mREFG             [0m │       [1;36m42264[0m │
│  [1;36m2015[0m │ [32mPICK             [0m │      [1;36m141013[0m │
│  [1;36m2015[0m │ [32mUTIL             [0m │       [1;36m39663[0m │
│  [1;36m2015[0m │ [32mTRAC             [0m │       [1;36m38846[0m │
│  [1;36m2015[0m │ [32mBUS              [0m │       [1;36m26014[0m │
│

In [21]:
# Ticket counts per violation county, one result per fiscal-year table.
(
    group_by_violation_county_2014,
    group_by_violation_county_2015,
    group_by_violation_county_2016,
) = (
    tickets.group_by('Violation County').aggregate(tickets.count())
    for tickets in (tickets_2014, tickets_2015, tickets_2016)
)
# One print call, newline-separated: same stdout as three consecutive prints.
print(
    group_by_violation_county_2014,
    group_by_violation_county_2015,
    group_by_violation_county_2016,
    sep="\n",
)

┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ Violation County ┃ CountStar() ┃
┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ string           │ int64       │
├──────────────────┼─────────────┤
│ Q                │     1825974 │
│ NULL             │      705205 │
│ NY               │     3547196 │
│ BX               │      943549 │
│ K                │     1979048 │
│ R                │       99290 │
│ KINGS            │           1 │
│ NYC              │           2 │
│ 103              │           1 │
│ QUEEN            │           3 │
│ …                │           … │
└──────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ Violation County ┃ CountStar() ┃
┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ string           │ int64       │
├──────────────────┼─────────────┤
│ R                │      114317 │
│ K                │     2395622 │
│ NY               │     4110588 │
│ BX               │     1183682 │
│ Q                │     2165265 │
│ NULL             │     1839734 │
│ KINGS            │          10 │
│ QUEEN            │           3 │
│ NEW Y            │           2 │
│ NEWY             │           2 │
│ …                │           … │
└──────────────────┴─────────────┘



┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ Violation County ┃ CountStar() ┃
┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ string           │ int64       │
├──────────────────┼─────────────┤
│ NULL             │     1715038 │
│ BX               │     1080118 │
│ NY               │     3539259 │
│ Q                │     1836567 │
│ K                │     2198318 │
│ R                │       93811 │
│ QNS              │           4 │
│ QUEEN            │           3 │
│ BRONX            │           1 │
│ KINGS            │           8 │
│ …                │           … │
└──────────────────┴─────────────┘



In [22]:
# Row counts per ('year', 'Violation County') pair, built identically for each
# of the three yearly ticket tables (defined in earlier cells). The generator
# is unpacked in year order, so each result lands in the matching name; a
# fresh key list is created for every table, as in the per-table statements.
(
    group_by_year_violation_county_2014,
    group_by_year_violation_county_2015,
    group_by_year_violation_county_2016,
) = (
    yearly.group_by(['year', 'Violation County']).aggregate(yearly.count())
    for yearly in (tickets_2014, tickets_2015, tickets_2016)
)
# Emit the three results one after another, each terminated by a newline.
print(
    group_by_year_violation_county_2014,
    group_by_year_violation_county_2015,
    group_by_year_violation_county_2016,
    sep='\n',
)

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ year  ┃ Violation County ┃ CountStar() ┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ int32 │ string           │ int64       │
├───────┼──────────────────┼─────────────┤
│  2013 │ Q                │      870388 │
│  2012 │ Q                │         111 │
│  2013 │ NULL             │      373369 │
│  2013 │ NY               │     1697924 │
│  2018 │ Q                │          20 │
│  2013 │ BX               │      437995 │
│  2013 │ K                │      960043 │
│  2014 │ NY               │     1847829 │
│  2012 │ NULL             │          38 │
│  2014

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ year  ┃ Violation County ┃ CountStar() ┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ int32 │ string           │ int64       │
├───────┼──────────────────┼─────────────┤
│  2014 │ NY               │     1975192 │
│  2015 │ NY               │     2134956 │
│  2014 │ K                │     1158169 │
│  2015 │ Q                │     1140168 │
│  2014 │ Q                │     1024853 │
│  2014 │ BX               │      580035 │
│  2015 │ K                │     1237094 │
│  2014 │ R                │       62430 │
│  2015 │ BX               │      603453 │
│  2014

┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ year  ┃ Violation County ┃ CountStar() ┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ int32 │ string           │ int64       │
├───────┼──────────────────┼─────────────┤
│  2015 │ NY               │     1853109 │
│  2015 │ NULL             │     1061203 │
│  2015 │ K                │     1190297 │
│  2015 │ Q                │     1009491 │
│  2015 │ BX               │      582926 │
│  2015 │ R                │       53971 │
│  2016 │ K                │     1006986 │
│  2016 │ Q                │      826559 │
│  2018 │ BX               │          74 │
│  2016

In [23]:
# Shell cleanup: IPython substitutes the Python variable `dbname` into the
# command, then `rm` deletes every file matching the glob `<dbname>*`.
# NOTE(review): `dbname` is set in an earlier cell not shown here — presumably
# the DuckDB database file name; confirm the prefix cannot match unrelated files.
!rm $dbname*