## Setup

In [1]:
import pandas as pd
from sodapy import Socrata
import numpy as np
from tqdm import tqdm

# Public datasets need no credentials, so the client is created without an
# application token, username, or password.
client = Socrata("data.seattle.gov", app_token=None)

# Load the ground-truth SDOT blocks (08-18 window); one row per block/date pair.
block_data = pd.read_csv(filepath_or_buffer='data/sdot_12.csv')



In [2]:
def run_query(client, data_id, element_key, date):
    """Fetch the occupancy rows of one block for a single day.

    Builds a SoQL query that selects ``occupancydatetime``, ``paidoccupancy``,
    ``sourceelementkey`` and ``parkingspacecount`` for the given
    ``element_key``, restricted to timestamps between 08:00:00 and 19:59:00
    on ``date``.

    Args:
        client: Object exposing ``get(data_id, query=...)``.
        data_id: Identifier of the dataset to query.
        element_key: Value matched against ``sourceelementkey``.
        date: Day to query; its ``str()`` form is placed directly before the
            ``T08:00:00`` / ``T19:59:00`` suffixes.

    Returns:
        Whatever ``client.get`` returns for the query.
    """
    window_start = f"{date!s}T08:00:00"
    window_end = f"{date!s}T19:59:00"
    soql = (
        "select occupancydatetime, paidoccupancy, sourceelementkey, parkingspacecount"
        f" where sourceelementkey = {element_key!s}"
        f" and occupancydatetime between '{window_start}' and '{window_end}'"
    )
    return client.get(data_id, query=soql)

## Query data from Seattle Open Data API

In [None]:
# One API request per (block, date) row of block_data; tqdm wraps the key
# column so the loop reports progress.
query_results = [
    run_query(client, "bwk6-iycu", block_key, day)
    for block_key, day in zip(tqdm(block_data['sourceelementkey']), block_data['Date'])
]

## Filter out block/date pairs that do not have the full 08-18 period

In [None]:
# Keep only block/date results that are complete. The query window runs from
# 08:00 to 19:59, so 720 rows presumably means one row per minute
# (12 h * 60 min) -- TODO confirm against the dataset.
# NOTE(review): `remove` is never filled in this cell; it is kept in case a
# later cell references it.
remove = pd.DataFrame()

# Build every frame first and concatenate once: calling pd.concat inside the
# loop re-copies the accumulated frame on each iteration.
complete_frames = [
    pd.DataFrame.from_records(group)
    for group in query_results
    if len(group) == 720
]

# pd.concat raises on an empty list, so fall back to an empty frame.
occ = pd.concat(complete_frames) if complete_frames else pd.DataFrame()

In [None]:
# Parse the API timestamps. The time part is spelled out as %H:%M:%S instead
# of %X, because %X means "the locale's time representation" and therefore
# depends on the locale the kernel runs under.
occ["occupancydatetime"] = pd.to_datetime(occ["occupancydatetime"], format="%Y-%m-%dT%H:%M:%S.000")

In [None]:
# Cache the raw query results on disk (index column omitted).
occ.to_csv(path_or_buf="data/sdot_12_raw.csv", index=False)

In [3]:
# Reload the cached raw occupancy rows from disk.
sdot_raw = pd.read_csv(filepath_or_buffer='data/sdot_12_raw.csv')

## Get each block's parking capacity

In [4]:
# Parse the timestamps read back from the CSV. The time part is spelled out as
# %H:%M:%S instead of %X, because %X means "the locale's time representation"
# and therefore depends on the locale the kernel runs under.
sdot_raw["occupancydatetime"] = pd.to_datetime(sdot_raw["occupancydatetime"], format="%Y-%m-%d %H:%M:%S")

In [5]:
# Hourly "naive" occupancy: for every block / calendar day / hour (and space
# count), keep the first mode of the paid-occupancy readings in that hour.
timestamps = sdot_raw["occupancydatetime"]
hourly_keys = [
    "sourceelementkey",
    timestamps.dt.date,
    timestamps.dt.hour,
    "parkingspacecount",
]
naive_occ = (
    sdot_raw.groupby(hourly_keys)["paidoccupancy"]
    .agg(lambda readings: readings.mode()[0])
    # The date and hour keys both carry the name "occupancydatetime", so the
    # index can only be turned into columns when duplicates are allowed.
    .reset_index(allow_duplicates=True)
)

In [8]:
# Turn the hourly paid-occupancy count into a fraction of the block's spaces.
# The column is overwritten in place, so an unguarded re-run of this cell
# would divide by the space count a second time. The flag lives on the frame
# itself: a freshly rebuilt naive_occ carries no flag and is normalized again.
if not naive_occ.attrs.get("paidoccupancy_is_ratio", False):
    naive_occ["paidoccupancy"] = (
        naive_occ["paidoccupancy"].astype(int) / naive_occ["parkingspacecount"].astype(int)
    )
    naive_occ.attrs["paidoccupancy_is_ratio"] = True

# One row per distinct (block, space count) pair, re-indexed from 0.
cap = (
    naive_occ[["sourceelementkey", "parkingspacecount"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
cap

Unnamed: 0,sourceelementkey,parkingspacecount
0,1013,4
1,1021,9
2,1022,9
3,1037,12
4,1277,9
...,...,...
485,123744,4
486,123942,5
487,123943,10
488,131238,2


In [9]:
# Persist the per-block capacity table (index column omitted).
cap.to_csv(path_or_buf="data/sdot_12_cap.csv", index=False)

In [10]:
# The capacity column is no longer needed in the hourly table.
naive_occ = naive_occ.drop(columns="parkingspacecount")

# Relabel by position: columns 1 and 2 become "date" and "hour", while the
# first and last columns keep their current names.
current_labels = list(naive_occ.columns)
naive_occ = naive_occ.set_axis(
    [current_labels[0], "date", "hour", current_labels[3]],
    axis="columns",
)

In [11]:
# Display the hourly occupancy table.
naive_occ

Unnamed: 0,sourceelementkey,date,hour,paidoccupancy
0,1013,2022-04-29,8,0.0
1,1013,2022-04-29,9,0.0
2,1013,2022-04-29,10,0.0
3,1013,2022-04-29,11,0.0
4,1013,2022-04-29,12,0.0
...,...,...,...,...
7147,136322,2022-04-27,15,0.0
7148,136322,2022-04-27,16,0.0
7149,136322,2022-04-27,17,0.0
7150,136322,2022-04-27,18,0.0


In [None]:
# Persist the hourly occupancy table (index column omitted).
naive_occ.to_csv(path_or_buf="data/sdot_12_naive.csv", index=False)

In [None]:
# Keep the first row of every (date, block) combination; keeping the first
# occurrence is drop_duplicates' default.
# NOTE(review): `id` shadows the Python builtin of the same name; the name is
# left unchanged because the next cell displays it.
id = naive_occ.drop_duplicates(subset=["date", "sourceelementkey"])

In [None]:
# Display the de-duplicated (date, block) rows.
id