In [None]:
import os
# Set CALITP_BQ_MAX_BYTES in the process environment before the remaining imports run.
# NOTE(review): presumably read by the calitp tooling as a BigQuery bytes-scanned cap — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000)  # 10**12 bytes = 1 TB (decimal)

import pandas as pd
import geopandas as gpd
from siuba import *

import shared_utils
import datetime as dt

import gcsfs

from calitp_data.storage import get_fs
# Filesystem handle returned by calitp_data's get_fs(); presumably a GCS filesystem — TODO confirm.
fs = get_fs()

# Base GCS folder for this analysis; it ends with "/" so file names can be appended directly.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/safety_projects/"

# Read in TIMS Export

In [None]:
# Load the TIMS export (parquet) from the project folder on GCS.
tims = pd.read_parquet(path=f"{GCS_FILE_PATH}TIMS_Data.parquet")

In [None]:
# Print every column of the raw TIMS table with its dtype and non-null count.
tims.info(show_counts=True, verbose=True)

In [None]:
# Tally rows per COLLISION_SEVERITY code (siuba verb called directly on the frame).
count(tims, _.COLLISION_SEVERITY)

In [None]:
# Keep only the most severe collisions and the handful of columns used below.
tims_small = (
    tims
    # COLLISION_SEVERITY <= 2: fatality or severe injury
    >> filter(_.COLLISION_SEVERITY <= 2)
    >> select(
        # identifiers and timing
        _.CASE_ID, _.ACCIDENT_YEAR, _.COLLISION_DATE, _.COLLISION_TIME,
        # collision characteristics
        _.COLLISION_SEVERITY, _.PCF_VIOL_CATEGORY, _.TYPE_OF_COLLISION, _.MVIW,
        # victims and modes involved
        _.NUMBER_KILLED, _.NUMBER_INJURED, _.PEDESTRIAN_ACCIDENT, _.BICYCLE_ACCIDENT,
        # location fields
        _.LATITUDE, _.LONGITUDE, _.POINT_X, _.POINT_Y,
    )
)

In [None]:
# Summarize the trimmed table: column dtypes and non-null counts.
tims_small.info()

In [None]:
# Preview the first ten rows of the trimmed table.
tims_small.head(n=10)

In [None]:
# Build a point GeoDataFrame from the trimmed TIMS table.
#
# Rows without coordinates are dropped *before* the points are built:
# points_from_xy turns NaN inputs into POINT (NaN NaN), which newer
# Shapely/GEOS versions do not report as empty, so the is_empty filter alone
# would let those rows through to the reprojection and map steps.
# NOTE(review): POINT_X / POINT_Y are presumably longitude / latitude — confirm.
tims_located = tims_small.dropna(subset=["POINT_X", "POINT_Y"])

tims_gdf = (
    gpd.GeoDataFrame(
        tims_located,
        geometry=gpd.points_from_xy(tims_located.POINT_X, tims_located.POINT_Y),
    )
    # still guard against empty geometries; `~` is the boolean negation operator
    >> filter(~_.geometry.is_empty)
)

In [None]:
# Label the point coordinates as EPSG 4326 (WGS 84). This only assigns a CRS; it does not reproject.
# NOTE(review): WGS 84 is an assumption about the TIMS export — confirm.
tims_gdf = tims_gdf.set_crs(crs='4326')

In [None]:
# Reproject the collision points to the CA_NAD83Albers CRS from shared_utils,
# so they line up with the project data.
tims_gdf = tims_gdf.to_crs(crs=shared_utils.geography_utils.CA_NAD83Albers)

In [None]:
# Render the collision points on an interactive map as the cell output.
tims_gdf.explore()