# Download the latest data
Up to four downloads are needed to fetch or update the pedestrian safety data:
- Mapillary's road data
- Mapillary's signage data
- LAPD's collision data
- The neighborhood council boundaries shapefile

In [None]:
import configparser
import pandas as pd
import requests
import datetime
import os
import sys

# Load the mapillary client module
try:
    # Resolve the relative paths used below from this file's own directory.
    os.chdir(os.path.dirname(os.path.realpath(__file__)))
except (NameError, OSError):
    # Best effort only: `__file__` is not defined when this runs as notebook
    # cells (NameError) and the chdir itself can fail (OSError). In both cases
    # keep the current working directory. Anything else (for example
    # KeyboardInterrupt) is no longer swallowed.
    pass
mapillarywrapperdir = '../../311-data/mapillarywrapper/'
# The wrapper is imported from a sibling checkout, so its folder has to be on
# the module search path before the import below.
sys.path.append(mapillarywrapperdir)
from mapillarywrapper.client import client


# Read the client id from the wrapper's config file and build the client.
# ConfigParser.read() silently skips a missing file, in which case the lookup
# on the next-but-one line raises KeyError.
config = configparser.ConfigParser()
config.read(mapillarywrapperdir + 'mapillary.cfg')
CLIENT_ID = config['CLIENT-ID']['CLIENT_ID']
mapillaryclient = client.MapClient(CLIENT_ID)



Set the save locations of the raw data csv files.

In [None]:
# Folder that holds every raw download, followed by one save location per
# dataset inside that folder.
rawdatadir = '../Data/1-raw-data/'
rawroaddatacsv = f'{rawdatadir}raw-road-data.csv'
rawsigndatacsv = f'{rawdatadir}raw-sign-data.csv'
rawcollisiondatacsv = f'{rawdatadir}raw-collision-data.csv'
neighborhoodcouncilshapesgeojson = f'{rawdatadir}neighborhoodcouncils.geojson'

## Bounding box

This is the latitude/longitude box that encompasses the City of LA according to https://boundingbox.klokantech.com. Each corner below is written as [latitude, longitude].

In [None]:
# Edges of the box around the City of LA, in decimal degrees.
lasouthlatitude, lawestlongitude = 33.703622, -118.668187
lanorthlatitude, laeastlongitude = 34.337306, -118.155295

# Corners of the box, each written as [latitude, longitude].
laLowerLeft = [lasouthlatitude, lawestlongitude]
laUpperRight = [lanorthlatitude, laeastlongitude]

## Merging method
This will combine dataframes of the same dataset downloaded at different times or with different filters.

In [None]:
def combinedataframes(
    leftdf: pd.DataFrame, rightdf: pd.DataFrame, keycolumn: str
) -> pd.DataFrame:
  """Combine dataframes of the same dataset downloaded at different times or with different filters.

  Rows are matched on ``keycolumn``. Keys found only in ``rightdf`` are added
  as new rows, and for keys found in both frames the non-null values of
  ``rightdf`` replace the values of ``leftdf``.

  Args:
    leftdf: The older data. Its columns (apart from the position of the key
      column) define the columns of the result.
    rightdf: The newer data. Columns that exist only here are not carried
      over. May be completely empty, in which case nothing is changed.
    keycolumn: Name of the column that identifies a row in both frames.
      Presumably unique within each frame - TODO confirm.

  Returns:
    A new dataframe with a fresh 0..n-1 index. ``keycolumn`` is its last
    column.
  """
  # A download that found nothing can arrive as a frame without any columns,
  # which has no key column to index on. Substitute an empty frame shaped
  # like the left one so that the steps below simply change nothing.
  if rightdf.empty and keycolumn not in rightdf.columns:
    rightdf = leftdf.iloc[0:0]

  # Use the keycolumn as the index with a different name, to differentiate them.
  leftreindexed = leftdf.set_index(keycolumn, drop=False)
  leftreindexed.index.name = 'indexkeys'
  rightreindexed = rightdf.set_index(keycolumn, drop=False)
  rightreindexed.index.name = 'indexkeys'

  # Merge the keys, creating empty key-only rows for all new data.
  # Only the key column of the right frame takes part, so the merge yields the
  # suffixed pair <key>_x / <key>_y; both are dropped because the index
  # already carries the key.
  allrows = leftreindexed.merge(
      rightreindexed[keycolumn], left_index=True, right_index=True, how='outer'
  ).drop(columns=[keycolumn + '_x', keycolumn + '_y'])

  # Update all rows with the newest data (null values on the right never
  # overwrite existing values).
  allrows.update(rightreindexed)

  # copy the keys from the index back into a regular column
  allrows[keycolumn] = allrows.index
  allrows.reset_index(drop=True, inplace=True)
  return allrows




### ISO Date methods

In [None]:
def fromapidateformat(isostring: str) -> datetime.datetime:
  """Parse an API timestamp such as '2021-01-02T03:04:05.678Z' into a naive datetime.

  Any 'Z' characters at either end of the string are removed before the
  remainder is handed to ``datetime.fromisoformat``.
  """
  withoutzulu = isostring.strip('Z')
  parsed = datetime.datetime.fromisoformat(withoutzulu)
  return parsed


def toapidateformat(dt: datetime.datetime) -> str:
  """Format a datetime as an API timestamp such as '2021-01-02T03:04:05.678Z'.

  Args:
    dt: The moment to format. A naive datetime is written out as-is; a
      timezone-aware datetime is converted to UTC first.

  Returns:
    The ISO 8601 string with millisecond precision and a trailing 'Z'.
  """
  if dt.utcoffset() is not None:
    # The trailing 'Z' denotes UTC. Without this conversion isoformat() would
    # emit the datetime's own offset (e.g. '+02:00') in front of the 'Z'.
    dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)
  return dt.isoformat('T', 'milliseconds') + 'Z'

# Download data from mapillary
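If Mapillary data is already saved locally, download only the records last seen since one week before the newest saved record and merge them into the saved data. If it isn't, download everything.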

In [None]:
# Each Mapillary layer is saved to its own csv file.
mapillarydownloads = (
  ('points', rawroaddatacsv),
  ('trafficsigns', rawsigndatacsv),
)

for mapillarylayer, csvlocation in mapillarydownloads:
  savefileexists = os.path.isfile(csvlocation)
  if not savefileexists:
    # Nothing saved yet: request the whole layer inside the bounding box.
    fulldataset = pd.DataFrame(
      mapillaryclient.trafficinfo(
        laLowerLeft, laUpperRight, perpage=1000, layer=mapillarylayer
      )
    )
  else:
    previousdownload = pd.read_csv(csvlocation)
    lastseendates = previousdownload['last_seen_at'].map(fromapidateformat)
    mostrecentdata = lastseendates.max()
    # Start the request seven days before the newest saved record.
    # NOTE(review): the overlap presumably picks up records that changed
    # shortly before the previous download - confirm.
    datacutoffdate = mostrecentdata - datetime.timedelta(days=7)
    startlastseenat = toapidateformat(datacutoffdate.to_pydatetime())

    newmapillarydata = pd.DataFrame(
      mapillaryclient.trafficinfo(
        laLowerLeft,
        laUpperRight,
        perpage=1000,
        layer=mapillarylayer,
        startlastseenat=startlastseenat,
      )
    )

    # Rows are matched on the 'key' column when folding in the new records.
    fulldataset = combinedataframes(previousdownload, newmapillarydata, 'key')

  fulldataset.to_csv(csvlocation, index=False)
  print(f'Mapillary {mapillarylayer} dataset saved to {csvlocation}')

# Download LAPD Traffic collision data

Using the city's [traffic collision dataset](https://data.lacity.org/Public-Safety/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w). If the file doesn't exist, download everything. If it does exist, download everything added or updated since the last recorded change.

In [None]:
# CSV endpoint of the city's traffic collision dataset.
baseurl = 'https://data.lacity.org/resource/d5tf-ez2w.csv'
# Number of rows requested per page; also used to compute each page's offset.
recordsperpage = 50_000
# Value sent as `$select`: the ':*' part asks for the ':'-prefixed columns
# (such as :id, :created_at, :updated_at) and '*' for the regular ones.
queryVisibleAndInvisibleFields = ":*,*"

def getCollisionDataPageURL(pagenum: int, additionalParams: dict = None) -> str:
  """Build the URL that requests one page of the collision dataset.

  Args:
    pagenum: Zero-based page number; the row offset is
      ``recordsperpage * pagenum``.
    additionalParams: Extra query parameters (for example a ``$where``
      filter). They are applied last, so they override the defaults set here.
      ``None`` (the default) adds nothing.

  Returns:
    The fully encoded request URL.
  """
  queryparams = {
    '$limit': recordsperpage,
    '$offset': recordsperpage * pagenum,
    '$select': queryVisibleAndInvisibleFields,
    # Every page is requested in :id order.
    # NOTE(review): presumably needed so that offsets are stable from one
    # page request to the next - confirm.
    '$order': ':id',
  }
  # A None default replaces the former shared `{}` default argument.
  queryparams.update(additionalParams or {})

  # NOTE - data.lacity.org may start throttling after many requests. If that happens, we can signup for an app token.
  req = requests.PreparedRequest()
  req.prepare_url(baseurl, params=queryparams)
  return req.url


def downloadLACollisions(additionalParams: dict = None) -> pd.DataFrame:
  """Download every page of the collision dataset that matches the query.

  Pages are requested one after another until one comes back with a row
  count other than ``recordsperpage``, which marks the last page.

  Args:
    additionalParams: Extra query parameters (for example a ``$where``
      filter) sent with every page request. ``None`` (the default) downloads
      the whole dataset.

  Returns:
    All downloaded rows in one dataframe with a fresh 0..n-1 index.
  """
  # Always hand a dict to the URL builder, whatever default it declares.
  pageparams = {} if additionalParams is None else additionalParams

  collisiondatapages = []
  pagenum = 0
  while True:
    # The filter has to go out with every page. Previously only the first
    # page was filtered, so later pages of a filtered download returned
    # unfiltered rows.
    pageURL = getCollisionDataPageURL(pagenum, pageparams)
    page = pd.read_csv(pageURL)
    collisiondatapages.append(page)
    if len(page) != recordsperpage:
      break
    pagenum += 1

  return pd.concat(collisiondatapages, ignore_index=True)


In [None]:
# Refresh the saved collision data, or create it when nothing is saved yet.
if not os.path.isfile(rawcollisiondatacsv):
    fullcollisiondataset = downloadLACollisions()
    print(f"Downloaded {len(fullcollisiondataset)} rows.")
else:
    previousdownload = pd.read_csv(rawcollisiondatacsv)

    # The newest timestamp among all creation and update times marks the last
    # change that is already saved.
    changetimestamps = pd.concat(
        [previousdownload[':created_at'], previousdownload[':updated_at']],
        ignore_index=True,
    )
    lastchange = changetimestamps.map(fromapidateformat).max().to_pydatetime()
    lastchangestamp = toapidateformat(lastchange)

    # First fold in the rows created after the last saved change ...
    newrows = downloadLACollisions(
        {"$where": f":created_at > '{lastchangestamp}'"}
    )
    print(f"New rows downloaded: {len(newrows)}")
    newrowsadded = combinedataframes(previousdownload, newrows, ":id")

    # ... then the rows updated after it.
    updatedrows = downloadLACollisions(
        {"$where": f":updated_at > '{lastchangestamp}'"}
    )
    print(f"Updated rows downloaded: {len(updatedrows)}")
    fullcollisiondataset = combinedataframes(newrowsadded, updatedrows, ":id")

fullcollisiondataset.to_csv(rawcollisiondatacsv, index=False)
print(f"Los Angeles collisions dataset saved to {rawcollisiondatacsv}")


# Download neighborhood council shapefile

Website: https://geohub.lacity.org/datasets/neighborhood-councils-certified/explore

API: https://opendata.arcgis.com/datasets/9c8639737e3a457a8c0f6a93f9c36974_18.geojson

In [None]:
# Fetch the neighborhood council boundaries as geojson.
ncshapefileresponse = requests.get('https://opendata.arcgis.com/datasets/9c8639737e3a457a8c0f6a93f9c36974_18.geojson')
# Stop on an HTTP error status instead of overwriting the saved boundaries
# with the body of an error response.
ncshapefileresponse.raise_for_status()
# Written as raw bytes, exactly as received.
with open(neighborhoodcouncilshapesgeojson, "wb") as geojsonfile:
    geojsonfile.write(ncshapefileresponse.content)
print(f"Saved shapefile to {neighborhoodcouncilshapesgeojson}")