# websites
- api explorer: https://data.torontopolice.on.ca/datasets/major-crime-indicators-1/api
- api documentation: https://developers.arcgis.com/rest/services-reference/enterprise/query-feature-service-layer-.htm 
- open data license https://data.torontopolice.on.ca/pages/licence 
- open data documentation (pdf)
    - The location of crime occurrences have been deliberately offset to the nearest road intersection node to protect the privacy of parties involved in the occurrence. All location data must be considered as an approximate location of the occurrence and users are advised not to interpret any of these locations as related to a specific address or individual.
- about https://data.torontopolice.on.ca/datasets/TorontoPS::major-crime-indicators-1/about 
- `improvement` parallel http requests: https://betterprogramming.pub/how-to-make-parallel-async-http-requests-in-python-d0bd74780b8a 

In [2]:
import numpy as np
import pandas as pd
import requests
import functools
import sys

In [3]:
url = f'https://services.arcgis.com/S9th0jAJ7bqgIRjw/arcgis/rest/services/Major_Crime_Indicators/FeatureServer/0/query?where=1%3D1&outFields=*&returnIdsOnly=true&outSR=4326&f=json'
r = requests.get(url)
json_data = r.json()
objectIds = sorted(json_data['objectIds'])
print(objectIds[:10])
print(f'number of records: {len(objectIds)}')

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
number of records: 262707


In [31]:
chunk_size = 200
objectIds_chunks = [objectIds[i:i+chunk_size] for i in range(0,len(objectIds),chunk_size)]

In [32]:
def get_data(id_list):
    
    bucket = ','.join(map(str, id_list))
    url = f'https://services.arcgis.com/S9th0jAJ7bqgIRjw/arcgis/rest/services/' + \
        'Major_Crime_Indicators/' + \
        'FeatureServer/0/query?objectIds=' + bucket + '&outFields=*&outSR=4326&f=json&returnExceededLimitFeatures=true'
    r = requests.get(url)
    try:
        r.raise_for_status()
        json_data = r.json()
        return pd.json_normalize(json_data['features'])    
    except requests.HTTPError as exception:
        print(f'error with chunk {id_list[0]}')
        print(exception)
        return None


In [33]:
get_data(objectIds_chunks[0])

Unnamed: 0,attributes.Index_,attributes.event_unique_id,attributes.Division,attributes.occurrencedate,attributes.reporteddate,attributes.premisetype,attributes.ucr_code,attributes.ucr_ext,attributes.offence,attributes.reportedyear,...,attributes.occurrencedayofweek,attributes.occurrencehour,attributes.MCI,attributes.Hood_ID,attributes.Neighbourhood,attributes.Long,attributes.Lat,attributes.ObjectId,geometry.x,geometry.y
0,3,GO-20141259602,D41,1388466000000,1388552400000,Commercial,2120,220,B&E W'Intent,2014,...,Tuesday,23,Break and Enter,121,Oakridge,-79.283837,43.692217,3,-79.283837,43.692217
1,4,GO-20141260128,D12,1388552400000,1388552400000,House,1430,100,Assault,2014,...,Wednesday,1,Assault,111,Rockcliffe-Smythe,-79.480268,43.683360,4,-79.480268,43.683360
2,5,GO-20141259483,D52,1388466000000,1388552400000,Transit,1430,100,Assault,2014,...,Tuesday,21,Assault,76,Bay Street Corridor,-79.386589,43.650853,5,-79.386589,43.650853
3,6,GO-20141260291,D51,1388552400000,1388552400000,Commercial,2120,200,B&E,2014,...,Wednesday,1,Break and Enter,71,Cabbagetown-South St.James Town,-79.368592,43.665875,6,-79.368592,43.665875
4,7,GO-20141260137,D14,1388552400000,1388552400000,Transit,1430,100,Assault,2014,...,Wednesday,1,Assault,95,Annex,-79.411749,43.666620,7,-79.411749,43.666620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,899,GO-20141337697,D32,1389589200000,1389589200000,Apartment,2120,200,B&E,2014,...,Monday,9,Break and Enter,36,Newtonbrook West,-79.445737,43.790059,199,-79.445737,43.790059
196,900,GO-20141337751,D14,1389589200000,1389589200000,Commercial,1420,100,Assault With Weapon,2014,...,Monday,1,Assault,82,Niagara,-79.407671,43.646468,200,-79.407671,43.646468
197,1,GO-20141624853,D31,1393736400000,1393736400000,Apartment,1430,100,Assault,2014,...,Sunday,5,Assault,22,Humbermede,-79.538894,43.743025,1,-79.538894,43.743025
198,2,GO-20141625057,D51,1393736400000,1393736400000,Commercial,2120,200,B&E,2014,...,Sunday,4,Break and Enter,73,Moss Park,-79.365716,43.655340,2,-79.365716,43.655340


In [40]:
get_data(objectIds_chunks[-1])


Unnamed: 0,attributes.Index_,attributes.event_unique_id,attributes.Division,attributes.occurrencedate,attributes.reporteddate,attributes.premisetype,attributes.ucr_code,attributes.ucr_ext,attributes.offence,attributes.reportedyear,...,attributes.occurrencedayofweek,attributes.occurrencehour,attributes.MCI,attributes.Hood_ID,attributes.Neighbourhood,attributes.Long,attributes.Lat,attributes.ObjectId,geometry.x,geometry.y
0,262090,GO-2021958398,D55,1621742400000,1621742400000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Sunday,12,Auto Theft,62,East End-Danforth,-79.292123,43.685678,262601,-79.292123,43.685678
1,262094,GO-2021954918,D32,1621742400000,1621742400000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Sunday,12,Auto Theft,34,Bathurst Manor,-79.439002,43.757777,262605,-79.439002,43.757777
2,262095,GO-2021954759,D41,1621742400000,1621742400000,Commercial,2135,210,Theft Of Motor Vehicle,2021,...,Sunday,11,Auto Theft,124,Kennedy Park,-79.247105,43.734551,262606,-79.247105,43.734551
3,262096,GO-2021954473,D54,1621742400000,1621742400000,House,2135,210,Theft Of Motor Vehicle,2021,...,Sunday,3,Auto Theft,59,Danforth East York,-79.332779,43.687287,262607,-79.332779,43.687287
4,262097,GO-2021955493,D32,1621742400000,1621742400000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Sunday,12,Auto Theft,33,Clanton Park,-79.458600,43.730923,262608,-79.458600,43.730923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102,262195,GO-20211008952,D14,1622174400000,1622433600000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Friday,13,Auto Theft,77,Waterfront Communities-The Island,-79.393304,43.641343,262700,-79.393304,43.641343
103,262173,GO-20211026932,D51,1622088000000,1622606400000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Thursday,23,Auto Theft,72,Regent Park,-79.356830,43.664375,262678,-79.356830,43.664375
104,262154,GO-20211060562,D23,1622001600000,1623038400000,Commercial,2135,210,Theft Of Motor Vehicle,2021,...,Wednesday,9,Auto Theft,1,West Humber-Clairville,-79.581078,43.713846,262659,-79.581078,43.713846
105,262156,GO-20211074260,D13,1622088000000,1623211200000,Outside,2135,210,Theft Of Motor Vehicle,2021,...,Thursday,16,Auto Theft,101,Forest Hill South,-79.417375,43.685784,262661,-79.417375,43.685784


In [41]:
df = functools.reduce(lambda x,y : pd.concat([x,y]), map(get_data, objectIds_chunks))

In [None]:
# for i in range(len(objectIds_chunks)):
#     try:
#         get_data(objectIds_chunks[i])
#         print(f"chunk {i} done")
#     except:
#         print(f'error on chunk {i}')
#         break
    

In [42]:
df.shape

(262707, 29)

In [37]:
import datetime
datetime.datetime.fromtimestamp(1393736400000/1000)


datetime.datetime(2014, 3, 2, 13, 0)

In [59]:
url = f'https://services.arcgis.com/S9th0jAJ7bqgIRjw/arcgis/rest/services/Major_Crime_Indicators/FeatureServer/0/query?where=1%3D1&returnExceededLimitFeatures=true&outFields=*&outSR=4326&f=json'
r = requests.get(url)
json_data = r.json()
pd.json_normalize(json_data['features'])

Unnamed: 0,attributes.Index_,attributes.event_unique_id,attributes.Division,attributes.occurrencedate,attributes.reporteddate,attributes.premisetype,attributes.ucr_code,attributes.ucr_ext,attributes.offence,attributes.reportedyear,...,attributes.occurrencedayofweek,attributes.occurrencehour,attributes.MCI,attributes.Hood_ID,attributes.Neighbourhood,attributes.Long,attributes.Lat,attributes.ObjectId,geometry.x,geometry.y
0,3,GO-20141259602,D41,1388466000000,1388552400000,Commercial,2120,220,B&E W'Intent,2014,...,Tuesday,23,Break and Enter,121,Oakridge,-79.283837,43.692217,3,-79.283837,43.692217
1,4,GO-20141260128,D12,1388552400000,1388552400000,House,1430,100,Assault,2014,...,Wednesday,1,Assault,111,Rockcliffe-Smythe,-79.480268,43.683360,4,-79.480268,43.683360
2,5,GO-20141259483,D52,1388466000000,1388552400000,Transit,1430,100,Assault,2014,...,Tuesday,21,Assault,76,Bay Street Corridor,-79.386589,43.650853,5,-79.386589,43.650853
3,6,GO-20141260291,D51,1388552400000,1388552400000,Commercial,2120,200,B&E,2014,...,Wednesday,1,Break and Enter,71,Cabbagetown-South St.James Town,-79.368592,43.665875,6,-79.368592,43.665875
4,7,GO-20141260137,D14,1388552400000,1388552400000,Transit,1430,100,Assault,2014,...,Wednesday,1,Assault,95,Annex,-79.411749,43.666620,7,-79.411749,43.666620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,906,GO-20141337257,D22,1389589200000,1389589200000,Apartment,2120,200,B&E,2014,...,Monday,10,Break and Enter,11,Eringate-Centennial-West Deane,-79.588474,43.644699,606,-79.588474,43.644699
996,907,GO-20141337866,D22,1389589200000,1389589200000,Apartment,2120,220,B&E W'Intent,2014,...,Monday,19,Break and Enter,11,Eringate-Centennial-West Deane,-79.588474,43.644699,607,-79.588474,43.644699
997,908,GO-20141337980,D14,1387947600000,1389589200000,Other,1430,100,Assault,2014,...,Wednesday,11,Assault,95,Annex,-79.417724,43.668582,608,-79.417724,43.668582
998,909,GO-20141337999,D42,1389589200000,1389589200000,Apartment,2120,200,B&E,2014,...,Monday,6,Break and Enter,117,LAmoreaux,-79.307024,43.800647,609,-79.307024,43.800647


In [58]:
import asyncio
import aiohttp
import time

async def get(url, session):
    try:
        async with session.get(url=url) as response:
            resp = await response.read()
            print("Successfully got url")
    except Exception as e:
        print("Unable to get url due to {}.".format(e.__class__))


async def main(urls):
    async with aiohttp.ClientSession() as session:
        ret = await asyncio.gather(*[get(url, session) for url in urls])
    print("Finalized all. Return is a list of len {} outputs.".format(len(ret)))


start = time.time()
asyncio.run(await main([url + ','.join(map(str,chunk)) for chunk in objectIds_chunks]))
end = time.time()

print(f"Took {end - start} seconds to pull {len(objectIds_chunks)} requests.")

Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully got url
Successfully 

RuntimeError: asyncio.run() cannot be called from a running event loop