In [18]:
import json
import os
import urllib
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd

In [None]:
##################################################
##### Accidents #####
##################################################

In [None]:
# 2015-2021: download the accident records, one request and one JSON file per year

# Query endpoint of layer 0 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/0/query?"

# We create the array with the years we're gonna loop through (np.arange excludes the stop value: 2015..2021)
year_iter = np.arange(2015, 2022)
file_name_init = "shapefiles/accidents/accidents_"
file_name_end = ".json"

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell.
# Generous because the queries are heavy; raise it if legitimate replies need longer
request_timeout = 300

# Create the output folder up front so the first write cannot fail with FileNotFoundError
os.makedirs(os.path.dirname(file_name_init), exist_ok=True)

# We create a JSON file containing data for each year in the array
for year in year_iter:
    # The queries are heavy and the GIS service constrains the number of rows that can be retrieved (50,000 rows).
    # For this reason, we run queries for each year individually
    where_clause = "ANO_OCURRENCIA_ACC = " + str(year)
    accidents = {
        'where': where_clause,
        'outFields': 'OBJECTID, FORMULARIO, LOCALIDAD, CIV, PK_CALZADA, CLASE_ACC, GRAVEDAD, FECHA_HORA_ACC',
        'returnGeometry': 'true',
        'f': 'json',
    }
    encode_accidents = urllib.parse.urlencode(accidents).encode("utf-8")

    # Supplying a request body makes urllib send a POST. The with-block closes the
    # connection even when read() fails, so responses do not leak across iterations
    with urllib.request.urlopen(url, encode_accidents, timeout=request_timeout) as response_accidents:
        query_accidents = response_accidents.read()

    # We write the raw JSON response to a file
    file_name = file_name_init + str(year) + file_name_end
    with open(file_name, "wb") as json_file:
        json_file.write(query_accidents)

    # ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
    # JSON body, so we inspect the reply instead of assuming the saved file is complete
    try:
        reply = json.loads(query_accidents)
    except ValueError:
        reply = None
    if not isinstance(reply, dict):
        print(f"WARNING: {file_name} does not contain a JSON object")
    elif "error" in reply:
        print(f"WARNING: {file_name} holds a server error: {reply['error']}")
    elif reply.get("exceededTransferLimit"):
        print(f"WARNING: {file_name} is incomplete, the query exceeded the server's row limit")

    print("Iter: " + str(year)) # Keep track of the process (we know how many files we're creating)

In [12]:
# 2022 up to Aug: download the accident records for January-August 2022 into a single JSON file

# Query endpoint of layer 0 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/0/query?"

accidents = {
    'where': "ANO_OCURRENCIA_ACC = 2022 AND MES_OCURRENCIA_ACC IN ('ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO', 'JULIO', 'AGOSTO')",
    'outFields': 'OBJECTID, FORMULARIO, LOCALIDAD, CIV, PK_CALZADA, CLASE_ACC, GRAVEDAD, FECHA_HORA_ACC',
    'returnGeometry': 'true',
    'f': 'json',
}
encode_accidents = urllib.parse.urlencode(accidents).encode("utf-8")

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Supplying a request body makes urllib send a POST. The with-block closes the
# connection even when read() fails
with urllib.request.urlopen(url, encode_accidents, timeout=request_timeout) as response_accidents:
    query_accidents = response_accidents.read()

# We write the raw JSON response to a file (creating the output folder if it is missing)
accidents_2022_path = "shapefiles/accidents/accidents_2022.json"
os.makedirs(os.path.dirname(accidents_2022_path), exist_ok=True)
with open(accidents_2022_path, "wb") as json_file:
    json_file.write(query_accidents)

# ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
# JSON body, so we inspect the reply instead of assuming the saved file is complete
try:
    reply = json.loads(query_accidents)
except ValueError:
    reply = None
if not isinstance(reply, dict):
    print(f"WARNING: {accidents_2022_path} does not contain a JSON object")
elif "error" in reply:
    print(f"WARNING: {accidents_2022_path} holds a server error: {reply['error']}")
elif reply.get("exceededTransferLimit"):
    print(f"WARNING: {accidents_2022_path} is incomplete, the query exceeded the server's row limit")

In [None]:
##################################################
##### Injured people #####
##################################################

In [None]:
# 2015-2021: download the injured-people records, one request and one JSON file per year

# Query endpoint of layer 2 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/2/query?"

# We create the array with the years we're gonna loop through (np.arange excludes the stop value: 2015..2021)
year_iter = np.arange(2015, 2022)
file_name_init = "shapefiles/injured/injured_people_"
file_name_end = ".json"

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell.
# Generous because the queries are heavy; raise it if legitimate replies need longer
request_timeout = 300

# Create the output folder up front so the first write cannot fail with FileNotFoundError
os.makedirs(os.path.dirname(file_name_init), exist_ok=True)

# We create a JSON file containing data for each year in the array
for year in year_iter:
    # The queries are heavy and the GIS service constrains the number of rows that can be retrieved (50,000 rows).
    # For this reason, we run queries for each year individually
    where_clause = "ANO_OCURRENCIA_ACC = " + str(year)
    injured_people = {
        'where': where_clause,
        'outFields': 'OBJECTID, FORMULARIO, LOCALIDAD, CLASE_ACC, CONDICION, GENERO, EDAD, FECHA_HORA_ACC',
        'returnGeometry': 'true',
        'f': 'json',
    }
    encode_injured_people = urllib.parse.urlencode(injured_people).encode("utf-8")

    # Supplying a request body makes urllib send a POST. The with-block closes the
    # connection even when read() fails, so responses do not leak across iterations
    with urllib.request.urlopen(url, encode_injured_people, timeout=request_timeout) as response_injured_people:
        query_injured_people = response_injured_people.read()

    # We write the raw JSON response to a file
    file_name = file_name_init + str(year) + file_name_end
    with open(file_name, "wb") as json_file:
        json_file.write(query_injured_people)

    # ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
    # JSON body, so we inspect the reply instead of assuming the saved file is complete
    try:
        reply = json.loads(query_injured_people)
    except ValueError:
        reply = None
    if not isinstance(reply, dict):
        print(f"WARNING: {file_name} does not contain a JSON object")
    elif "error" in reply:
        print(f"WARNING: {file_name} holds a server error: {reply['error']}")
    elif reply.get("exceededTransferLimit"):
        print(f"WARNING: {file_name} is incomplete, the query exceeded the server's row limit")

    print("Iter: " + str(year)) # Keep track of the process (we know how many files we're creating)

In [17]:
# 2022 up to Aug: download the injured-people records for January-August 2022 into a single JSON file

# Query endpoint of layer 2 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/2/query?"

injured_people = {
    'where': "ANO_OCURRENCIA_ACC = 2022 AND MES_OCURRENCIA_ACC IN ('ENERO', 'FEBRERO', 'MARZO', 'ABRIL', 'MAYO', 'JUNIO', 'JULIO', 'AGOSTO')",
    'outFields': 'OBJECTID, FORMULARIO, LOCALIDAD, CLASE_ACC, CONDICION, GENERO, EDAD, FECHA_HORA_ACC',
    'returnGeometry': 'true',
    'f': 'json',
}
encode_injured_people = urllib.parse.urlencode(injured_people).encode("utf-8")

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Supplying a request body makes urllib send a POST. The with-block closes the
# connection even when read() fails
with urllib.request.urlopen(url, encode_injured_people, timeout=request_timeout) as response_injured_people:
    query_injured_people = response_injured_people.read()

# We write the raw JSON response to a file (creating the output folder if it is missing)
injured_2022_path = "shapefiles/injured/injured_people_2022.json"
os.makedirs(os.path.dirname(injured_2022_path), exist_ok=True)
with open(injured_2022_path, "wb") as json_file:
    json_file.write(query_injured_people)

# ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
# JSON body, so we inspect the reply instead of assuming the saved file is complete
try:
    reply = json.loads(query_injured_people)
except ValueError:
    reply = None
if not isinstance(reply, dict):
    print(f"WARNING: {injured_2022_path} does not contain a JSON object")
elif "error" in reply:
    print(f"WARNING: {injured_2022_path} holds a server error: {reply['error']}")
elif reply.get("exceededTransferLimit"):
    print(f"WARNING: {injured_2022_path} is incomplete, the query exceeded the server's row limit")

In [None]:
##################################################
##### Killed people #####
##################################################

In [None]:
# Download the killed-people records from 2015 onwards into a single JSON file

# Query endpoint of layer 1 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/1/query?"

# We specify the query: every year is requested at once, so the row-limit check below matters here
killed_people = {
    'where': 'ANO_OCURRENCIA_ACC >= 2015',
    'outFields': 'OBJECTID, FORMULARIO, LOCALIDAD, CLASE_ACC, CONDICION, GENERO, EDAD, MUERTE_POSTERIOR, FECHA_POSTERIOR_MUERTE, FECHA_HORA_ACC',
    'returnGeometry': 'true',
    'f': 'json',
}
encode_killed_people = urllib.parse.urlencode(killed_people).encode("utf-8")

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Supplying a request body makes urllib send a POST. The with-block closes the
# connection even when read() fails
with urllib.request.urlopen(url, encode_killed_people, timeout=request_timeout) as response_killed_people:
    query_killed_people = response_killed_people.read()

# We write the raw JSON response to a file (creating the output folder if it is missing)
killed_people_path = "shapefiles/killed/killed_people_2015-2022.json"
os.makedirs(os.path.dirname(killed_people_path), exist_ok=True)
with open(killed_people_path, "wb") as json_file:
    json_file.write(query_killed_people)

# ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
# JSON body, so we inspect the reply instead of assuming the saved file is complete
try:
    reply = json.loads(query_killed_people)
except ValueError:
    reply = None
if not isinstance(reply, dict):
    print(f"WARNING: {killed_people_path} does not contain a JSON object")
elif "error" in reply:
    print(f"WARNING: {killed_people_path} holds a server error: {reply['error']}")
elif reply.get("exceededTransferLimit"):
    print(f"WARNING: {killed_people_path} is incomplete, the query exceeded the server's row limit")

In [None]:
##################################################
##### Causes #####
##################################################

# This table and the ones that follow have no date information, so they cannot be queried year by year.
# Instead, we retrieve their rows in ranges of the OBJECTID field

In [None]:
# Download the causes table in chunks of OBJECTID, one JSON file per chunk

# Query endpoint of layer 5 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/5/query?"

# Bringing 50,000 rows sometimes causes the server to not respond. So, we decrease the number of rows we bring to minimize the
# chances of the server not responding
row_constraint = 30000
max_objectid = 658183 # Checking on Sep 13, 2022. Max(OBJECTID) matches the number of rows
# Number of complete chunks; integer division avoids the float round-trip of int(a / b)
objectid_iter = max_objectid // row_constraint
file_name_init = "shapefiles/causes/causes"
file_name_end = ".json"

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Create the output folder up front so the first write cannot fail with FileNotFoundError
os.makedirs(os.path.dirname(file_name_init), exist_ok=True)

# We create a JSON file containing "row_constraint" rows each, plus one last file with the remainder.
# The remainder is handled inside the same loop so the request code exists only once and the
# cell still works when there are no complete chunks at all
for i in range(objectid_iter + 1):
    is_last = i == objectid_iter

    # We specify the query
    if is_last:
        # Open-ended on purpose: everything above the last complete chunk, including any
        # rows whose OBJECTID is larger than the max_objectid recorded above
        where_clause = "OBJECTID > " + str(objectid_iter * row_constraint)
    else:
        lower_objectid = i * row_constraint + 1
        upper_objectid = (i + 1) * row_constraint
        where_clause = "OBJECTID BETWEEN " + str(lower_objectid) + " AND " + str(upper_objectid)
    causes = {
        'where': where_clause,
        'outFields': 'OBJECTID, FORMULARIO, CODIGO_VEHICULO, CODIGO_CAUSA, NOMBRE, TIPO, TIPO_CAUSA',
        'returnGeometry': 'false',
        'f': 'json',
    }
    encode_causes = urllib.parse.urlencode(causes).encode("utf-8")

    # Supplying a request body makes urllib send a POST. The with-block closes the
    # connection even when read() fails, so responses do not leak across iterations
    with urllib.request.urlopen(url, encode_causes, timeout=request_timeout) as response_causes:
        query_causes = response_causes.read()

    # We write the raw JSON response to a file (files are numbered from 1)
    file_name = file_name_init + str(i + 1) + file_name_end
    with open(file_name, "wb") as json_file:
        json_file.write(query_causes)

    # ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
    # JSON body, so we inspect the reply instead of assuming the saved file is complete
    try:
        reply = json.loads(query_causes)
    except ValueError:
        reply = None
    if not isinstance(reply, dict):
        print(f"WARNING: {file_name} does not contain a JSON object")
    elif "error" in reply:
        print(f"WARNING: {file_name} holds a server error: {reply['error']}")
    elif reply.get("exceededTransferLimit"):
        print(f"WARNING: {file_name} is incomplete, the query exceeded the server's row limit")

    # Keep track of the process (we know how many files we're creating)
    if is_last:
        print("Last iter")
    else:
        print("Iter: " + str(i))

In [None]:
##################################################
##### Road actors #####
##################################################

In [None]:
# Download the road-actors table in chunks of OBJECTID, one JSON file per chunk

# Query endpoint of layer 6 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/6/query?"

# Bringing 50,000 rows sometimes causes the server to not respond. So, we decrease the number of rows we bring to minimize the
# chances of the server not responding
row_constraint = 30000
max_objectid = 1627505 # Checking on Sep 13, 2022. Max(OBJECTID) matches the number of rows
# Number of complete chunks; integer division avoids the float round-trip of int(a / b)
objectid_iter = max_objectid // row_constraint
file_name_init = "shapefiles/actors/actors"
file_name_end = ".json"

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Create the output folder up front so the first write cannot fail with FileNotFoundError
os.makedirs(os.path.dirname(file_name_init), exist_ok=True)

# We create a JSON file containing "row_constraint" rows each, plus one last file with the remainder.
# The remainder is handled inside the same loop so the request code exists only once and the
# cell still works when there are no complete chunks at all
for i in range(objectid_iter + 1):
    is_last = i == objectid_iter

    # We specify the query
    if is_last:
        # Open-ended on purpose: everything above the last complete chunk, including any
        # rows whose OBJECTID is larger than the max_objectid recorded above
        where_clause = "OBJECTID > " + str(objectid_iter * row_constraint)
    else:
        lower_objectid = i * row_constraint + 1
        upper_objectid = (i + 1) * row_constraint
        where_clause = "OBJECTID BETWEEN " + str(lower_objectid) + " AND " + str(upper_objectid)
    actors = {
        'where': where_clause,
        'outFields': 'OBJECTID, FORMULARIO, CODIGO_VICTIMA, CODIGO_VEHICULO, CONDICION, GENERO, EDAD, ESTADO, MUERTE_POSTERIOR, FECHA_POSTERIOR_MUERTE',
        'returnGeometry': 'false',
        'f': 'json',
    }
    encode_actors = urllib.parse.urlencode(actors).encode("utf-8")

    # Supplying a request body makes urllib send a POST. The with-block closes the
    # connection even when read() fails, so responses do not leak across iterations
    with urllib.request.urlopen(url, encode_actors, timeout=request_timeout) as response_actors:
        query_actors = response_actors.read()

    # We write the raw JSON response to a file (files are numbered from 1)
    file_name = file_name_init + str(i + 1) + file_name_end
    with open(file_name, "wb") as json_file:
        json_file.write(query_actors)

    # ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the
    # JSON body, so we inspect the reply instead of assuming the saved file is complete
    try:
        reply = json.loads(query_actors)
    except ValueError:
        reply = None
    if not isinstance(reply, dict):
        print(f"WARNING: {file_name} does not contain a JSON object")
    elif "error" in reply:
        print(f"WARNING: {file_name} holds a server error: {reply['error']}")
    elif reply.get("exceededTransferLimit"):
        print(f"WARNING: {file_name} is incomplete, the query exceeded the server's row limit")

    # Keep track of the process (we know how many files we're creating)
    if is_last:
        print("Last iter")
    else:
        print("Iter: " + str(i))

In [None]:
##################################################
##### Vehicles #####
##################################################

In [None]:
# Download one manually chosen OBJECTID window of the vehicles table and report how many rows it returned

# Query endpoint of layer 4 of the feature service
url = "https://sig.simur.gov.co/arcgis/rest/services/Accidentalidad/WSAcidentalidad_Publico/FeatureServer/4/query?"

# Unlike OBJECTID in causes and actors, max(OBJECTID) in vehicles doesn't match its number of rows. For this reason, we need to
# carefully check OBJECTID to retrieve the info. This is done by manually changing the where clause
vehicles = {
    'where': 'OBJECTID BETWEEN 14262001 AND 15000000',
    'outFields': 'OBJECTID, FORMULARIO, CODIGO_VEHICULO, CLASE, SERVICIO, MODALIDAD, ENFUGA',
    'returnGeometry': 'false',
    'f': 'json',
}
encode_vehicles = urllib.parse.urlencode(vehicles).encode("utf-8")

# Seconds a blocking network operation may stall before urlopen raises instead of hanging the cell
request_timeout = 300

# Supplying a request body makes urllib send a POST. The with-block closes the
# connection even when read() fails
with urllib.request.urlopen(url, encode_vehicles, timeout=request_timeout) as response_vehicles:
    query_vehicles = response_vehicles.read()

# We write the raw JSON response to a file (creating the output folder if it is missing).
# The path is named once so the write and the read-back below cannot drift apart
vehicles_path = "shapefiles/vehicles/vehicles18.json"
os.makedirs(os.path.dirname(vehicles_path), exist_ok=True)
with open(vehicles_path, "wb") as json_file:
    json_file.write(query_vehicles)

# We check the results by reading the saved file back
# https://towardsdatascience.com/how-to-convert-json-into-a-pandas-dataframe-100b2ae1e0d8. Check Section 3
# The file was written as raw bytes, so it is read back in binary mode too: json.load detects the
# encoding itself, whereas text mode would decode with the platform default encoding
with open(vehicles_path, "rb") as f:
    data = json.load(f)

# ArcGIS replies can carry an "error" object or an "exceededTransferLimit" flag inside the JSON body;
# either one means the row count shown below cannot be trusted for this OBJECTID window
if "error" in data:
    print(f"WARNING: {vehicles_path} holds a server error: {data['error']}")
elif data.get("exceededTransferLimit"):
    print(f"WARNING: {vehicles_path} is incomplete, the query exceeded the server's row limit")

vehicles_df = pd.json_normalize(data, record_path=["features"])
len(vehicles_df)