In [1]:
import pandas as pd
import requests
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas
import json

In [2]:
from tqdm import tqdm

# Connect Snowflake

In [4]:
import yaml
SNOWFLAKE_PATH = "/Users/irene/CloudStation/Portfolio/uk-crime-api/config/snowflake_key.yaml"
with open(SNOWFLAKE_PATH, "r") as file:
    account_data = yaml.safe_load(file)

In [5]:
conn = snowflake.connector.connect(
    user=account_data["user"],
    password=account_data["password"],
    account=account_data["account"],
    warehouse=account_data["warehouse"],
    database=account_data["database"],
    schema=account_data["schema"]
    )

In [6]:
conn.cursor().execute("USE WAREHOUSE insurance_wh")
conn.cursor().execute("USE DATABASE insurance_db")
conn.cursor().execute("USE SCHEMA insurance_db.CRIME_SCHEMA") # change to police_crime schema

<snowflake.connector.cursor.SnowflakeCursor at 0x11d990bd0>

# Get crime data

In [3]:
from police_api import PoliceAPI
api = PoliceAPI(timeout=120)

## Force

In [20]:
forces = api.get_forces()

def get_force_id(records, name=None):
    ''' Get id force by name. If no name, return a dict of name:id mappings. '''
    if name:
        return {r.name: r.id for r in records if name.lower() in r.name.lower()}
    return {r.name: r.id for r in records}

# get the mapping of force names to their IDs
force_name_id = get_force_id(forces)

force_name_id_dict = {"force": {}}

for name, id in force_name_id.items():
    force_name_id_dict["force"][name] = {"id": id}

In [5]:
def get_neighbourhood_name_id(records, rid=None):
    ''' Get record name by id. '''
    if rid:
        return {r.name:r.id for r in records if rid == r.id}
    return {r.name:r.id for r in records}

def extract_senior_officer_details(senior_officers):
    return [{"name": officer.name, "rank": officer.rank, "bio": officer.bio, "contact_details": officer.contact_details} for officer in senior_officers]

for force_name, details in tqdm(force_name_id_dict["force"].items()):
    force_id = details['id']
    force_data = api.get_force(force_id)
    
    telephone = force_data.telephone
    description = force_data.description
    url = force_data.url
    engagement_methods = force_data.engagement_methods
    neighbourhoods = get_neighbourhood_name_id(force_data.neighbourhoods)
    senior_officers = extract_senior_officer_details(force_data.senior_officers)
    
    # Update the dictionary with additional details
    force_name_id_dict["force"][force_name].update({
        "telephone": telephone,
        "description": description,
        "url": url,
        "engagement_methods": engagement_methods,
        "neighbourhoods": neighbourhoods,
        "senior_officers": senior_officers
    })

  0%|          | 0/44 [00:00<?, ?it/s]

100%|██████████| 44/44 [01:00<00:00,  1.38s/it]


In [81]:
# import json
# with open("force_raw.json", "w") as outfile:
#     json.dump(force_name_id_dict, outfile)

## Neighbourhood

In [9]:
f = open('/Users/irene/CloudStation/Portfolio/uk-crime-api/force_raw.json')
force_name_id_dict = json.load(f)

In [11]:
def get_neighbourhood_name_id(records, rid=None):
    ''' Get record name by id. '''
    if rid:
        return {r.name:r.id for r in records if rid == r.id}
    return {r.name:r.id for r in records}

def get_officers_info(neighbourhood_officer):
    return [{"neighbourhood": officer.neighbourhood, "name": officer.name, "rank": officer.rank, 
            "bio": officer.bio, "contact_details": officer.contact_details} for officer in neighbourhood_officer]

def get_events_info(neighbourhood_event):
    return [{"neighbourhood": event.neighbourhood, "title": event.title, "type": event.type, "description": event.description,
             "contact_details": event.contact_details, "start_date": event.start_date, "address": event.address} for event in neighbourhood_event]

def get_priorities_info(neighbourhood_priority):
    return [{"neighbourhood": priority.neighbourhood, "issue": priority.issue, "action": priority.action, 
            "issue_date": priority.issue_date, "action_date": priority.action_date} for priority in neighbourhood_priority]

In [36]:
# get neighbourhood data from avon-and-somerset

neighbourhood_dict = {"neighbourhood": {}}
force_id = "avon-and-somerset"
force_name = "Avon and Somerset Constabulary"
force_data = api.get_force(force_id)
areas = get_neighbourhood_name_id(force_data.neighbourhoods)
neighbourhood_name = "Yeovil Town"
neighbourhood_id = areas["Yeovil Town"]
# Add the force name to neighbourhood_dict if not already present
if force_name not in neighbourhood_dict["neighbourhood"]:
    neighbourhood_dict["neighbourhood"][force_name] = {}
neighbourhood = force_data.get_neighbourhood(neighbourhood_id)
neighbourhood_dict["neighbourhood"][force_name] = {neighbourhood_name:{}}
# Update neighbourhood details for the current force
neighbourhood_dict["neighbourhood"][force_name][neighbourhood_name] = {
    "id": neighbourhood.id,
    "name": neighbourhood.name,
    # "description": neighbourhood.description,
    # "url_force": neighbourhood.url_force,
    # "population": neighbourhood.population,
    # "centre": neighbourhood.centre,
    # "links": neighbourhood.links,
    # "locations": neighbourhood.locations,
    # "contact_details": neighbourhood.contact_details,
    # "officers": get_officers_info(neighbourhood.officers),
    # "events": get_events_info(neighbourhood.events),
    # "priorities": get_priorities_info(neighbourhood.priorities),
    "boundary": neighbourhood.boundary
    
    }

In [29]:
neighbourhood_dict["neighbourhood"]['Avon and Somerset Constabulary']["Yeovil Town"]["boundary"][0]

(50.93482601324, -2.6676934416779)

In [None]:
# get all neighbourhood data from all force
# for force_name, details in tqdm(force_name_id_dict["force"].items()):
#     force_id = details['id']
#     force_data = api.get_force(force_id)
#     areas = get_neighbourhood_name_id(force_data.neighbourhoods)
    
#     # Add the force name to neighbourhood_dict if not already present
#     if force_name not in neighbourhood_dict["neighbourhood"]:
#         neighbourhood_dict["neighbourhood"][force_name] = {}
    
#     for neighbourhood_name, neighbourhood_id in tqdm(areas.items()):
#         neighbourhood = force_data.get_neighbourhood(neighbourhood_id)
#         neighbourhood_dict["neighbourhood"][force_name] = {neighbourhood_name:{}}
#         # Update neighbourhood details for the current force
#         neighbourhood_dict["neighbourhood"][force_name][neighbourhood_name] = {
#             "id": neighbourhood.id,
#             # "name": neighbourhood.name,
#             # "description": neighbourhood.description,
#             # "url_force": neighbourhood.url_force,
#             # "population": neighbourhood.population,
#             # "centre": neighbourhood.centre,
#             # "links": neighbourhood.links,
#             # "locations": neighbourhood.locations,
#             # "contact_details": neighbourhood.contact_details,
#             # "officers": get_officers_info(neighbourhood.officers),
#             # "events": get_events_info(neighbourhood.events),
#             # "priorities": get_priorities_info(neighbourhood.priorities),
#             "boundary": neighbourhood.boundary  
#         }

In [108]:
with open("neighbourhood_raw_v1.json", "w") as outfile:
    json.dump(neighbourhood_dict, outfile)

## ALL_Crime

In [55]:
# get avon-and-somerset all_crime
date_list = api.get_dates()
sorted_date_list = sorted(date_list, reverse=True)
latest_date = sorted_date_list[0]
force_id = "avon-and-somerset"
force_name = "Avon and Somerset Constabulary"
all_crime_dict = {"all_crime": {force_name:{}}}
all_crime = requests.get(f"https://data.police.uk/api/crimes-no-location?category=all-crime&force={force_id}&date={latest_date}").json()
all_crime_dict = {"all_crime": {force_name: all_crime}}

In [71]:
# get all all crime
date_list = api.get_dates()
sorted_date_list = sorted(date_list, reverse=True)
latest_date = sorted_date_list[0]
all_crime_dict = {"all_crime":{}}
for force_name, force_data in tqdm(force_name_id_dict["force"].items()):
    all_crime_dict["all_crime"][force_name] = {}
    force_id = force_data["id"]
    all_crime = requests.get(f"https://data.police.uk/api/crimes-no-location?category=all-crime&force={force_id}&date={latest_date}").json()
    all_crime_dict["all_crime"][force_name] = all_crime

100%|██████████| 44/44 [00:23<00:00,  1.86it/s]


In [80]:
for k,v in all_crime_dict["all_crime"].items():
    for i in v:
        print(i["persistent_id"])
    # print(v)

891819224dee7b47e80ea5fb25bb1f27a9f087fe2f51b4765761ed3cc6cd5935
7a710b0eec613d577223b7971b2f611945c6aa9ce4d71e0d5a416e3ce26a1d80
02408f29a648c6f208c2bcab098bb90a550cc7d8d85da4e9333164f693f5c377
0235318d3ab8bd5d54312804187537d677a425de680d9aad5be7f4b6010b4a0b
2dc18baebe8c469f426a489f476b58a9f43bf27739a4f0ac6badab9745cdc7bc
11541cce1037297c4bca2b786359b67edac93ce94c13cacefe0734ae2f9b6849
8f02ded8b53c7e9d66ad128ebf54a8d687c6e3f2bfb75e825d7e717ac228dfa5
266307a974ff2a75b371b2150478c5ca602c0e9ba1b0dbf5be6371f2c3d8bb8d
a836355e4577fb6f98c3f817cc24941974f36a6b99a5957d7d35340793b5e2a9
5b6e71b3d91e9e21bc6aa124cf0eced244e0ff191ac005d055f83d0af1b5129a
8d2f03f84fc2d13da5f72968d6087f94e3390906cf15ae523e5f4a2bdf2dccd8
76524ca43a87058921417983f4b47b0ccaee75cc2ad19144efe7a36ada3760ce
03c91f931b07b3cf8936c0b7eeb05e999c4985e9c58a57b92b7477ab0ace0e3d
910e1c27c6159243955c6772ad12541e736eb0a37ff08105ebee906b1b21d715
465662059d3aae15fffaed6de9abaff5f34caff73ada8e8237b8970839f20fe3
0e528e7978951a9cfb098dc3c

## CRIME_DETAIL

In [110]:
# get all crime detail
date_list = api.get_dates()
sorted_date_list = sorted(date_list, reverse=True)
latest_date = sorted_date_list[0]

crime_detail_dict = {"crime_detail": {}}
all_crime_dict = {"all_crime": {}}

for force_name, force_data in tqdm(force_name_id_dict["force"].items()):
    force_id = force_data["id"]
    all_crime_response = requests.get(f"https://data.police.uk/api/crimes-no-location?category=all-crime&force={force_id}&date={latest_date}")
    
    if all_crime_response.status_code == 200:
        all_crime_data = all_crime_response.json()
        all_crime_dict["all_crime"][force_name] = all_crime_data
        
        for crime in all_crime_data:
            persistent_id = crime['persistent_id']
            crime_detail_response = requests.get(f"https://data.police.uk/api/outcomes-for-crime/{persistent_id}")
            
            if crime_detail_response.status_code == 200:
                crime_detail = crime_detail_response.json()
                crime_detail_dict["crime_detail"][persistent_id] = crime_detail
            else:
                print(f"Failed to fetch details for crime with persistent_id: {persistent_id}")
    else:
        print(f"Failed to fetch crime data for force: {force_name}")

print("Crime details fetched successfully.")


  0%|          | 0/44 [01:22<?, ?it/s]


KeyboardInterrupt: 

In [111]:
crime_detail_dict

{'crime_detail': {'891819224dee7b47e80ea5fb25bb1f27a9f087fe2f51b4765761ed3cc6cd5935': {'outcomes': [{'category': {'code': 'under-investigation',
      'name': 'Under investigation'},
     'date': '2024-03',
     'person_id': None}],
   'crime': {'category': 'bicycle-theft',
    'location_type': None,
    'location': None,
    'context': '',
    'persistent_id': '891819224dee7b47e80ea5fb25bb1f27a9f087fe2f51b4765761ed3cc6cd5935',
    'id': 117424833,
    'location_subtype': '',
    'month': '2024-03'}},
  '7a710b0eec613d577223b7971b2f611945c6aa9ce4d71e0d5a416e3ce26a1d80': {'outcomes': [{'category': {'code': 'under-investigation',
      'name': 'Under investigation'},
     'date': '2024-03',
     'person_id': None}],
   'crime': {'category': 'bicycle-theft',
    'location_type': None,
    'location': None,
    'context': '',
    'persistent_id': '7a710b0eec613d577223b7971b2f611945c6aa9ce4d71e0d5a416e3ce26a1d80',
    'id': 117417018,
    'location_subtype': '',
    'month': '2024-03'}},
  

In [112]:
# get just one crime detail from each force
date_list = api.get_dates()
sorted_date_list = sorted(date_list, reverse=True)
latest_date = sorted_date_list[0]

crime_detail_dict = {"crime_detail": {}}
all_crime_dict = {"all_crime": {}}

for force_name, force_data in tqdm(force_name_id_dict["force"].items()):
    force_id = force_data["id"]
    all_crime_response = requests.get(f"https://data.police.uk/api/crimes-no-location?category=all-crime&force={force_id}&date={latest_date}")
    
    if all_crime_response.status_code == 200:
        all_crime_data = all_crime_response.json()
        all_crime_dict["all_crime"][force_name] = all_crime_data
        
        # select the first crime's persistent ID from the fetched data
        first_crime = all_crime_data[0] if all_crime_data else None
        if first_crime:
            persistent_id_to_fetch = first_crime['persistent_id']
            
            # fetch crime details for the selected persistent ID
            crime_detail_response = requests.get(f"https://data.police.uk/api/outcomes-for-crime/{persistent_id_to_fetch}")
            
            if crime_detail_response.status_code == 200:
                crime_detail = crime_detail_response.json()
                crime_detail_dict["crime_detail"][persistent_id_to_fetch] = crime_detail
                print("Crime details fetched successfully.")
            else:
                print(f"Failed to fetch details for crime with persistent_id: {persistent_id_to_fetch}")
        else:
            print(f"No crimes found for force: {force_name}")
    else:
        print(f"Failed to fetch crime data for force: {force_name}")

print("Process completed.")


  2%|▏         | 1/44 [00:00<00:20,  2.05it/s]

Crime details fetched successfully.


  5%|▍         | 2/44 [00:01<00:21,  1.94it/s]

Failed to fetch details for crime with persistent_id: 


  7%|▋         | 3/44 [00:01<00:28,  1.46it/s]

Failed to fetch details for crime with persistent_id: 


  9%|▉         | 4/44 [00:02<00:24,  1.66it/s]

No crimes found for force: Cheshire Constabulary


 11%|█▏        | 5/44 [00:03<00:30,  1.29it/s]

Failed to fetch details for crime with persistent_id: 


 14%|█▎        | 6/44 [00:04<00:27,  1.37it/s]

No crimes found for force: Cleveland Police


 16%|█▌        | 7/44 [00:04<00:21,  1.71it/s]

No crimes found for force: Cumbria Constabulary


 18%|█▊        | 8/44 [00:05<00:21,  1.67it/s]

No crimes found for force: Derbyshire Constabulary


 20%|██        | 9/44 [00:05<00:21,  1.65it/s]

Crime details fetched successfully.


 23%|██▎       | 10/44 [00:06<00:24,  1.40it/s]

Crime details fetched successfully.
No crimes found for force: Durham Constabulary


 27%|██▋       | 12/44 [00:07<00:18,  1.77it/s]

Crime details fetched successfully.


 30%|██▉       | 13/44 [00:08<00:21,  1.44it/s]

Crime details fetched successfully.


 32%|███▏      | 14/44 [00:09<00:22,  1.34it/s]

Failed to fetch details for crime with persistent_id: 


 34%|███▍      | 15/44 [00:10<00:27,  1.07it/s]

No crimes found for force: Greater Manchester Police


 36%|███▋      | 16/44 [00:11<00:26,  1.04it/s]

Crime details fetched successfully.


 39%|███▊      | 17/44 [00:12<00:23,  1.16it/s]

No crimes found for force: Hampshire Constabulary


 41%|████      | 18/44 [00:13<00:22,  1.15it/s]

Crime details fetched successfully.


 43%|████▎     | 19/44 [00:13<00:19,  1.26it/s]

No crimes found for force: Humberside Police


 45%|████▌     | 20/44 [00:15<00:22,  1.05it/s]

Crime details fetched successfully.


 48%|████▊     | 21/44 [00:15<00:19,  1.17it/s]

Crime details fetched successfully.


 50%|█████     | 22/44 [00:17<00:20,  1.07it/s]

Crime details fetched successfully.


 52%|█████▏    | 23/44 [00:17<00:17,  1.23it/s]

No crimes found for force: Lincolnshire Police


 55%|█████▍    | 24/44 [00:17<00:13,  1.46it/s]

No crimes found for force: Merseyside Police


 57%|█████▋    | 25/44 [00:19<00:16,  1.17it/s]

No crimes found for force: Metropolitan Police Service


 59%|█████▉    | 26/44 [00:19<00:13,  1.29it/s]

Crime details fetched successfully.


 61%|██████▏   | 27/44 [00:21<00:17,  1.04s/it]

Failed to fetch details for crime with persistent_id: 


 64%|██████▎   | 28/44 [00:21<00:14,  1.13it/s]

Failed to fetch details for crime with persistent_id: 


 66%|██████▌   | 29/44 [00:22<00:11,  1.26it/s]

No crimes found for force: Northamptonshire Police


 68%|██████▊   | 30/44 [00:23<00:09,  1.43it/s]

No crimes found for force: Northumbria Police


 70%|███████   | 31/44 [00:23<00:09,  1.40it/s]

Crime details fetched successfully.
No crimes found for force: Police Service of Northern Ireland


 75%|███████▌  | 33/44 [00:24<00:06,  1.70it/s]

Failed to fetch details for crime with persistent_id: 


 77%|███████▋  | 34/44 [00:26<00:07,  1.28it/s]

Crime details fetched successfully.


 80%|███████▉  | 35/44 [00:26<00:05,  1.51it/s]

Crime details fetched successfully.


 82%|████████▏ | 36/44 [00:27<00:06,  1.23it/s]

Crime details fetched successfully.


 84%|████████▍ | 37/44 [00:28<00:06,  1.12it/s]

Crime details fetched successfully.


 86%|████████▋ | 38/44 [00:29<00:04,  1.27it/s]

Failed to fetch details for crime with persistent_id: 


 89%|████████▊ | 39/44 [00:30<00:04,  1.18it/s]

Failed to fetch details for crime with persistent_id: 


 91%|█████████ | 40/44 [00:31<00:03,  1.15it/s]

Crime details fetched successfully.


 95%|█████████▌| 42/44 [00:32<00:01,  1.32it/s]

Crime details fetched successfully.
No crimes found for force: West Midlands Police


100%|██████████| 44/44 [00:33<00:00,  1.30it/s]

Failed to fetch details for crime with persistent_id: 
No crimes found for force: Wiltshire Police
Process completed.



