In [1]:
import pandas as pd
import requests
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas

In [2]:
from tqdm import tqdm

# Connect Snowflake

In [12]:
import yaml
SNOWFLAKE_PATH = "/Users/irene/CloudStation/Portfolio/uk_crime_api/config/snowflake_key.yaml"
with open(SNOWFLAKE_PATH, "r") as file:
    account_data = yaml.safe_load(file)

In [14]:
conn = snowflake.connector.connect(
    user=account_data["user"],
    password=account_data["password"],
    account=account_data["account"],
    warehouse=account_data["warehouse"],
    database=account_data["database"],
    schema=account_data["schema"]
    )

In [16]:
conn.cursor().execute("USE WAREHOUSE insurance_wh")
conn.cursor().execute("USE DATABASE insurance_db")
conn.cursor().execute("USE SCHEMA insurance_db.CRIME_SCHEMA") # change to police_crime schema

<snowflake.connector.cursor.SnowflakeCursor at 0x10fcaa950>

# Postcode

In [28]:
POSTCODE_PATH = "/Users/irene/CloudStation/Portfolio/uk_crime_api/postcodes.csv"
postcode = pd.read_csv(POSTCODE_PATH)

In [29]:
# fix data type 
postcode['postcode'] = postcode['postcode'].astype(str)
postcode['eastings'] = postcode['eastings'].astype(int)
postcode['northings'] = postcode['northings'].astype(int)
postcode["latitude"] = postcode["latitude"].astype(float)
postcode["longitude"] = postcode["longitude"].astype(float)
postcode["town"] = postcode["town"].astype(str)
postcode["region"] = postcode["region"].astype(str)
postcode["country"] = postcode["country"].astype(str)
postcode["country_string"] = postcode["country_string"].astype(str)

# rename column
new_column_names = {
    'postcode': "POSTCODE",
    'eastings': "EASTINGS",
    'northings': "NORTHINGS",
    'latitude': "LATITUDE",
    'longitude': "LONGITUDE",
    'town': "TOWN",
    'region': "REGION",
    'country': "COUNTRY",
    'country_string': "COUNTRY_STRING"
}

postcode.rename(columns=new_column_names, inplace=True)

In [18]:
# define the create table query based on your DataFrame columns
create_table_query = """
CREATE TABLE IF NOT EXISTS CRIME_POSTCODE (
    postcode STRING,
    eastings INT,
    northings INT,
    latitude FLOAT,
    longitude FLOAT,
    town STRING,
    region STRING,
    country STRING,
    country_string STRING
)
"""

# Execute the query
cursor = conn.cursor()
cursor.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x11cfcd750>

In [31]:
# upload postcode data to Snowflake
success, nchunks, nrows, _ = write_pandas(conn, postcode, 'CRIME_POSTCODE')

if success:
    print(f"Successfully loaded {nrows} rows into Snowflake table.")
else:
    print("Failed to load data into Snowflake.")

Successfully loaded 2854 rows into Snowflake table.


# Get crime data

In [33]:
# load postcode data from snowflake
sql_query = """
SELECT latitude, longitude FROM CRIME_POSTCODE
"""

cursor = conn.cursor()
cursor.execute(sql_query)
postcode_lon_lat = cursor.fetch_pandas_all()

In [58]:
postcode_lon_lat_dict = dict(zip(postcode_lon_lat["LATITUDE"],postcode_lon_lat["LONGITUDE"]))

## Force

In [15]:
from police_api import PoliceAPI
api = PoliceAPI()

In [4]:
forces = api.get_forces()

def get_force_id(records, name=None):
    ''' Get id force by name. If no name, return a dict of name:id mappings. '''
    if name:
        return {r.name: r.id for r in records if name.lower() in r.name.lower()}
    return {r.name: r.id for r in records}

# get the mapping of force names to their IDs
force_name_id = get_force_id(forces)

force_name_id_dict = {"force": {}}

for name, id in force_name_id.items():
    force_name_id_dict["force"][name] = {"id": id}

In [5]:
def get_neighbourhood_name_id(records, rid=None):
    ''' Get record name by id. '''
    if rid:
        return {r.name:r.id for r in records if rid == r.id}
    return {r.name:r.id for r in records}

def extract_senior_officer_details(senior_officers):
    return [{"name": officer.name, "rank": officer.rank, "bio": officer.bio, "contact_details": officer.contact_details} for officer in senior_officers]

for force_name, details in tqdm(force_name_id_dict["force"].items()):
    force_id = details['id']
    force_data = api.get_force(force_id)
    
    telephone = force_data.telephone
    description = force_data.description
    url = force_data.url
    engagement_methods = force_data.engagement_methods
    neighbourhoods = get_neighbourhood_name_id(force_data.neighbourhoods)
    senior_officers = extract_senior_officer_details(force_data.senior_officers)
    
    # Update the dictionary with additional details
    force_name_id_dict["force"][force_name].update({
        "telephone": telephone,
        "description": description,
        "url": url,
        "engagement_methods": engagement_methods,
        "neighbourhoods": neighbourhoods,
        "senior_officers": senior_officers
    })

  0%|          | 0/44 [00:00<?, ?it/s]

100%|██████████| 44/44 [01:00<00:00,  1.38s/it]


In [81]:
import json
with open("force_raw.json", "w") as outfile:
    json.dump(force_name_id_dict, outfile)

## Neighbourhood

In [70]:
def get_neighbourhood_name_id(records, rid=None):
    ''' Get record name by id. '''
    if rid:
        return {r.name:r.id for r in records if rid == r.id}
    return {r.name:r.id for r in records}

neighbourhood_dict = {"neighbourhood": {}}

for force_name, details in tqdm(force_name_id_dict["force"].items()):
    force_id = details['id']
    force_data = api.get_force(force_id)
    neighbourhood_name_id = get_neighbourhood_name_id(force_data.neighbourhoods)
    neighbourhood = force_data.get_neighbourhood(neighbourhood_name_id)
    neighbourhood_dict["neighbourhood"][force_name] = {}
    for neighbourhood_name, neighbourhood_id in neighbourhood_name_id.items():
        neighbourhood_dict["neighbourhood"][force_name][neighbourhood_name] = {
            "id": neighbourhood_id,
            "description": description
        }

  0%|          | 0/44 [00:00<?, ?it/s]

100%|██████████| 44/44 [00:19<00:00,  2.26it/s]


In [83]:
with open("neighbourhood_raw.json", "w") as outfile:
    json.dump(neighbourhood_dict, outfile)

In [11]:
# def get_officers_info(neighbourhood_officer):
#     return [{"neighbourhood": officer.neighbourhood, "name": officer.name, "rank": officer.rank, 
#             "bio": officer.bio, "contact_details": officer.contact_details} for officer in neighbourhood_officer]

# def get_events_info(neighbourhood_event):
#     return [{"neighbourhood": event.neighbourhood, "title": event.title, "type": event.type, "description": event.description,
#              "contact_details": event.contact_details, "start_date": event.start_date, "address": event.address} for event in neighbourhood_event]

# def get_priorities_info(neighbourhood_priority):
#     return [{"neighbourhood": priority.neighbourhood, "issue": priority.issue, "action": priority.action, 
#             "issue_date": priority.issue_date, "action_date": priority.action_date} for priority in neighbourhood_priority]

# def get_neighbourhood_info(force_neighbourhood):
#     return [{"name": neighbourhood.name, "id": neighbourhood.id, "description": neighbourhood.description, 
#              "url_force": neighbourhood.url_force, "population": neighbourhood.population, "locations": neighbourhood.centre, 
#              "links": neighbourhood.links, "locations": neighbourhood.locations,
#              "contact_details": neighbourhood.contact_details, "officers": get_officers_info(neighbourhood.officers), "events": get_events_info(neighbourhood.events),
#              "priorities": get_priorities_info(neighbourhood.priorities), "boundary": neighbourhood.boundary} for neighbourhood in force_neighbourhood]

## Crime

In [None]:
for force_name, details in tqdm(force_name_id_dict["force"].items()):
    force_id = details['id']
    force_data = api.get_force(force_id)
    neighbourhood_name_id = get_neighbourhood_name_id(force_data.neighbourhoods)
    neighbourhood = force_data.get_neighbourhood(neighbourhood_name_id)
    for key, value in neighbourhood_name_id.items():
        print(key)



In [None]:
force_id = details['id']
force_data = api.get_force("hampshire")
neighbourhood_name_id = get_neighbourhood_name_id(force_data.neighbourhoods)
for key, value in tqdm(neighbourhood_name_id.items()):
    # print(key)
    neighbourhood = force_data.get_neighbourhood(key)

In [49]:
def getNameId(records, rid=None):
    ''' Get record name by id. '''
    if rid:
        return {r.name:r.id for r in records if rid == r.id}
    return {r.name:r.id for r in records}
force = api.get_force('Staffordshire Police')
areas = getNameId(force.neighbourhoods)
neighbourhood = force.get_neighbourhood(areas['Aldershot North'])
crimes = api.get_crimes_area(neighbourhood.boundary, date='2024-03')

APIError: <unknown error code>

In [52]:
areas['Aldershot North']

'14RA01'

In [50]:
areas['Abbey Hulton &amp; Townsend']

KeyError: 'Abbey Hulton &amp; Townsend'

In [None]:
def setCrimesAsDataFrame(crimes, df=None):
    ''' Convert crimes result to dataframe. '''
    if df is None:
        df=pd.DataFrame(columns = ['cid','type', 'month','name','lid','location','lat','lon'])
        #[int, object, object, int, object, float, float]
    for c in crimes:
        df = df.append({'cid':c.id,'type':c.category.id, 'month':c.month, 'name':c.category.name,
                            'lat':c.location.latitude,'lon':c.location.longitude, 
                        'lid':c.location.id,'location':c.location.name }, ignore_index=True)

    df['lat']=df['lat'].astype(float)
    df['lon']=df['lon'].astype(float)
    
    return df

In [17]:
d = api.get_dates()
d

['2024-03',
 '2024-02',
 '2024-01',
 '2023-12',
 '2023-11',
 '2023-10',
 '2023-09',
 '2023-08',
 '2023-07',
 '2023-06',
 '2023-05',
 '2023-04',
 '2023-03',
 '2023-02',
 '2023-01',
 '2022-12',
 '2022-11',
 '2022-10',
 '2022-09',
 '2022-08',
 '2022-07',
 '2022-06',
 '2022-05',
 '2022-04',
 '2022-03',
 '2022-02',
 '2022-01',
 '2021-12',
 '2021-11',
 '2021-10',
 '2021-09',
 '2021-08',
 '2021-07',
 '2021-06',
 '2021-05',
 '2021-04']

-----

# test

In [None]:
force = api.get_force('avon-and-somerset')
areas = get_neighbourhood_name_id(force.neighbourhoods)
neighbourhood = force_data.get_neighbourhood(areas)

neighbourhood_dict = {"neighbourhood": {}}
for name, centre in areas.items():
    neighbourhood_dict["neighbourhood"][name] = {"id": id, "name": name, "description": description, "priorities": priorities}

In [None]:
def getIdName(records, name=None):
    ''' Get id force by name. If no name, return a dict of name:id mappings. '''
    if name:
        return {r.name:r.id for r in records if name.lower() in r.name.lower()}
    return {r.name:r.id for r in records}
force = api.get_force('hampshire')
areas = getIdName(force.neighbourhoods)
neighbourhood = force.get_neighbourhood( areas['Ryde'] )
neighbourhood.locations

In [None]:
force_data = api.get_force("norfolk")
def extract_senior_officer_details(senior_officers):
    return [{"force": officer.force, "name": officer.name, "rank": officer.rank, "bio": officer.bio, "contact_details": officer.contact_details} for officer in senior_officers]
extract_senior_officer_details(force_data.senior_officers)

In [49]:
lat = "51.500617"
lng = "-0.124629"
force = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q={lat},{lng}').json()#.get('force')

In [57]:
neigh = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q="metropolitan","E05013806N"]').json()#.get('neighbourhood')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [54]:
force

{'force': 'metropolitan', 'neighbourhood': 'E05013806N'}

In [None]:
def match_postcodes(postcode):
    """
    Function that takes a postcode and build a polygon based on the coordinates of the area around it.
    """
    response = requests.get(f'http://api.postcodes.io/postcodes/{postcode}')
    if response.ok:
        df = [response.json()['result']['latitude'],response.json()['result']['longitude']]
        force = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q={df[0]},{df[1]}').json().get('force')
        neigh = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q={df[0]},{df[1]}').json().get('neighbourhood')
        boundary = requests.get(f'https://data.police.uk/api/{force}/{neigh}/boundary').json()
        boundary = pd.DataFrame.from_dict(boundary)
        polygon=f"{boundary['latitude'].max()},{boundary['longitude'].max()}:\
            {boundary['latitude'].max()},{boundary['longitude'].min()}:\
                {boundary['latitude'].min()},{boundary['longitude'].min()}:\
                    {boundary['latitude'].min()},{boundary['longitude'].max()}"
    else:
        raise Exception
    return df, polygon

In [3]:
def fetch_dates():
    """
    Function that fetches the date range of the available data
    """
    response = requests.get('https://data.police.uk/api/crimes-street-dates')
    if response.ok:
        print("Data pull successfully")
        df = pd.DataFrame.from_dict(response.json())
        df = df.explode('stop-and-search')
    else:
        print("Data pull failed")
    return df

In [4]:
df = fetch_dates()

Data pull successfully


In [5]:
df

Unnamed: 0,date,stop-and-search
0,2024-03,bedfordshire
0,2024-03,btp
0,2024-03,cambridgeshire
0,2024-03,city-of-london
0,2024-03,cleveland
...,...,...
35,2021-04,warwickshire
35,2021-04,west-mercia
35,2021-04,west-midlands
35,2021-04,west-yorkshire


In [None]:
def match_postcodes(postcode):
    """
    Function that takes a postcode and build a polygon based on the coordinates of the area around it.
    """
    response = requests.get(f'http://api.postcodes.io/postcodes/{postcode}')
    if response.ok:
        df = [response.json()['result']['latitude'],response.json()['result']['longitude']]
        force = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q={df[0]},{df[1]}').json().get('force')
        neigh = requests.get(f'https://data.police.uk/api/locate-neighbourhood?q={df[0]},{df[1]}').json().get('neighbourhood')
        boundary = requests.get(f'https://data.police.uk/api/{force}/{neigh}/boundary').json()
        boundary = pd.DataFrame.from_dict(boundary)
        polygon=f"{boundary['latitude'].max()},{boundary['longitude'].max()}:\
            {boundary['latitude'].max()},{boundary['longitude'].min()}:\
                {boundary['latitude'].min()},{boundary['longitude'].min()}:\
                    {boundary['latitude'].min()},{boundary['longitude'].max()}"
    else:
        raise Exception
    return df, polygon

In [8]:
response = requests.get('http://api.postcodes.io/postcodes/')

In [9]:
response.json()

{'status': 400,
 'error': 'No postcode query submitted. Remember to include query parameter'}