In [1]:
import pandas as pd
import math

In [2]:
# Path to the NSPL CSV file, relative to the notebook's working directory.
# Download it from
# https://geoportal.statistics.gov.uk/search?q=PRD_NSPL%20MAY_2024&sort=Date%20Created%7Ccreated%7Cdesc
# and save the CSV at the location below before running the next cell.
nspl_path = 'data/NSPL21_MAY_2024_UK.csv'

In [3]:
# Read just the columns the lookups need: the postcode ('pcds') and its
# easting / northing ('oseast1m', 'osnrth1m'). Every other column in the
# file is skipped at parse time.
columns_to_load = ['pcds', 'oseast1m', 'osnrth1m']
nspl_df = pd.read_csv(
    nspl_path,
    usecols=columns_to_load,
)

In [4]:
def format_postcode(postcode):
    """
    Normalise a postcode to the spaced, upper-case form used for lookups.

    All whitespace (spaces, tabs, newlines, non-breaking spaces) is removed,
    the text is upper-cased, and a single space is inserted before the final
    three characters, e.g. 'ec1p1dr' -> 'EC1P 1DR'.

    Parameters
    ----------
    postcode : str
        Postcode in any spacing or letter case.

    Returns
    -------
    str
        The cleaned postcode with one space before its last three characters.

    Notes
    -----
    No validation is performed: the text is only re-spaced, so a string that
    is not a real postcode is returned re-spaced rather than rejected.
    """
    # str.split() with no arguments splits on any run of whitespace, so
    # re-joining the pieces drops tabs/newlines/non-breaking spaces as well
    # as ordinary spaces (replace(" ", "") only handled the latter).
    cleaned_postcode = "".join(postcode.split()).upper()
    return cleaned_postcode[:-3] + ' ' + cleaned_postcode[-3:]

In [5]:
def get_postcode_coordinates(postcode, df):
    """
    Look up the northing and easting recorded for a postcode.

    The postcode is normalised with `format_postcode` and matched exactly
    against the 'pcds' column of `df`. If several rows match, the first one
    is used.

    Parameters
    ----------
    postcode : str
        Postcode in any spacing or letter case.
    df : pandas.DataFrame
        Lookup table with 'pcds', 'osnrth1m' and 'oseast1m' columns.

    Returns
    -------
    tuple
        `(northing, easting)` taken from 'osnrth1m' and 'oseast1m', or
        `(None, None)` when the postcode is not in the table or the matching
        row has no grid reference.
    """
    formatted_postcode = format_postcode(postcode)
    result = df[df['pcds'] == formatted_postcode]

    if result.empty:
        return None, None

    northing = result['osnrth1m'].iloc[0]
    easting = result['oseast1m'].iloc[0]

    # Some postcodes are present in the table but have blank coordinates,
    # which pandas reads as NaN. Report those as "not found" so callers that
    # test for None do not go on to compute a NaN distance.
    if pd.isna(northing) or pd.isna(easting):
        return None, None

    return northing, easting

In [6]:
def calculate_distance(start_pcode, end_pcode, df=None):
    """
    Calculate the straight-line distance between two postcodes.

    Both postcodes are resolved to (northing, easting) pairs with
    `get_postcode_coordinates`, and the Euclidean distance between the two
    points is returned in the units of those coordinates (the notebook
    stores the result in a column named 'distance_meters').

    Parameters
    ----------
    start_pcode : str
        Postcode of the starting point.
    end_pcode : str
        Postcode of the end point.
    df : pandas.DataFrame, optional
        Lookup table to search. Defaults to the notebook-level `nspl_df`
        when omitted, which is what existing two-argument calls get.

    Returns
    -------
    float or None
        The distance, or None when either postcode cannot be resolved to a
        complete pair of coordinates.
    """
    lookup = nspl_df if df is None else df

    northing1, easting1 = get_postcode_coordinates(start_pcode, lookup)
    northing2, easting2 = get_postcode_coordinates(end_pcode, lookup)

    # pd.isna is True for both None (postcode not found) and NaN (postcode
    # present but without a grid reference); either makes the distance
    # meaningless, so report "no result" rather than returning NaN.
    coordinates = (northing1, easting1, northing2, easting2)
    if any(pd.isna(value) for value in coordinates):
        return None

    # math.hypot computes sqrt(dx**2 + dy**2) directly.
    return math.hypot(easting2 - easting1, northing2 - northing1)

In [26]:
# Small hand-made set of journeys used to exercise the distance calculation
data = {
    'id': [1, 2, 3, 4, 5],
    'start_postcode': ['EC1P 1DR', 'BN52 9XH', 'W1F 7BY', 'LE8 0TR', 'BS4 2PJ'],
    'end_postcode': ['PO4 9UG', 'W4 2RJ', 'WA7 5JW', 'L39 3LN', 'WF10 1BY'],
}

# One row per journey
df = pd.DataFrame(data)

# Show the input journeys
print(df)

# Calculate the distance for every journey, one row at a time
df['distance_meters'] = df.apply(
    lambda journey: calculate_distance(
        journey['start_postcode'], journey['end_postcode']
    ),
    axis=1,
)

# Show the journeys again, now with the computed distance column
print(df)

   id start_postcode end_postcode
0   1       EC1P 1DR      PO4 9UG
1   2       BN52 9XH       W4 2RJ
2   3        W1F 7BY      WA7 5JW
3   4        LE8 0TR      L39 3LN
4   5        BS4 2PJ     WF10 1BY
   id start_postcode end_postcode  distance_meters
0   1       EC1P 1DR      PO4 9UG    104185.861699
1   2       BN52 9XH       W4 2RJ     72051.905402
2   3        W1F 7BY      WA7 5JW    267797.017431
3   4        LE8 0TR      L39 3LN    169318.410842
4   5        BS4 2PJ     WF10 1BY    267124.975255


In [24]:
# Spot-check 10 rows of the lookup table. No random_state is passed, so a
# different set of rows is shown on every run. Note that some postcodes in
# the table have blank easting/northing values (e.g. JE2 3GP).
nspl_df.sample(10)

Unnamed: 0,pcds,oseast1m,osnrth1m
1142999,JE2 3GP,,
774338,EC1P 1DR,531073.0,182317.0
2621411,WN5 8UQ,352342.0,404295.0
2571668,WA8 8GT,349367.0,384211.0
1527563,NE12 0DP,426500.0,570500.0
1884463,PO4 9UG,467990.0,99400.0
1565713,NE66 3EP,424219.0,624243.0
2610309,WF4 2LE,432968.0,414011.0
1879100,PO30 9UE,450180.0,89236.0
992782,GU8 5US,494675.0,138319.0
