# Data Cleaning of Distribution Centres

The distribution centre data could not be reliably retrieved from OpenStreetMap (OSM), so a dataset of PostNL depots in the Netherlands was obtained via Schuberg Philis. This notebook cleans that dataset for use in the analysis.

In [None]:
# Import necessary libraries
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import contextily as ctx

In [None]:
# Load the PostNL depot table from the adjusted CSV file
depots_csv_path = '../distribution_centers/data/postnl_depots_adjusted.csv'
postnl_distribution = pd.read_csv(depots_csv_path)

In [None]:
# Keep only the rows whose depotLive flag equals True
is_live = postnl_distribution['depotLive'].eq(True)
postnl_distribution = postnl_distribution.loc[is_live]

In [None]:
# Keep only the rows whose Type is exactly 'PostNL Sorteercentrum Pakketten'
wanted_type = 'PostNL Sorteercentrum Pakketten'
has_wanted_type = postnl_distribution['Type'].eq(wanted_type)
postnl_distribution = postnl_distribution.loc[has_wanted_type]

In [None]:
# Reduce the table to the columns used by the following steps
columns_to_keep = [
    'depotAbbreviation',
    'depotGpsLatitude',
    'depotGpsLongitude',
    'depotCity',
    'depotId',
]
postnl_distribution = postnl_distribution[columns_to_keep]

In [None]:
# Create a GeoDataFrame
# Build all point geometries in a single vectorised call (x = longitude,
# y = latitude) instead of constructing shapely Points row by row.
geometry = gpd.points_from_xy(
    postnl_distribution['depotGpsLongitude'],
    postnl_distribution['depotGpsLatitude'],
)
# The raw coordinate columns are redundant once the geometry has been built.
postnl_distribution = postnl_distribution.drop(columns=['depotGpsLongitude', 'depotGpsLatitude'])
# crs=4326 is passed as an EPSG code, i.e. the coordinates are treated as
# WGS 84 longitude/latitude.
gdf_distribution = gpd.GeoDataFrame(postnl_distribution, crs=4326, geometry=geometry)

In [None]:
# Mark every row as a 'distribution' location and expose depotId under the name "id"
gdf_distribution = (
    gdf_distribution
    .assign(type='distribution')
    .rename(columns={"depotId": "id"})
)

In [None]:
# Show the cleaned GeoDataFrame for a visual check before it is written to disk
# (as the last expression of a notebook cell it is rendered as the cell output).
gdf_distribution

In [None]:
# Write the cleaned depots to disk as GeoJSON. The driver is named explicitly
# instead of being inferred from the ".json" suffix, so the output format does
# not depend on the extension-to-driver mapping of the installed geopandas version.
gdf_distribution.to_file(
    "../distribution_centers/output/postnl_distribution_cleaned.json",
    driver="GeoJSON",
)

In [None]:
# Draw the depots on an OpenStreetMap basemap and label each with its abbreviation
gdf = gdf_distribution.to_crs(epsg=3857)  # reproject to EPSG:3857 before adding the tiles

fig, ax = plt.subplots(figsize=(10, 10))
gdf.plot(ax=ax, color="red", markersize=40, edgecolor="black")
ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)

# Shift each label away from its marker so the text does not sit on top of the point
label_offset = 2000  # expressed in the units of the projected coordinates
label_xs = gdf.geometry.x + label_offset
label_ys = gdf.geometry.y + label_offset
for label_x, label_y, label in zip(label_xs, label_ys, gdf["depotAbbreviation"]):
    ax.text(label_x, label_y, label, fontsize=8)

ax.set_axis_off()
ax.set_title("Distribution Depots in the Netherlands", fontsize=14)
fig.tight_layout()
plt.show()