## Investigate and rank the distance between Scottish distilleries and train stops

In [None]:
import pandas as pd
import geopandas as gpd

Read distillery data

In [None]:
# Load the distillery dataset, preferring the copy published on GitHub.
try:
    distilleries = gpd.read_file('https://raw.githubusercontent.com/mhamilt/completely-smashed/main/completely-smashed.geojson')
except Exception:
    # Fallback to local data. Catching Exception (not a bare except) keeps
    # KeyboardInterrupt / SystemExit from being swallowed by the fallback.
    distilleries = gpd.read_file('data/completely-smashed.geojson')

Prepare distillery data

In [None]:
# Index the rows by distillery name so single records can be addressed by label.
distilleries = distilleries.set_index("Name")

# The Dalwhinnie Distillery record is slightly off. The actual position is
# 56.939655, -4.2430025 (the train station is at 56.934334, -4.246498).
for column, value in {"Lat": 56.939655, "Lng": -4.2430025}.items():
    distilleries.at["Dalwhinnie Distillery", column] = value

# One of the Loch Lomond entries is a duplicate in the dataset; remove it.
distilleries.drop(index="Loch Lomond Distillery (1)", inplace=True)

# Normalise the spelling "Speyside malt" to "Speyside Malt".
distilleries["Description"] = distilleries["Description"].replace(
    "Speyside malt", "Speyside Malt"
)

In [None]:
# Rebuild the point geometry from the (corrected) Lng/Lat columns.
# The CRS is attached explicitly: points_from_xy() without `crs` yields
# CRS-less points, and the later reprojection must not depend on the frame
# implicitly keeping its previous CRS.
# NOTE(review): assumes Lat/Lng are WGS84 degrees (EPSG:4326), as the station
# data is treated elsewhere in this notebook - confirm against the dataset.
distilleries.geometry = gpd.points_from_xy(
    distilleries.Lng, distilleries.Lat, crs="EPSG:4326"
)

In [None]:
# Reproject to EPSG:27700 so that distances computed later are in metres.
distilleries = distilleries.to_crs(epsg=27700)

Read UK train stops data

In [None]:
# Load the UK station list, preferring the copy published on GitHub.
# The first CSV column becomes the index.
try:
    uk_stations = pd.read_csv('https://raw.githubusercontent.com/davwheat/uk-railway-stations/main/stations.csv', index_col=0)
except Exception:
    # Fallback to local data. Catching Exception (not a bare except) keeps
    # KeyboardInterrupt / SystemExit from being swallowed by the fallback.
    uk_stations = pd.read_csv('data/stations.csv', index_col=0)

Prepare UK train stops data

In [None]:
# Turn the station table into a GeoDataFrame: build points from the long/lat
# columns, declare them as EPSG:4326, then reproject to EPSG:27700 to match
# the distillery frame.
uk_stations = gpd.GeoDataFrame(
    uk_stations,
    geometry=gpd.points_from_xy(x=uk_stations.long, y=uk_stations.lat),
    crs="EPSG:4326",
).to_crs(epsg=27700)

Find closest train stop to each distillery

In [None]:
# Attach the nearest station to every distillery; the separation is stored in
# the "Distance in m" column (both frames are in EPSG:27700 at this point).
# The station index name is dropped for the join so that the station label
# always arrives in a column called "index_right".
# NOTE(review): geopandas >= 1.0 is documented to name that column after the
# right frame's index name when it has one - confirm against the changelog.
distilleries_stops = distilleries.sjoin_nearest(
    uk_stations.rename_axis(None),
    distance_col="Distance in m",
)

Sort by distance & round

In [None]:
# Order the rows from the closest to the farthest station ...
distilleries_stops = distilleries_stops.sort_values(by="Distance in m")
# ... and store the distance as a whole number of metres.
distilleries_stops["Distance in m"] = (
    distilleries_stops["Distance in m"].round().astype(int)
)

Filter columns and rename

In [None]:
# Rename the joined station label to "Trainstop", then keep only the columns
# that go into the report (geometry is retained so the frame can be exported).
distilleries_stops = distilleries_stops.rename(
    columns={"index_right": "Trainstop"}
)[
    [
        "Owner",
        "Description",
        "Url",
        "Address",
        "Trainstop",
        "Distance in m",
        "geometry",
    ]
]

Provide basic statistics

In [None]:
# Show summary statistics of the result table (rendered as the cell output).
distilleries_stops.describe()

In [None]:
# Display the full result table, sorted by distance to the nearest station.
distilleries_stops

In [None]:
# Fun facts for anyone planning to visit every distillery.
# Sum of all distillery-to-station distances, converted from metres to km.
total_distance = distilleries_stops["Distance in m"].sum() * 1e-3

# One line each: total distance, the same distance in marathons (42.195 km),
# and the whisky volume at three 35 ml drams per distillery.
print(
    f"The total distance is {total_distance:.2f} km",
    f"Visiting all distillieries requires to run {total_distance/42.195:.2f} marathons",
    f"The amount of whisky consumed assuming 3 x 35 ml per distillery is: {len(distilleries_stops.index)*3*0.035:.2f} litres",
    sep="\n",
)

Save in various formats

In [None]:
import os

# Make sure the target directory exists; neither writer below creates it.
os.makedirs("output", exist_ok=True)

# Change CRS to EPSG 4326
distilleries_stops.to_crs(epsg=4326, inplace=True)

# Save as GeoJSON and use the RFC7946 standard
distilleries_stops.to_file("output/distilleries_result.geojson", driver='GeoJSON', RFC7946="YES")

# Save as CSV
distilleries_stops.to_csv("output/distilleries_result.csv")