In [80]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn import set_config
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import silhouette_score

# Ask scikit-learn to use its "diagram" display mode when estimators are shown in cell output.
set_config(display="diagram")

In [17]:
# Column labels applied to the CSV via `names=`, listed in file order.
pub_columns = [
    "fsa_id",
    "name",
    "address",
    "postcode",
    "easting",
    "northing",
    "latitude",
    "longitude",
    "local_authority",
]
df = pd.read_csv("./data/open_pubs.csv", names=pub_columns)
# Preview the first rows to sanity-check the parse.
df.head()

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
0,22,Anchor Inn,"Upper Street, Stratford St Mary, COLCHESTER",CO7 6LW,604749,234404,51.970379,0.979340,Babergh
1,36,Ark Bar Restaurant,"Ark Bar And Restaurant, Cattawade Street, Bran...",CO11 1RH,610194,233329,51.958698,1.057832,Babergh
2,74,Black Boy,"The Lady Elizabeth, 7 Market Hill, SUDBURY, Su...",CO10 2EA,587334,241316,52.038595,0.729915,Babergh
3,75,Black Horse,"Lower Street, Stratford St Mary, COLCHESTER",CO7 6JS,622675,-5527598,\N,\N,Babergh
4,76,Black Lion,"Lion Road, Glemsford, SUDBURY",CO10 7RF,622675,-5527598,\N,\N,Babergh


In [23]:
# The raw file marks missing coordinates with the literal string "\N".
# Turn those markers into real NaN values so dropna() can remove the rows.
#
# Assign the result back to the columns instead of calling
# `df.latitude.replace(..., inplace=True)`: in-place methods on a column
# accessor are deprecated and do not write through to `df` under pandas
# Copy-on-Write. `np.nan` is used because the `np.NaN` alias was removed in
# NumPy 2.0.
coordinate_columns = ["latitude", "longitude"]
df[coordinate_columns] = df[coordinate_columns].replace("\\N", np.nan)

# Drop every row that still has a missing value in any column.
df = df.dropna()
df.shape

(50564, 9)

# Pub Recommendation

In [25]:
list(enumerate(df.columns))

[(0, 'fsa_id'),
 (1, 'name'),
 (2, 'address'),
 (3, 'postcode'),
 (4, 'easting'),
 (5, 'northing'),
 (6, 'latitude'),
 (7, 'longitude'),
 (8, 'local_authority')]

In [68]:
# Positional indices of latitude (6) and longitude (7) in df.columns; positions
# are used because the transformer is fed a plain array (df.values), not a frame.
coordinate_positions = [6, 7]

# Standardise the two coordinate columns and discard every other column.
scaling_step = ("scaler", StandardScaler(), coordinate_positions)
preprocessor = ColumnTransformer(transformers=[scaling_step], remainder="drop")

tnf_df = preprocessor.fit_transform(df.values)
tnf_df.shape

(50564, 2)

In [69]:
# Nearest-neighbour index over the scaled coordinates; n_jobs=-1 uses all cores.
neighbor_options = {"metric": "euclidean", "n_jobs": -1}
model = NearestNeighbors(**neighbor_options)
model.fit(X=tnf_df)

In [70]:
# Row position (not index label) of the pub used as the query point.
pub = 34

# kneighbors expects a 2-D input, so slice out a single-row matrix.
pub_data = tnf_df[pub:pub + 1]
distances, indices = model.kneighbors(X=pub_data, n_neighbors=5)

# Neighbour indices are positions in the fitted matrix, hence iloc.
nearest_positions = indices[0]
df.iloc[nearest_positions]

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
41,755,The Brook Inn,"241 Bures Road, Great Cornard, SUDBURY",CO10 0JQ,588555,239362,52.020633,0.746619,Babergh
20,416,Kins Head Inn,"115 Bures Road, Great Cornard, SUDBURY",CO10 0JE,588383,239975,52.026188,0.744452,Babergh
55,783,The Five Bells,"63 Bures Road, Great Cornard, SUDBURY",CO10 0HU,588333,240302,52.029137,0.743898,Babergh
382,4876,The Henny Swan,"Henny Street, Great Henny, Sudbury, Suffolk",CO10 7LS,587924,238464,52.01278,0.736926,Braintree
22,459,Maldon Grey,"Cats Lane, Great Cornard, SUDBURY",CO10 2RZ,588540,241341,52.038399,0.747488,Babergh


## Pipeline

In [82]:
class PubRecommender:
    """Bundle a fitted preprocessor, a fitted neighbour model and the pub table.

    Parameters
    ----------
    preprocessor
        Fitted transformer exposing ``transform``; applied to the raw query
        rows before the neighbour search.
    model
        Fitted neighbour model exposing
        ``kneighbors(X, n_neighbors=..., return_distance=...)``.
    data
        DataFrame the recommendations are read from. Neighbour indices are
        used positionally (``iloc``), so its rows must be in the same order as
        the matrix the model was fitted on.
    """

    def __init__(self, preprocessor, model, data):
        self.preprocessor = preprocessor
        self.model = model
        # Store the frame that was passed in. The previous code assigned the
        # notebook-global `df` here, silently ignoring the `data` argument.
        self.data = data

    def get_k_recommendations(self, user_data, k):
        """Return the ``k`` rows of ``self.data`` nearest to the query.

        Parameters
        ----------
        user_data
            2-D array-like holding the raw (untransformed) query row; only the
            first row's neighbours are returned.
        k
            Number of neighbours to return.

        Returns
        -------
        pandas.DataFrame
            The rows of ``self.data`` at the neighbour positions, in the order
            reported by the model.
        """
        tnf_user_data = self.preprocessor.transform(user_data)
        # With return_distance=False only the index matrix comes back; take
        # the first (and expected only) query row.
        indices = self.model.kneighbors(
            tnf_user_data, n_neighbors=k, return_distance=False
        )[0].tolist()
        return self.data.iloc[indices, :]
    
# Raw (unscaled) record of the query pub as a single-row 2-D array; the
# recommender applies the preprocessor itself.
user_data = df.iloc[[pub]].to_numpy()

recommender = PubRecommender(preprocessor=preprocessor, model=model, data=df)
recommender.get_k_recommendations(user_data=user_data, k=6)

Unnamed: 0,fsa_id,name,address,postcode,easting,northing,latitude,longitude,local_authority
41,755,The Brook Inn,"241 Bures Road, Great Cornard, SUDBURY",CO10 0JQ,588555,239362,52.020633,0.746619,Babergh
20,416,Kins Head Inn,"115 Bures Road, Great Cornard, SUDBURY",CO10 0JE,588383,239975,52.026188,0.744452,Babergh
55,783,The Five Bells,"63 Bures Road, Great Cornard, SUDBURY",CO10 0HU,588333,240302,52.029137,0.743898,Babergh
382,4876,The Henny Swan,"Henny Street, Great Henny, Sudbury, Suffolk",CO10 7LS,587924,238464,52.01278,0.736926,Braintree
22,459,Maldon Grey,"Cats Lane, Great Cornard, SUDBURY",CO10 2RZ,588540,241341,52.038399,0.747488,Babergh
7,122,Caffeine Lounge,"14 Borehamgate Shopping Precinct, King Street,...",CO10 2ED,587527,241247,52.037903,0.732687,Babergh


In [83]:
# Call the method: the bare attribute only evaluates to the bound method,
# whereas the call builds the sparse k-neighbours graph of the fitted points
# (using the model's default n_neighbors).
model.kneighbors_graph()

<50564x50564 sparse matrix of type '<class 'numpy.float64'>'
	with 252820 stored elements in Compressed Sparse Row format>