In [1]:
import requests
import json
import pprint

import numpy as np
import pandas as pd
import networkx as nx

from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import cosine_distances
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Load the delivery records indexed by their ID column.
# IDs mix digits and letters (e.g. "4607", "B379") and later cells look rows up
# by string label, so pin the column to str instead of relying on type inference.
df = pd.read_csv("deliverytime.csv", index_col="ID", dtype={"ID": str})

# Columns fed to the similarity computation, grouped by what they refer to.
delivery_person_columns = [
    "Delivery_person_Age",
    "Delivery_person_Ratings",
]
coordinate_columns = [
    "Restaurant_latitude",
    "Restaurant_longitude",
    "Delivery_location_latitude",
    "Delivery_location_longitude",
]
features = delivery_person_columns + coordinate_columns

In [3]:
# Keep every row, but only the columns listed in `features`.
df_features = df.loc[:, features]
df_features


Unnamed: 0_level_0,Delivery_person_Age,Delivery_person_Ratings,Restaurant_latitude,Restaurant_longitude,Delivery_location_latitude,Delivery_location_longitude
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4607,37,4.9,22.745049,75.892471,22.765049,75.912471
B379,34,4.5,12.913041,77.683237,13.043041,77.813237
5D6D,23,4.4,12.914264,77.678400,12.924264,77.688400
7A6A,38,4.7,11.003669,76.976494,11.053669,77.026494
70A2,32,4.6,12.972793,80.249982,13.012793,80.289982
...,...,...,...,...,...,...
7C09,30,4.8,26.902328,75.794257,26.912328,75.804257
D641,21,4.6,0.000000,0.000000,0.070000,0.070000
4F8D,30,4.9,13.022394,80.242439,13.052394,80.272439
5EEE,20,4.7,11.001753,76.986241,11.041753,77.026241


In [4]:
# Pairwise cosine similarity between all rows of the feature frame
# (square array, one row/column per delivery record).
# NOTE(review): the features are used unscaled, so the larger-valued coordinate
# columns presumably dominate the angle between rows -- confirm this is intended.
# NOTE(review): at least one displayed row has 0.0 restaurant coordinates; verify
# whether such rows are valid data before trusting their scores.
similarity_matrix = cosine_similarity(X=df_features)
similarity_matrix

array([[1.        , 0.99256067, 0.9859578 , ..., 0.99020823, 0.98001371,
        0.99035019],
       [0.99256067, 1.        , 0.99574118, ..., 0.99915509, 0.99300657,
        0.98859472],
       [0.9859578 , 0.99574118, 1.        , ..., 0.9986638 , 0.99946466,
        0.9946971 ],
       ...,
       [0.99020823, 0.99915509, 0.9986638 , ..., 1.        , 0.99699607,
        0.99192718],
       [0.98001371, 0.99300657, 0.99946466, ..., 0.99699607, 1.        ,
        0.99241998],
       [0.99035019, 0.98859472, 0.9946971 , ..., 0.99192718, 0.99241998,
        1.        ]])

In [5]:
# Label both axes of the similarity array with the record IDs so that scores
# can be looked up as similarity_df[id_a][id_b].
record_ids = df.index
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=record_ids,
    columns=record_ids,
)
similarity_df

ID,4607,B379,5D6D,7A6A,70A2,9BB4,95B4,9EB2,1102,CDCD,...,5193,A333,C9AB,4E+21,1178,7C09,D641,4F8D,5EEE,5FB2
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4607,1.000000,0.992561,0.985958,0.989977,0.991068,0.989443,0.997860,0.992239,0.989469,0.996813,...,0.992238,0.992322,0.996951,0.990249,0.998634,0.997002,0.317595,0.990208,0.980014,0.990350
B379,0.992561,1.000000,0.995741,0.999089,0.999685,0.993179,0.994431,0.999911,0.993141,0.980763,...,0.999773,0.999873,0.987868,0.998768,0.994888,0.985052,0.295556,0.999155,0.993007,0.988595
5D6D,0.985958,0.995741,1.000000,0.991475,0.997701,0.998515,0.993536,0.994511,0.998484,0.976027,...,0.993631,0.994211,0.987908,0.999073,0.992618,0.983875,0.208282,0.998664,0.999465,0.994697
7A6A,0.989977,0.999089,0.991475,1.000000,0.997979,0.987325,0.990015,0.999558,0.987273,0.975776,...,0.999730,0.999616,0.982008,0.996032,0.991017,0.979234,0.329770,0.996850,0.988372,0.981391
70A2,0.991068,0.999685,0.997701,0.997979,1.000000,0.995180,0.994557,0.999313,0.995141,0.979411,...,0.998978,0.999203,0.988014,0.999665,0.994650,0.984774,0.272461,0.999865,0.995652,0.990578
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7C09,0.997002,0.985052,0.983875,0.979234,0.984774,0.990719,0.997477,0.983770,0.990776,0.998879,...,0.983266,0.983706,0.999666,0.985712,0.997357,1.000000,0.260464,0.984894,0.978229,0.994529
D641,0.317595,0.295556,0.208282,0.329770,0.272461,0.197098,0.259250,0.307376,0.197011,0.293934,...,0.315185,0.309999,0.247520,0.249875,0.272467,0.260464,1.000000,0.257756,0.186114,0.187413
4F8D,0.990208,0.999155,0.998664,0.996850,0.999865,0.996373,0.994673,0.998583,0.996336,0.978905,...,0.998122,0.998427,0.988317,0.999947,0.994513,0.984894,0.257756,1.000000,0.996996,0.991927
5EEE,0.980014,0.993007,0.999465,0.988372,0.995652,0.997097,0.989482,0.991591,0.997056,0.968876,...,0.990539,0.991213,0.983009,0.997496,0.988206,0.978229,0.186114,0.996996,1.000000,0.992420


In [6]:

from sklearn.metrics import DistanceMetric
from sklearn.metrics.pairwise import cosine_distances

In [9]:
def get_top_similar_items(item_id, n=10, similarity=None):
    """Return the `n` items most similar to `item_id`, best match first.

    Parameters
    ----------
    item_id : label
        Column label of the query item in the similarity frame.
    n : int, default 10
        Maximum number of neighbours to return.
    similarity : pandas.DataFrame, optional
        Square item-by-item similarity frame whose index and columns carry the
        same labels. Defaults to the notebook-level `similarity_df`.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by item label, sorted in descending order,
        never containing `item_id` itself.

    Raises
    ------
    KeyError
        If `item_id` is not a label of the similarity frame.
    """
    if similarity is None:
        similarity = similarity_df

    scores = similarity[item_id]
    # Remove the query item by label rather than assuming it ranks first:
    # other items can tie with its self-similarity of 1.0, in which case
    # slicing off the top entry could discard a real neighbour and keep the
    # query item in the result.
    return scores.drop(labels=item_id).nlargest(n)

# Query record and neighbour count, defined once so the lookup and the
# printed header cannot drift apart.
sample_id = "4607"
top_n = 10
top_similar_items = get_top_similar_items(sample_id, n=top_n)

print(f"Top {top_n} similar delivery times to ID {sample_id}:")
print(top_similar_items)


Top 10 similar delivery times to ID 4607:
ID
35C0     1.0
1FDF     1.0
7AAC     1.0
A58      1.0
4142     1.0
B49B     1.0
1D57     1.0
6496     1.0
A868     1.0
5E+09    1.0
Name: 4607, dtype: float64
