In [147]:
import numpy as np
import pandas as pd
import geopy.distance
from sklearn import preprocessing

In [3]:
from scavenger import *

In [74]:
query = (Images
         .select(Images.id, Images.street, Locations.dis_lat, Locations.dis_long)
         .join(Locations, on=(Images.street == Locations.street))
         .where(Images.street!="" and Images.usable==1)
         .limit(20)
         .dicts())

In [75]:
df = pd.DataFrame(query, columns =['id','street', 'dis_lat', 'dis_long'])

In [76]:
df.head()

Unnamed: 0,id,street,dis_lat,dis_long
0,0,Muntelplein,51.69426,5.30766
1,4,Citadellaan,51.69688,5.30403
2,11,Brugplein,51.69439,5.29895
3,14,Uilenburg,51.6876,5.29959
4,17,Hinthamerstraat,51.68902,5.3055


In [77]:
df.shape

(20, 4)

In [27]:
coords_1 = (52.2296756, 21.0122287)
coords_2 = (52.406374, 16.9251681)

print(geopy.distance.geodesic(coords_1, coords_2).km)

279.35290160430094


In [81]:
def suggestions(df, coords, nr_of_suggestions, idx):
    distances = []
    for row in df.values:
        distances.append((geopy.distance.geodesic(coords, (row[2], row[3])).km, row[0]))
    if idx in [index for distance, index in sorted(distances)[:nr_of_suggestions]]:
        suggestions = [index for distance, index in sorted(distances)[:nr_of_suggestions+1]]
        suggestions.remove(idx)
        return suggestions
    else: return [index for distance, index in sorted(distances)[:nr_of_suggestions]]

In [83]:
df['suggestions'] = df.apply(lambda x: suggestions(df, (x['dis_lat'], x['dis_long']), 5, x['id']), axis=1)

In [84]:
df.head()

Unnamed: 0,id,street,dis_lat,dis_long,suggestions
0,0,Muntelplein,51.69426,5.30766,"[18, 24, 4, 40, 38]"
1,4,Citadellaan,51.69688,5.30403,"[40, 0, 11, 18, 150]"
2,11,Brugplein,51.69439,5.29895,"[150, 4, 40, 152, 0]"
3,14,Uilenburg,51.6876,5.29959,"[153, 152, 17, 41, 20]"
4,17,Hinthamerstraat,51.68902,5.3055,"[41, 20, 151, 38, 44]"


In [87]:
df.drop('suggestions', axis=1);

In [88]:
def compare_coordinates(df, coords):
    distances = {}
    for row in df.values:
        distances[row[0]] = geopy.distance.geodesic(coords, (row[2], row[3])).km
    return distances

In [89]:
df['weights'] = df.apply(lambda x: compare_coordinates(df, (x['dis_lat'], x['dis_long'])), axis=1)

In [91]:
df.head()

Unnamed: 0,id,street,dis_lat,dis_long,suggestions,weights
0,0,Muntelplein,51.69426,5.30766,"[18, 24, 4, 40, 38]","{0: 0.0, 4: 0.38466957884739417, 11: 0.6024248..."
1,4,Citadellaan,51.69688,5.30403,"[40, 0, 11, 18, 150]","{0: 0.38466957884739417, 4: 0.0, 11: 0.4473536..."
2,11,Brugplein,51.69439,5.29895,"[150, 4, 40, 152, 0]","{0: 0.60242488764498, 4: 0.44735360247362216, ..."
3,14,Uilenburg,51.6876,5.29959,"[153, 152, 17, 41, 20]","{0: 0.9276271167076866, 4: 1.077185939451358, ..."
4,17,Hinthamerstraat,51.68902,5.3055,"[41, 20, 151, 38, 44]","{0: 0.6018386606348527, 4: 0.8804028775358267,..."


In [269]:
# KLOPT NIET, uitzoeken wat er fout ging met normalizeren oid!!

def suggestions(df, image_id, nr_suggestions):
    image_ids = [im_id for im_id, distance in df.loc[df['id']==image_id, 'weights'].values[0].items()]
    weights = [distance for im_id, distance in df.loc[df['id']==image_id, 'weights'].values[0].items()]
    weights_norm = [1 - ((val-min(weights)) / (max(weights)-min(weights))) for val in weights]
    sum_weights = sum(weights_norm)
    weights = [nr/sum_weights for nr in weights_norm]
    suggestions = []
    for time in range(nr_suggestions):
        suggestions.append(np.random.choice(image_ids, replace=False, p=weights))
    return weights

In [270]:
suggestions(df, 0, 5)

[0.0,
 0.0200733497091404,
 0.03143655258474364,
 0.04840669638901217,
 0.031405961291773477,
 0.01423097143063126,
 0.036859277766552366,
 0.019711594093132754,
 0.056097051402896954,
 0.16927401756022814,
 0.026149408149484025,
 0.0200733497091404,
 0.031405961291773477,
 0.044070838672928235,
 0.11576083287219267,
 0.16927401756022814,
 0.03936380567657919,
 0.036859277766552366,
 0.041050955466969206,
 0.048496080606040985]

In [212]:
df.loc[df['id']==0, 'weights'].values[0]

{0: 0.0,
 4: 0.38466957884739417,
 11: 0.60242488764498,
 14: 0.9276271167076866,
 17: 0.6018386606348527,
 18: 0.272710925985489,
 20: 0.7063416450430579,
 24: 0.3777371842808821,
 28: 1.0749989140032306,
 37: 3.243831547210551,
 38: 0.5011062909640002,
 40: 0.38466957884739417,
 41: 0.6018386606348527,
 44: 0.8445382159633944,
 45: 2.218347783165126,
 47: 3.243831547210551,
 150: 0.7543364097595252,
 151: 0.7063416450430579,
 152: 0.7866675955718403,
 153: 0.9293400041738189}

In [274]:
image_ids = [im_id for im_id, distance in df.loc[df['id']==0, 'weights'].values[0].items()]
weights = [distance for im_id, distance in df.loc[df['id']==0, 'weights'].values[0].items()]
weights_norm = [((val-min(weights)) / (max(weights)-min(weights))) for val in weights]
sum_weights = sum(weights_norm)
probs = [nr/sum_weights for nr in weights_norm]

In [275]:
probs

[0.0,
 0.0200733497091404,
 0.03143655258474364,
 0.04840669638901217,
 0.031405961291773477,
 0.01423097143063126,
 0.036859277766552366,
 0.019711594093132754,
 0.056097051402896954,
 0.16927401756022814,
 0.026149408149484025,
 0.0200733497091404,
 0.031405961291773477,
 0.044070838672928235,
 0.11576083287219267,
 0.16927401756022814,
 0.03936380567657919,
 0.036859277766552366,
 0.041050955466969206,
 0.048496080606040985]

In [280]:
sum_weights_2 = sum(weights)
probs2 = [nr/sum_weights_2 for nr in weights]