In [371]:
import numpy as np
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")


# Exploratory Analysis & PreProcessing

In [372]:
df_train = pd.read_csv('data.csv')

In [373]:
df_train.shape # Shape of our training set

(10000, 3)

In [374]:
df_train.head()

Unnamed: 0,address,latitude,longitude
0,"JAIPUR,H.NO.- 408, LAVENDER MANGALAM, ANAND NA...",71.052021,11.396546
1,"C-341-A,Malviya nagar,302017",70.516921,9.85515
2,"J-3A, Khandal Hostel, Jhalana Doongri,,Jaipur,...",70.430068,9.222819
3,846 Rani Sati Nagar Janpath Lane No. 10 Aj...,71.333194,8.762032
4,"12/132 , sector 12,girdhar marg, malviya nagar...",70.49008,10.209948


In [375]:
df_train['Comma_delimitted_address']=df_train['address'].str.split(',') #Comma delimitting string

In [376]:
df_train['zip']=df_train['Comma_delimitted_address']

In [377]:
for n in range(10000):
    df_train['zip'][n]=df_train['Comma_delimitted_address'][n][-1] #Extracting Zip Codes from address
    df_train['address'].iloc[n]=df_train['address'].iloc[n].lower() #Converting address to lowercase

In [142]:
df=df_train.groupby('zip').describe()
df ## Simple exploratory analysis giving us insights about various zipcodes associated with our addresses 

Unnamed: 0_level_0,latitude,latitude,latitude,latitude,latitude,latitude,latitude,latitude,longitude,longitude,longitude,longitude,longitude,longitude,longitude,longitude
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
zip,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
302001,227.0,70.563646,0.169794,70.14506,70.457821,70.50882,70.678291,71.278342,227.0,7.499307,0.453884,5.733055,7.335641,7.500807,7.627125,10.90016
302002,200.0,70.244976,0.116888,70.04176,70.156119,70.262491,70.321249,71.077559,200.0,6.794372,0.326928,5.963971,6.638705,6.793148,6.94277,8.713631
302003,136.0,70.244915,0.111343,70.014251,70.138482,70.247168,70.323494,70.868688,136.0,7.535019,0.204884,7.259255,7.409824,7.488543,7.550351,8.497583
302004,576.0,70.243602,0.08402,70.067955,70.184189,70.238455,70.292628,70.724412,576.0,8.368404,0.268464,7.666205,8.187441,8.356049,8.5251,9.321617
302005,51.0,70.705687,0.037339,70.615241,70.680351,70.705253,70.733307,70.785151,51.0,8.491171,0.204339,7.344134,8.413764,8.506746,8.582362,8.903108
302006,326.0,70.889371,0.116174,70.635735,70.806431,70.87013,70.966213,71.530073,326.0,8.053026,0.376979,7.147274,7.851548,8.066003,8.333253,8.907539
302007,2.0,70.591596,0.451391,70.272414,70.432005,70.591596,70.751187,70.910777,2.0,8.097628,0.132911,8.003646,8.050637,8.097628,8.144619,8.19161
302011,7.0,70.660223,0.321424,70.038364,70.595341,70.742748,70.791978,71.065814,7.0,11.02241,0.364164,10.500514,10.759103,11.160572,11.283139,11.411296
302012,959.0,71.369186,0.217888,70.756466,71.2332,71.337373,71.43541,72.515009,959.0,6.497777,0.542624,3.800507,6.155461,6.591599,6.820019,8.119502
302013,189.0,70.980195,0.142525,70.649209,70.908318,70.985594,71.075931,71.339939,189.0,5.09291,0.448873,3.952547,4.819912,5.136315,5.413151,6.488565


# Tokenizing the addresses

In [378]:
df_train['tokens'] = df_train['Comma_delimitted_address']
import nltk
for n in range(10000):
    tokens = nltk.word_tokenize(df_train['address'][n])
    exceptions = [',','jaipur','rajasthan' ,'near', 'road', 'apartment', 'adjacent','sector']
    #^ Customized stopwords that will have high colinearity with a wide range of addresses 
    #These words if not removed can skew Jaccard similarity between two unrelated addresses significantly
    #My exceptions/stopwords list by no means is exhaustive its just an attempt to showcase what needs to be done
    df_train['tokens'][n]=set(tokens).difference(exceptions)
    #Removing our custom set of stopwords

In [476]:
df_train.head()

Unnamed: 0,address,latitude,longitude,Comma_delimitted_address,zip,tokens,similarity,distance
0,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.052021,11.396546,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1.0,0.0
1,"c-341-a,malviya nagar,302017",70.516921,9.85515,"[C-341-A, Malviya nagar, 302017]",302017,"{nagar,302017, malviya, c-341-a}",0.0,1.63164
2,"j-3a, khandal hostel, jhalana doongri,,jaipur,...",70.430068,9.222819,"[J-3A, Khandal Hostel, Jhalana Doongri, , Ja...",302004,"{j-3a, khandal, jhalana, hostel, ,jaipur,30200...",0.0,2.26095
3,846 rani sati nagar janpath lane no. 10 aj...,71.333194,8.762032,[846 Rani Sati Nagar Janpath Lane No. 10 A...,302019,"{sati, india,302019,302019, ajmer, nagar, 10, ...",0.0416667,2.64948
4,"12/132 , sector 12,girdhar marg, malviya nagar...",70.49008,10.209948,"[12/132 , sector 12, girdhar marg, malviya n...",302017,"{girdhar, 12/132, 12, marg, malviya, nagar,302...",0.0,1.31293


# Calculating Jaccard Score within the same zipcode subgroup (Test Case used here is Zip Code 302029)

** Jaccard Score is a way to measure the similarity of two strings, its can be summarized as the  Intersection over Union of the sets at play **


** Higher score corresponds to higher similarity within two strings**

In [454]:
df_temp= df_train[df_train['zip']=='302029'] #Test Case of Sub Group ZipCode = 302029
df_temp['similarity']= df_temp['zip']

In [438]:
for i in range(len(df_temp)): #Calculating Jaccard Scores for element '0' w.r.t. other elements with the same zipcode
    intersection = df_temp['tokens'].iloc[0].intersection(df_temp['tokens'].iloc[i])
    jaccard_score= float(len(intersection))/((len(df_temp.tokens.iloc[0])) + len(df_temp.tokens.iloc[i]) - len(intersection))
    print(jaccard_score)
    df_temp['similarity'].iloc[i] = jaccard_score

1.0
0.08333333333333333
0.125
0.1
0.047619047619047616
0.05555555555555555
0.0
0.09090909090909091
0.0
0.038461538461538464
0.043478260869565216
0.043478260869565216
0.045454545454545456
0.07142857142857142
0.043478260869565216
0.08
0.0
0.043478260869565216
0.0
0.08
0.05263157894736842
0.08
0.0
0.047619047619047616
0.041666666666666664
0.04
0.043478260869565216
0.09523809523809523
0.0
0.09523809523809523
0.07692307692307693
0.09523809523809523
0.0
0.03571428571428571
0.0
0.0
0.09090909090909091
0.08
0.05
0.05
0.04
0.038461538461538464
0.09090909090909091
0.08695652173913043
0.14814814814814814
0.09090909090909091
0.08
0.045454545454545456
0.043478260869565216
0.0
0.0
0.08695652173913043
0.09523809523809523
0.08695652173913043
0.0
0.0
0.05555555555555555
0.13636363636363635
0.0
0.125
0.0
0.0
0.13043478260869565
0.08695652173913043
0.0
0.0
0.15789473684210525
0.04
0.058823529411764705
0.058823529411764705
0.047619047619047616
0.05
0.13043478260869565
0.0
0.13333333333333333
0.125
0.05
0.

In [441]:
df_temp.sort_values(by='similarity', ascending=False)

Unnamed: 0,address,latitude,longitude,Comma_delimitted_address,zip,tokens,similarity
0,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.052021,11.396546,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1
5201,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.051969,11.398205,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1
7369,"rose g-8 , mangalam aananda, opposite sanganer...",71.058382,11.359776,"[Rose G-8 , mangalam Aananda, opposite sanga...",302029,"{mangalam, railway, station, aananda, jaipur,3...",0.210526
7974,"rose g-8 , mangalam aananda, opposite sanganer...",71.062842,11.351647,"[Rose G-8 , mangalam Aananda, opposite sanga...",302029,"{mangalam, railway, station, aananda, jaipur,3...",0.210526
8244,"41 sunder nagar sanganer railway station, ke s...",70.999621,11.356692,"[41 sunder nagar sanganer railway station, ke...",302029,"{railway, station, samne, jaipur,302029, nagar...",0.2
5702,"flat 306, block iris, mangalam ananda,near san...",71.045985,11.359348,"[Flat 306, block iris, mangalam ananda, Near...",302029,"{mangalam, railway, station, block, station,30...",0.190476
2250,"bright cotton b4, khatri nagar, near sanganer ...",70.892179,10.845294,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818
8660,"bright cotton b4, khatri nagar, near sanganer ...",70.891893,10.846765,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818
6491,"bright cotton b4, khatri nagar, near sanganer ...",70.892897,10.848949,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818
4041,"bright cotton b4, khatri nagar, near sanganer ...",70.892786,10.848944,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818


# Calculating Jaccard Score for test case element '0' w.r.t. to all other addresses , and calculating relative distance from each of these points 

**Distance metric is calculate here assuming long/lat cordinates to be analogous to cordintaes in a Euclidean Space. This is done only because these lat/longitudes are encoded and dont refelect true positions on the globe so a Euclidean system should sufficiently capture the crux of the relationship (Nearness/Furtherness)**

In [469]:
df_copy=df_train
df_copy['similarity'] = df_copy['zip']
df_copy['distance'] = df_copy['zip']
import math  
for i in range(len(df_copy)):
    intersection = df_copy['tokens'].iloc[0].intersection(df_copy['tokens'].iloc[i])
    jaccard_score= float(len(intersection))/((len(df_copy.tokens.iloc[0])) + len(df_copy.tokens.iloc[i]) - len(intersection))
    print(jaccard_score)
    df_copy['similarity'].iloc[i] = jaccard_score 
    dist = math.sqrt((df_copy['latitude'].iloc[i] - df_copy['latitude'].iloc[0])**2 + (df_copy['longitude'].iloc[i] - df_copy['longitude'].iloc[0])**2)  
    df_copy['distance'].iloc[i] = dist  
    print(dist)

1.0
0.0
0.0
1.6316357220225368
0.0
2.2609542957529016
0.041666666666666664
2.6494759884073007
0.0
1.3129331623229332
0.0
2.2713669660142672
0.0
3.0775990106425137
0.05
2.87091040072285
0.05263157894736842
1.0719882578870683
0.037037037037037035
2.9039394064999913
0.0
4.898744073732788
0.05263157894736842
2.402313851113185
0.0
3.6353453438357963
0.0
0.8228131454042017
0.0
3.423675408728098
0.05
2.8662097673432565
0.0
4.647273381707964
0.0
1.784115985202047
0.041666666666666664
4.770033211803288
0.038461538461538464
4.977555688509073
0.0
5.37446448227521
0.045454545454545456
2.479421520469469
0.05
2.829759334215084
0.05263157894736842
2.682935001041983
0.0
1.083855372726804
0.08333333333333333
0.4067356001109065
0.05263157894736842
1.663428584485601
0.0
2.273984068319394
0.0
4.395469143586393
0.125
0.14945003582346594
0.0
2.687230367674802
0.0
4.731500200936997
0.1
0.7814959490221073
0.0
3.276397703623275
0.05
4.631527660164454
0.0
1.5134835646698839
0.0
1.810011254470001
0.0
2.587205751

2.7645554885549366
0.03571428571428571
2.9902520679590987
0.058823529411764705
1.1470733334865781
0.05
3.4074738681726524
0.05
2.945757775192184
0.0
5.853738397767087
0.0
3.6453176617794165
0.05
1.835005619496255
0.1111111111111111
0.8246206189146724
0.04
2.2116119586726484
0.043478260869565216
5.758008720707767
0.0
1.5623572138129145
0.047619047619047616
2.3156675660836403
0.0
1.2128837955278693
0.0
1.4750445191655217
0.0
0.4282462393493436
0.0
2.019284673722534
0.0
1.424023934159734
0.0
1.9410537765625906
0.0
5.1925968003361245
0.043478260869565216
1.2024816207871716
0.043478260869565216
1.5875283425711737
0.0
5.046820580841677
0.043478260869565216
2.5388027774135464
0.0
8.171161787351387
0.0
4.628165527091724
0.0
4.198630679577793
0.0
4.229060727295409
0.0
1.7159621363008857
0.0
3.3721802398950524
0.0
3.3178063727169476
0.0
1.2638672036669394
0.0
1.2346157456702247
0.0
2.387917729339517
0.0
1.25736173407351
0.08695652173913043
0.5094796564743497
0.04
3.2228829442239513
0.0
1.8667130

4.7509687284781155
0.0
2.737151822932992
0.0
3.536848499337535
0.05555555555555555
2.94893599591406
0.0
5.794339378434365
0.0
1.8247104597009662
0.058823529411764705
1.4206540224453676
0.03225806451612903
2.958163927034468
0.0
1.1980683894858328
0.045454545454545456
1.6112128881203052
0.0
5.0956116016510276
0.0
4.451861480831533
0.05
2.7767935988511474
0.0
2.1328674662729
0.0
0.8775054419713382
0.0
4.111149904191992
0.0
2.9923862714460188
0.05555555555555555
3.2787574214313224
0.0
1.4908736432719727
0.045454545454545456
4.7331491848296094
0.0
1.408821730357162
0.0
3.391181230332378
0.047619047619047616
2.3533484296665557
0.0
0.9342062352013649
0.0
1.9006260418121286
0.0
2.3447125688678043
0.05263157894736842
5.807580668030263
0.03571428571428571
0.2844752715216197
0.038461538461538464
1.1978115107992655
0.0
6.574909848944231
0.0
0.6519217730617604
0.047619047619047616
2.373450702779189
0.0
1.9621668641630199
0.05
4.752094770491967
0.0
5.625353794215211
0.0
6.031930466473267
0.0
4.80995

0.0
1.8354092882531154
0.0
0.8255898094892161
0.0
0.9179598124804782
0.05555555555555555
0.23595916742544976
0.0
3.326640099572147
0.043478260869565216
2.904844840873593
0.0
6.072037231943895
0.045454545454545456
3.3892848194246117
0.0
3.0610102364552474
0.0
3.3828545551034206
0.0
1.1449987639977426
0.041666666666666664
4.892720314994553
0.0
1.5224744177014014
0.045454545454545456
3.4932903765760432
0.05
3.2740289415674595
0.0
1.8933759008037139
0.058823529411764705
5.83106766032991
0.0
1.076542315175355
0.045454545454545456
3.078047816336748
0.043478260869565216
2.3759103750959065
0.0
2.6770258249652987
0.0
4.700092322222136
0.13636363636363635
0.038643438044791645
0.0
3.2817999236464424
0.043478260869565216
2.4306425064022426
0.04
3.893980046702176
0.047619047619047616
1.2597755481556314
0.0
3.2838156537836865
0.0
5.016128759718983
0.07142857142857142
2.1759413018974043
0.05
2.8671043767753663
0.0
6.004089504949206
0.0
3.1046538760672084
0.0
1.5181811908144394
0.0
2.8418021625488734


1.074018511426308
0.05263157894736842
0.2799057276779693
0.0
6.839338435012769
0.0
5.279720602519445
0.05263157894736842
2.5191091915322996
0.0
3.5600971798248353
0.0
2.131505216425814
0.0
4.974774310882543
0.045454545454545456
5.538625249873805
0.05555555555555555
4.535902104050072
0.043478260869565216
3.4985445860082103
0.045454545454545456
4.704977760133471
0.045454545454545456
3.638756428681947
0.058823529411764705
2.6949935614421903
0.1111111111111111
1.1803826419060903
0.08
3.0006855078137
0.08
0.24062070582672077
0.0
4.923448691702106
0.047619047619047616
1.161275911865659
0.0
4.1974797591846915
0.041666666666666664
6.972146433797608
0.0
2.67600808978237
0.05555555555555555
1.6081072725954688
0.045454545454545456
3.0885757800450655
0.043478260869565216
5.746160297821411
0.0
0.7063461169718548
0.0
2.4473116947915416
0.0
5.027694979816236
0.0
3.0559511555479
0.0
2.5519812931914205
0.05263157894736842
4.790187714914187
0.0
1.5483420110683543
0.0
3.5971636485922964
0.043478260869565

1.3397114262916432
0.05263157894736842
2.5751477098340523
0.0
1.1896781940700696
0.0
3.2983938087850215
0.0
5.156939626707988
0.0
4.692811523289572
0.0
5.473781839549856
0.047619047619047616
3.4616122058976178
0.0
2.6430001199758872
0.0
2.5695818551809353
0.0
1.1784651228727614
0.03125
1.0684238686896173
0.043478260869565216
0.8727145138019149
0.0
3.0440964101166506
0.05
4.0952152620046895
0.08
0.2417789302402427
0.0
0.9104943789214386
0.0
3.0300609197972572
0.047619047619047616
1.2703154157319652
0.05263157894736842
3.2024702811716983
0.0
2.8250322619992874
0.0
2.695174893902039
0.0
4.916078132886236
0.043478260869565216
2.2511125924227287
0.0
3.5828058389989503
0.0
0.2805019993756642
0.05
0.522010569996199
0.0
2.9864874660556984
0.0
1.0549557150099471
0.0
5.348704614325745
0.034482758620689655
5.522122498795262
0.05
2.230032531160954
0.0
1.4117856290083344
0.0
3.048362567835733
0.0
2.1474586520821424
0.0
6.2026328478625015
0.0
1.474694598861467
0.0
3.812096401302138
0.041666666666666

0.0
5.8720434607615015
0.0
5.010595925201338
0.05263157894736842
0.27803712651273216
0.09090909090909091
0.2801696869127418
0.0
4.815251206359385
0.07142857142857142
1.6548128815059249
0.04
6.15354064780505
0.0
3.491594144373315
0.05263157894736842
2.901356360302057
0.0
2.360315904598858
0.05263157894736842
5.217919424955719
0.0
4.899592995068553
0.0
3.276521462415726
0.041666666666666664
3.196760630044956
0.0
1.0502331426176217
0.047619047619047616
2.7045263391043664
0.0
3.544458482289388
0.0
2.0569125565436646
0.045454545454545456
4.172717778551759
0.0
5.207424164709581
0.05
3.013800692399118
0.0
2.9605652942131684
0.0
0.28632373071556977
0.0
3.296672287082276
0.0
2.1965406472455125
0.043478260869565216
3.1006027784717904
0.0
1.579970768102088
0.0
6.280573981918889
0.0
1.491619871518785
0.05555555555555555
4.653856268113082
0.0
3.8601789201480905
0.0
2.940019056276537
0.0
3.085545840541323
0.058823529411764705
1.3871847360337435
0.05263157894736842
1.6221019687720373
0.04545454545454

0.0
5.122621160963659
0.034482758620689655
2.270744827286844
0.0
1.511780888825634
0.04
3.0864803093583753
0.0
2.4757613032781514
0.0
1.5738444657637438
0.038461538461538464
5.070205941578042
0.041666666666666664
3.2563884756095294
0.0
3.0082635458957254
0.0
1.7898380151203794
0.05
3.9760006268799764
0.09523809523809523
0.8301776596784808
0.0
1.3920138070874315
0.05
2.1893221083432874
0.0
2.45560173996147
0.0
2.642854835748669
0.05
5.212614462790809
0.05263157894736842
3.3113579976712355
0.0
3.740970457904755
0.0
5.090260469498332
0.0
1.5363269575205418
0.0
6.498056143919985
0.0
3.32922969199461
0.05
5.263975861315724
0.0
1.1740673735605622
0.0
1.7590642418192344
0.0
5.193899108722431
0.0
5.359300618981853
0.0
4.017782966299047
0.0
2.1267355385811437
0.0
6.804036209369954
0.0
6.241190364157443
0.0
3.083198525063967
0.0
5.23076117939885
0.05
1.0909937205381495
0.0
4.70849569499264
0.038461538461538464
2.6088181277737896
0.0
1.6081578806306698
0.0
2.4967902352393483
0.0
3.941927760571263

1.3338200325428666
0.0
2.8884265065028196
0.0
1.1632596566653195
0.047619047619047616
2.3978809178823104
0.0
2.2215790461348064
0.0
3.4969470981165576
0.0
2.8879371139874648
0.04
4.63504992686989
0.0
1.2726083328497075
0.0
1.3059830506652133
0.0
1.523167566123177
0.0
5.234485265710017
0.047619047619047616
1.7277986546105413
0.09523809523809523
0.9737870780618798
0.0
0.7953111814795266
0.041666666666666664
3.946191292647805
0.05263157894736842
4.623698502787858
0.09090909090909091
2.845374848610867
0.041666666666666664
6.031540529109459
0.0
2.082275497943415
0.0
3.2814654306038475
0.037037037037037035
2.3766281012631345
0.0
3.0622315424498434
0.0
3.0704769370644738
0.0
4.788939897083281
0.0
1.1090059020022192
0.05
1.8843862332884018
0.0
2.181737785556781
0.0
2.455553334197106
0.0
3.4180454814419865
0.0
3.303299818531544
0.0
4.239951743238342
0.0
1.1152877303890738
0.058823529411764705
1.4162141251445581
0.0
1.2385562728161326
0.0
4.486983776177551
0.0
1.1580132065783506
0.0
3.1366391675

0.043478260869565216
1.3234711227713711
0.05
5.8141739236104435
0.0
1.3495977018274903
0.0
4.175587443382111
0.0
3.8366046396077773
0.0
3.0998438060730225
0.0
4.070018032557825
0.043478260869565216
0.7451104120889189
0.0
2.8192861291745683
0.05
3.9422225447576738
0.0
3.1707036044209795
0.0
1.9379945287545624
0.0
1.9765719820642307
0.05263157894736842
1.4431912754338485
0.0
0.3113475832960198
0.0
1.5712359600112538
0.0
3.1005208576815475
0.0
2.6534859345342054
0.05555555555555555
2.727401410451325
0.0
3.906154657529387
0.0
4.814167418470586
0.0
1.1034634054477688
0.041666666666666664
3.913684798113566
0.05
5.067870874787681
0.041666666666666664
4.225565175373282
0.058823529411764705
2.6689704166144317
0.0
1.2574387246059295
0.05
3.005485023524368
0.0
4.068090952628534
0.047619047619047616
2.2078185027350146
0.0
2.0995978872755274
0.0
3.379618817712671
0.0
3.223454412134324
0.0
2.4272454654773106
0.0
1.0304678162610665
0.0
2.132609614949607
0.045454545454545456
2.5145878962662773
0.0
1.2

0.038461538461538464
0.7852925961693629
0.0
2.857252285661826
0.0
5.8509784093456165
0.125
0.15033624490386316
0.047619047619047616
1.3603825397464342
0.0
1.6405283680371334
0.05
2.413665054656252
0.05
4.69712359749979
0.0
4.713287347033514
0.05263157894736842
0.684527195734253
0.0
1.8170962584687864
0.0
1.0996893350217491
0.0
3.934290457208292
0.0
2.1136967412157026
0.0
2.893043721178424
0.0
1.4445228341138519
0.0
5.313221596345222
0.0
2.6905382662245545
0.04
3.3119876434990663
0.0
5.2680767507091195
0.05263157894736842
2.581172722011864
0.0
3.406300447312157
0.058823529411764705
2.971653557385637
0.0
1.3361584339219605
0.0
4.23025356754072
0.045454545454545456
2.4959500022781627
0.0
1.5267191235296467
0.0
4.3421898325976445
0.0
0.961939622503765
0.047619047619047616
2.6705786031052754
0.03571428571428571
2.867188688557418
0.08333333333333333
1.1890327603855118
0.0
1.125873237334107
0.0
2.572158820076632
0.0
4.196671729648662
0.0
3.1188036279096236
0.10526315789473684
1.16740813621756

4.620627761793071
0.05263157894736842
1.6631683762969172
0.04
5.219895185746228
0.0
4.918587324754267
0.0
3.0671067310274136
0.05
5.072756856236094
0.0
5.257479874582442
0.0
2.7660397143500908
0.045454545454545456
4.8430342845608045
0.05
5.089924617720232
0.05263157894736842
1.4377035472186812
0.045454545454545456
3.2129400490930045
0.04
4.794125952314123
0.0
5.21754138469667
0.0
2.0612447298402663
0.0
5.796506705907795
0.0
4.98530677102427
0.0
3.784038959574017
0.05555555555555555
4.756607133758226
0.058823529411764705
2.488797173426321
0.0
3.2916692452403886
0.0
1.1496721214411614
0.045454545454545456
0.829394207881031
0.0
1.5589563902061703
0.0
1.9408568449073482
0.043478260869565216
1.1139960244924532
0.1
3.068186412021188
0.0
1.3487934066052354
0.0
3.022286078391793
0.0
1.4098520864271795
0.0
2.248885681898833
0.0
2.262403520585466
0.0
0.5951663121720647
0.047619047619047616
1.0814710749473466
0.05555555555555555
3.403866791463154
0.0
2.9370625342051566
0.045454545454545456
0.7269

0.04
1.553343353751465
0.05
3.7204803899685857
0.0
3.4011073148678936
0.0
3.071113392157953
0.0
8.139593429343064
0.041666666666666664
1.907233594403295
0.0
4.458959993014206
0.08
0.49562335089900017
0.0
3.81398318385736
0.0
2.080438807324856
0.0
1.7973897234680154
0.0
3.0769053675729263
0.047619047619047616
5.3290498810366485
0.0
2.8854347636843007
0.0
3.533399504429197
0.041666666666666664
5.60184891924848
0.047619047619047616
5.234158501542109
0.0
4.928662131184345
0.05555555555555555
5.7009154233769905
0.0
6.114728220152768
0.0
6.627466955269175
0.043478260869565216
5.822898942160041
0.045454545454545456
1.4891063695534272
0.0
1.1063111991848078
0.0
1.276915742603393
0.0
4.783917752512913
0.0
1.0478022170171424
0.0
2.136698948877208
0.037037037037037035
5.242249682829058
0.045454545454545456
4.5325768783953535
0.041666666666666664
1.7913312037944333
0.04
5.3333048488559305
0.13043478260869565
0.45031063948395356
0.0
1.9367331712676485
0.058823529411764705
1.2918757397001037
0.03846

0.05
2.432612886858542
0.0
3.804202192213915
0.045454545454545456
2.4700123006126797
0.10526315789473684
2.6575373702206506
0.045454545454545456
2.4160645580905347
0.05555555555555555
0.7586135595465325
0.041666666666666664
4.583001812110525
0.058823529411764705
2.3451371562316012
0.0
5.192233972751161
0.045454545454545456
1.434655711500442
0.0
4.456736418616721
0.0
2.562956729245418
0.0
1.5536177899025905
0.0
1.5864540446223845
0.047619047619047616
0.8264568263985169
0.0
4.102818663142045
0.0
5.621278304013925
0.037037037037037035
0.5779223605048962
0.047619047619047616
6.0844840463268905
0.05555555555555555
0.0025712981843593833
0.0
0.28342115839399445
0.0
1.7015831837662652
0.0
1.145178299116148
0.1111111111111111
0.02498588694047722
0.037037037037037035
3.167002135637737
0.058823529411764705
3.6820947100367514
0.0
2.7167656953926795
0.0
1.0513623471548583
0.0
1.647111493574362
0.05
2.6804090299608583
0.0
1.639669901001753
0.0
1.6735562863346947
0.0
5.8871963102967815
0.0
2.01828706

0.05555555555555555
0.7804017254630565
0.0
6.003646437048464
0.047619047619047616
0.7729953926003723
0.04
3.3382243287449076
0.0
4.790377903597446
0.0
3.198640119695187
0.0
4.0690408966346565
0.0
1.694142920101566
0.05555555555555555
2.8291368092498876
0.05
5.604062385618629
0.038461538461538464
1.2391162474927555
0.0
3.101477744811683
0.0
1.9631105764652668
0.05
5.172019221873372
0.0
2.696472382731595
0.0
0.18552394063670147
0.05263157894736842
6.117841926526477
0.0
3.2179104351550447
0.0
3.0548344537294816
0.0
1.5426648214655874
0.0
1.8226240593841023
0.0
1.8304187506623106
0.047619047619047616
3.4150251752434664
0.0
2.920116084548631
0.047619047619047616
4.16968154902874
0.05
3.10048503815243
0.0
5.0242822472166
0.0
3.9277016063803396
0.0
2.2515192357810774
0.0
2.139433294506774
0.0
1.2320300863454592
0.05263157894736842
4.97290573966082
0.0
2.643345203077575
0.045454545454545456
2.0866731844501625
0.05
0.3752453075284029
0.0
3.324108172440705
0.0
2.9917867788270205
0.0
5.7161019989

0.0
1.5620396393915987
0.041666666666666664
2.6414570563513706
0.04
5.33066971425041
0.05
2.8354107676966684
0.0
2.6953365455539218
0.0
3.8369812146992337
0.043478260869565216
2.3725829161004715
0.043478260869565216
5.823759326318348
0.0
4.002478613878398
0.0
1.9571003607037047
0.0
5.7385383684935505
0.0
1.2517400570032333
0.0
1.7106203879543656
0.0
4.724840176226108
0.0
3.149278760354235
0.05
4.65814941653103
0.0
1.9499804153526514
0.0
1.2237313633905025
0.0
4.479644166951887
0.045454545454545456
6.019134052941962
0.0
5.869748470717875
0.045454545454545456
6.32053807473735
0.041666666666666664
1.1728635622646586
0.13043478260869565
0.4382236889155693
0.0
4.656076257861956
0.043478260869565216
0.9229858928212463
0.0
2.9012053245907485
0.0
3.2949520654090776
0.0
4.17659008302857
0.0
2.118342801871468
0.0
1.0837093378519438
0.0
4.073800098698009
0.0
0.9138104854324266
0.0
3.8552108010583472
0.0
2.1371259192235845
0.0
4.744292442577286
0.0
3.8095315138556525
0.0
4.646431783617036
0.0
0.76

0.0
2.373584665228595
0.041666666666666664
1.905698142776735
0.0
5.83149821912833
0.0
3.9785116427901612
0.0
4.842959185178449
0.0
3.93262307912738
0.0
4.130543072847568
0.034482758620689655
2.6023722477960103
0.0
2.6173273529549
0.0
6.138716846761921
0.0
2.8148951142054575
0.038461538461538464
5.662574842551093
0.043478260869565216
2.762596381401848
0.047619047619047616
3.3250468115767506
0.05555555555555555
4.641120974511617
0.0
2.066973534604878
0.10526315789473684
2.437759523147608
0.1111111111111111
3.937830512102639
0.041666666666666664
2.436972502225379
0.043478260869565216
3.5497506918203947
0.04
5.33066971425041
0.041666666666666664
0.7210380096696085
0.0
2.863432892758274
0.0
3.7002950652383113
0.05555555555555555
4.9535751622747455
0.0
1.1859301983196149
0.0
2.7358178630112264
0.05555555555555555
5.2113495101776595
0.0
3.071746323674495
0.05555555555555555
1.0851919845567062
0.0
2.5675422517406674
0.047619047619047616
1.5606452313367187
0.0
1.092239241803005
0.0
2.6426859274

1.1577579720129594
0.058823529411764705
3.231813816122872
0.0
1.2957410155953937
0.0
3.0631825783931514
0.0
1.270863647092221
0.0
1.1700176687262762
0.0
3.4284360464199732
0.0
4.913208818353801
0.0
1.8779520341518179
0.0
1.9062301511439206
0.029411764705882353
1.2259826524770254
0.0
1.2508189013986222
0.0
4.60903176264961
0.04
4.974156561172016
0.0
3.078332402536305
0.0
5.340694327585508
0.0
0.9816153438564095
0.0
3.7986172449135136
0.037037037037037035
0.5839481740702177
0.0
0.822032460087321
0.05263157894736842
3.542346631048106
0.0
1.1460259024855017
0.0
2.139967004090858
0.0
2.346175109660916
0.0
2.1155805922588833
0.0
2.6152787112892204
0.041666666666666664
2.6081097336999934
0.05
1.3151335367962838
0.0
2.821739257402306
0.0
2.077372889425083
0.0
3.0335283087656983
0.0
0.9054200530494676
0.0
3.9370700907442515
0.0
2.8262998632677614
0.0
6.459596825751839
0.0
2.570226053886198
0.041666666666666664
0.9014881459255198
0.0
5.260150480690107
0.0
6.453453559604721
0.05263157894736842
2.

0.05
0.3922891454430237
0.047619047619047616
1.342087397215136
0.0
2.794411777856313
0.047619047619047616
4.924158276669528
0.05263157894736842
5.168677974961482
0.0
1.2323026696091193
0.0
2.319793033950018
0.0
5.286703007076216
0.045454545454545456
5.875346002924128
0.0
2.903942516634842
0.0
1.7529041769653861
0.0
1.2035608398115996
0.041666666666666664
6.259603779269122
0.058823529411764705
2.996070220070644
0.0
1.3022974232602929
0.0
5.488541710022166
0.0
3.068406408845142
0.0
1.5202138387135629
0.0
3.9794520006310248
0.0
3.2166908650321893
0.0
6.964686884426988
0.0
1.7736613683468925
0.0
3.406432157362395
0.0
5.947242529571971
0.047619047619047616
4.37050600033167
0.0
4.636463958407454
0.045454545454545456
4.607443318196452
0.0
2.4280758829303144
0.0
3.2793974557935406
0.0
3.6466403319832303
0.0
8.163135767017103
0.0
6.550302222287337
0.0
4.610566647445017
0.043478260869565216
5.820491792047787
0.0
3.1996807995146743
0.09523809523809523
0.2764266133944239
0.0
4.998252046639878
0.11

## Underneath I have sorted the findings using similarity scores , and distance metric 

In [477]:
df_copy.sort_values(by='similarity', ascending=False)

Unnamed: 0,address,latitude,longitude,Comma_delimitted_address,zip,tokens,similarity,distance
0,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.052021,11.396546,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1,0
5201,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.051969,11.398205,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1,0.00165996
7974,"rose g-8 , mangalam aananda, opposite sanganer...",71.062842,11.351647,"[Rose G-8 , mangalam Aananda, opposite sanga...",302029,"{mangalam, railway, station, aananda, jaipur,3...",0.210526,0.0461843
7369,"rose g-8 , mangalam aananda, opposite sanganer...",71.058382,11.359776,"[Rose G-8 , mangalam Aananda, opposite sanga...",302029,"{mangalam, railway, station, aananda, jaipur,3...",0.210526,0.0373161
8244,"41 sunder nagar sanganer railway station, ke s...",70.999621,11.356692,"[41 sunder nagar sanganer railway station, ke...",302029,"{railway, station, samne, jaipur,302029, nagar...",0.2,0.065834
5702,"flat 306, block iris, mangalam ananda,near san...",71.045985,11.359348,"[Flat 306, block iris, mangalam ananda, Near...",302029,"{mangalam, railway, station, block, station,30...",0.190476,0.0376848
6491,"bright cotton b4, khatri nagar, near sanganer ...",70.892897,10.848949,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818,0.570248
8660,"bright cotton b4, khatri nagar, near sanganer ...",70.891893,10.846765,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818,0.572625
4248,"bright cotton b4, khatri nagar, near sanganer ...",70.892586,10.847497,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818,0.57173
4041,"bright cotton b4, khatri nagar, near sanganer ...",70.892786,10.848944,"[Bright Cotton B4, Khatri Nagar, Near Sangan...",302029,"{over, bright, 302029, b4, cotton, nagar, mans...",0.181818,0.570284


In [478]:
df_copy.sort_values(by='distance', ascending=True)

Unnamed: 0,address,latitude,longitude,Comma_delimitted_address,zip,tokens,similarity,distance
0,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.052021,11.396546,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1,0
5201,"jaipur,h.no.- 408, lavender mangalam, anand na...",71.051969,11.398205,"[JAIPUR, H.NO.- 408, LAVENDER MANGALAM, ANAN...",302029,"{mangalam, station, 302029, nagar, india,30202...",1,0.00165996
7971,"lavender 207 mangalam aananda sanganer,mansaro...",71.051058,11.394790,"[lavender 207 Mangalam aananda Sanganer, mansa...",302029,"{lavender, mangalam, aananda, jaipur,302029, 2...",0.166667,0.00200284
6903,"104, lavendar,, mangalam aananda,sanganer,302029",71.053568,11.394492,"[104, Lavendar, , Mangalam Aananda, Sanganer...",302029,"{mangalam, lavendar, 104, aananda, sanganer,30...",0.0555556,0.0025713
2076,"104, lavendar,mangalam's aananda, sanganer,in ...",71.052389,11.403921,"[104, Lavendar, Mangalam's Aananda, Sanganer...",302029,"{front, mangalam, railway, aananda, of, sanagn...",0.130435,0.00738427
7965,"sunflower 410, manglam aananda,near rampura ro...",71.064959,11.394044,"[Sunflower 410, Manglam Aananda, Near Rampura...",302029,"{manglam, aananda, rampura, jaipur,302029, sag...",0,0.0131774
3111,"211 sunflower, manglam aananda city,opposite s...",71.064646,11.392212,"[211 Sunflower, Manglam Aananda City, Opposit...",302029,"{manglam, aananda, city, station,302029, rly, ...",0.0454545,0.0133479
8866,"flat no. 304,sun flower,manglam aananda, near ...",71.065814,11.411296,"[Flat no. 304, Sun flower, Manglam aananda, n...",302011,"{sanganer,302011, manglam, station, sun, aanan...",0.125,0.0201938
6156,"indrajeet singh , apollo pharmacy ,mangalam an...",71.071097,11.406526,"[indrajeet singh , Apollo Pharmacy , mangalam...",302029,"{pharmacy, mangalam, club, no, singh, house, s...",0.04,0.0215289
6907,"g-03, orchid ,mangalam ananda ,road sanganer,n...",71.071004,11.412793,"[G-03, orchid , Mangalam Ananda , road Sangan...",302029,"{mangalam, sanganer, orchid, g-03, n/a,302029,...",0.111111,0.0249859


## Inferences to be drawn from the two tables :

** Jaccord Similarity and Distance between to addresses have a seemingly inverse relationship, which is in line with intuition as the more similar an address is the closer it should be**

**Anomalies in DATA: 
  There are various repetions of same addresses in our database with slight latitude/longitude differences which can be      attributed to GPS noise. We could potentially aggregate our location metrics for these repeating entries **
 
** Paying close attention to the table above element indexed '7965' has a similarity score of '0' but has the 5th lowest distance seperation when rank ordered based on distance, this is counterintuitive as low distance seperation should co-relate with high similarity. 
This can be explained by the fact that element indexed '7965' has tokens that are mispelled versions of tokens from our comparision set (eg. Manglam/Mangalam , Anand/Ananda...). Transforming to mitigate these spelling errors can definitely increase our similarity scores. **

 


# Prediction

In [482]:
print('input address')
address = str(input()) #Input new address

input address
JAIPUR,H.NO.- 408, LAVENDER MANGALAM, ANAND NAGAR SANGANER, RAILWAY STATION, JAIPUR - 302029 ,JAIPUR H O-302029 ,Rajasthan INDIA,302029


In [489]:
address= address.lower()
tokens = nltk.word_tokenize(address)
exceptions = [',','jaipur','rajasthan' ,'near', 'road', 'apartment', 'adjacent','sector']
tokens_final=set(tokens).difference(exceptions)
tokens_final

{'-',
 '302029',
 '408',
 'anand',
 'h',
 'h.no.-',
 'india,302029',
 'lavender',
 'mangalam',
 'nagar',
 'o-302029',
 'railway',
 'sanganer',
 'station'}

In [491]:
for i in range(len(df_copy)):
    intersection = tokens_final.intersection(df_copy['tokens'].iloc[i])
    jaccard_score= float(len(intersection))/((len(df_copy.tokens.iloc[0])) + len(df_copy.tokens.iloc[i]) - len(intersection))
    print(jaccard_score)
    df_copy['similarity'].iloc[i] = jaccard_score

1.0
0.0
0.0
0.041666666666666664
0.0
0.0
0.0
0.05
0.05263157894736842
0.037037037037037035
0.0
0.05263157894736842
0.0
0.0
0.0
0.05
0.0
0.0
0.041666666666666664
0.038461538461538464
0.0
0.045454545454545456
0.05
0.05263157894736842
0.0
0.08333333333333333
0.05263157894736842
0.0
0.0
0.125
0.0
0.0
0.1
0.0
0.05
0.0
0.0
0.0
0.0
0.0
0.09090909090909091
0.047619047619047616
0.0
0.0
0.05555555555555555
0.038461538461538464
0.0
0.0
0.058823529411764705
0.0
0.0
0.0
0.03571428571428571
0.0
0.0
0.0
0.0
0.041666666666666664
0.05555555555555555
0.045454545454545456
0.0
0.0
0.0
0.047619047619047616
0.0
0.0
0.0
0.0
0.04
0.0
0.0
0.0
0.05
0.05263157894736842
0.0
0.0
0.05555555555555555
0.0
0.05555555555555555
0.0
0.0
0.0
0.0
0.0
0.0
0.038461538461538464
0.0
0.08695652173913043
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.043478260869565216
0.043478260869565216
0.0
0.0
0.041666666666666664
0.0
0.0
0.05
0.0
0.0
0.0
0.05
0.0
0.0
0.0
0.0
0.0
0.1111111111111111
0.04545454545454

0.0
0.043478260869565216
0.05
0.05263157894736842
0.0
0.0
0.045454545454545456
0.0
0.0
0.0
0.05555555555555555
0.034482758620689655
0.04
0.045454545454545456
0.0
0.0
0.0
0.038461538461538464
0.0
0.03571428571428571
0.045454545454545456
0.05555555555555555
0.043478260869565216
0.0
0.0
0.0
0.0
0.0
0.03571428571428571
0.0
0.0
0.0
0.0
0.0
0.045454545454545456
0.047619047619047616
0.0
0.0
0.058823529411764705
0.047619047619047616
0.0
0.0
0.0
0.045454545454545456
0.0
0.10526315789473684
0.08
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.038461538461538464
0.05
0.037037037037037035
0.043478260869565216
0.0
0.043478260869565216
0.0
0.15789473684210525
0.0
0.047619047619047616
0.0
0.043478260869565216
0.05555555555555555
0.043478260869565216
0.0
0.047619047619047616
0.0
0.043478260869565216
0.047619047619047616
0.0
0.0
0.0
0.0
0.045454545454545456
0.047619047619047616
0.0
0.05
0.0
0.0
0.0
0.05263157894736842
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.038461538461538464
0.03703703703

0.05
0.0
0.07407407407407407
0.0
0.0
0.0
0.05263157894736842
0.0
0.043478260869565216
0.0
0.05555555555555555
0.0
0.0
0.0
0.09523809523809523
0.045454545454545456
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.045454545454545456
0.03571428571428571
0.0
0.05555555555555555
0.0
0.03571428571428571
0.0
0.0
0.05263157894736842
0.047619047619047616
0.045454545454545456
0.0
0.0
0.0
0.0
0.05
0.045454545454545456
0.08333333333333333
0.0
0.0
0.05
0.05555555555555555
0.038461538461538464
0.05263157894736842
0.0
0.0
0.0
0.0
0.0
0.047619047619047616
0.0
0.0
0.0
0.03125
0.043478260869565216
0.0
0.05
0.08
0.0
0.0
0.047619047619047616
0.05263157894736842
0.0
0.0
0.0
0.043478260869565216
0.0
0.0
0.05
0.0
0.0
0.0
0.034482758620689655
0.05
0.0
0.0
0.0
0.0
0.0
0.0
0.041666666666666664
0.041666666666666664
0.08333333333333333
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.047619047619047616
0.045454545454545456
0.0
0.0
0.05
0.05
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.0
0.05
0.07407407407407407
0.0
0.05
0.0
0.0

0.0
0.0
0.043478260869565216
0.0
0.03225806451612903
0.0
0.047619047619047616
0.05555555555555555
0.047619047619047616
0.08695652173913043
0.0
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.05555555555555555
0.0
0.0
0.08
0.0
0.0
0.0
0.0
0.034482758620689655
0.0
0.04
0.0
0.0
0.038461538461538464
0.041666666666666664
0.0
0.0
0.05
0.09523809523809523
0.0
0.05
0.0
0.0
0.05
0.05263157894736842
0.0
0.0
0.0
0.0
0.0
0.05
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.05
0.0
0.038461538461538464
0.0
0.0
0.0
0.047619047619047616
0.047619047619047616
0.05
0.0
0.0
0.05
0.047619047619047616
0.0
0.0
0.0
0.058823529411764705
0.0
0.0
0.0
0.0
0.0
0.045454545454545456
0.0
0.0
0.0
0.0
0.043478260869565216
0.0
0.047619047619047616
0.0
0.03571428571428571
0.0
0.0
0.0
0.0
0.047619047619047616
0.037037037037037035
0.0
0.0
0.1
0.0
0.045454545454545456
0.0
0.041666666666666664
0.0
0.041666666666666664
0.0
0.047619047619047616
0.0
0.0
0.0
0.0
0.047619047619047616
0.041666666666666664
0.05263157894736842
0.0
0.0476190476

0.0
0.0
0.0
0.05
0.041666666666666664
0.043478260869565216
0.0
0.05
0.05263157894736842
0.0
0.0
0.043478260869565216
0.0
0.0
0.07692307692307693
0.045454545454545456
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.045454545454545456
0.0
0.0
0.05555555555555555
0.05263157894736842
0.0
0.058823529411764705
0.0
0.0
0.05
0.0
0.0
0.0
0.0
0.034482758620689655
0.041666666666666664
0.0
0.05
0.0
0.0
0.043478260869565216
0.0
0.0
0.05
0.0
0.03225806451612903
0.0
0.0
0.0
0.0
0.045454545454545456
0.04
0.0
0.0
0.0
0.0
0.045454545454545456
0.0
0.043478260869565216
0.0
0.05555555555555555
0.09090909090909091
0.0
0.05
0.0
0.03571428571428571
0.0
0.045454545454545456
0.0
0.043478260869565216
0.0
0.0
0.0
0.041666666666666664
0.05
0.0
0.0
0.0
0.09090909090909091
0.0
0.0
0.041666666666666664
0.0
0.0
0.0
0.0
0.045454545454545456
0.0
0.041666666666666664
0.043478260869565216
0.0
0.05
0.0
0.043478260869565216
0.08695652173913043
0.0
0.0
0.0
0.0
0.09523809523809523
0.0
0.045454545454545456
0.0
0.05263157894736842
0.

0.0
0.0
0.0
0.0
0.0
0.034482758620689655
0.045454545454545456
0.0
0.04
0.043478260869565216
0.0
0.0
0.0
0.0
0.05263157894736842
0.045454545454545456
0.0
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.0
0.0
0.0
0.0
0.05
0.038461538461538464
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.041666666666666664
0.0
0.0
0.0
0.047619047619047616
0.041666666666666664
0.045454545454545456
0.0
0.041666666666666664
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.0
0.05555555555555555
0.043478260869565216
0.05263157894736842
0.0
0.0
0.0
0.05263157894736842
0.047619047619047616
0.047619047619047616
0.05263157894736842
0.0
0.041666666666666664
0.0
0.0
0.045454545454545456
0.0
0.0
0.04
0.05555555555555555
0.0
0.0
0.043478260869565216
0.041666666666666664
0.05263157894736842
0.0
0.0
0.041666666666666664
0.0
0.0
0.0
0.047619047619047616
0.04
0.0
0.04
0.08333333333333333
0.043478260869565216
0.041666666666666664
0.0
0.05263157894736842
0.04
0.0
0.0
0.0
0.0
0.04
0.03571428571428571
0.0
0.0
0.0
0.0
0.03846

0.0
0.0
0.04
0.0
0.038461538461538464
0.0
0.05555555555555555
0.0
0.05263157894736842
0.0
0.13043478260869565
0.0
0.058823529411764705
0.0
0.034482758620689655
0.038461538461538464
0.05263157894736842
0.05263157894736842
0.045454545454545456
0.05
0.1
0.041666666666666664
0.03571428571428571
0.0
0.08
0.04
0.043478260869565216
0.0
0.041666666666666664
0.037037037037037035
0.041666666666666664
0.047619047619047616
0.041666666666666664
0.05
0.0
0.043478260869565216
0.0
0.0
0.05263157894736842
0.0
0.037037037037037035
0.041666666666666664
0.0
0.043478260869565216
0.0
0.0
0.045454545454545456
0.038461538461538464
0.05555555555555555
0.0
0.041666666666666664
0.0
0.0
0.0
0.0
0.047619047619047616
0.0
0.0
0.0
0.0
0.13636363636363635
0.09090909090909091
0.0
0.0
0.0
0.0
0.05
0.045454545454545456
0.0
0.0
0.047619047619047616
0.05555555555555555
0.05263157894736842
0.0
0.0
0.05555555555555555
0.05263157894736842
0.05
0.0
0.041666666666666664
0.0
0.0
0.05
0.0
0.041666666666666664
0.03225806451612903


0.05
0.045454545454545456
0.0
0.05263157894736842
0.05263157894736842
0.0
0.0
0.0
0.0
0.045454545454545456
0.0
0.0
0.0
0.038461538461538464
0.09090909090909091
0.0
0.0
0.0
0.058823529411764705
0.0
0.05263157894736842
0.0
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.05555555555555555
0.0
0.05
0.045454545454545456
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.041666666666666664
0.0
0.0
0.05
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.08695652173913043
0.0
0.0
0.0
0.0
0.05263157894736842
0.09523809523809523
0.038461538461538464
0.0
0.047619047619047616
0.0
0.0
0.058823529411764705
0.0
0.05
0.0
0.0
0.0
0.0
0.05555555555555555
0.047619047619047616
0.0
0.043478260869565216
0.0
0.0
0.0
0.0
0.1
0.047619047619047616
0.0
0.0
0.0
0.0
0.2
0.05263157894736842
0.08695652173913043
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.043478260869565216
0.0
0.043478260869565216
0.0
0.0
0.0
0.0
0.043478260869565216
0.05
0.0
0.0
0.0
0.0
0.0
0.0
0.03571428571428571
0.0
0.0
0.08695652173913043
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.047619

0.0
0.09090909090909091
0.0
0.0
0.0
0.047619047619047616
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.047619047619047616
0.05263157894736842
0.0
0.05
0.1111111111111111
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.0
0.045454545454545456
0.0
0.0
0.0
0.0
0.0
0.05
0.0
0.0
0.05
0.0
0.0
0.05555555555555555
0.0
0.0
0.0
0.047619047619047616
0.0
0.0
0.0
0.047619047619047616
0.0
0.05
0.047619047619047616
0.0
0.047619047619047616
0.05263157894736842
0.0
0.0
0.0
0.045454545454545456
0.0
0.0
0.0
0.041666666666666664
0.058823529411764705
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.047619047619047616
0.0
0.045454545454545456
0.0
0.0
0.0
0.0
0.0
0.0
0.043478260869565216
0.0
0.09523809523809523
0.0
0.1111111111111111
0.0
0.0
0.0
0.0
0.045454545454545456
0.0
0.0
0.043478260869565216
0.0
0.0
0.0
0.0
0.08695652173913043
0.09523809523809523
0.041666666666666664
0.0
0.0
0.0
0.0
0.0
0.05263157894736842
0.0
0.0
0.05555555555555555
0.05
0.0
0.043478260869565216
0.0
0.04
0.0
0.045454545454545456
0.0
0.0555555555555555

In [496]:
df_copy.sort_values(by='similarity', ascending=False).iloc[0].address #Nearest address based on only the address input

'jaipur,h.no.- 408, lavender mangalam, anand nagar sanganer, railway station, jaipur - 302029 ,jaipur h o-302029 ,rajasthan india,302029'