In [1]:
import pandas as pd
import numpy as np
import re
import collections
import time
import recordlinkage
import jellyfish

In [2]:
# Minimum number of listings a municipality needs before its name is inferred
# from the address pieces (it is compared with `>` where it is used).
N_muncipality = 20

# Raw inputs: the open-house listings and the full property assessment table.
csv_options = dict(encoding='utf-8')
openhouse = pd.read_csv('open-houses-montreal.csv', **csv_options)
property_assess_full = pd.read_csv('montreal-property-assessment.csv', **csv_options)

In [3]:
# The following routines rely on these assumptions:
# 1) the neighbourhood name appears as one of the comma-separated pieces of the address column
# 2) after the column selection in the next cell, `address` sits at positional index 4
#    (the fifth column), which is what the later `openhouse.iloc[i,4]` lookups use

openhouse.head()

Unnamed: 0,muncipality_id,listing_title,category,features,listing_price,sales_type,price,address,longitude,latitude,...,broker_3_name,broker_3_category,broker_3_firm,broker_3_firm_type,broker_4_details_url,broker_4_name,broker_4_category,broker_4_firm,broker_4_firm_type,retrieved_at
0,830.0,House for sale,House,"5 Beds, 4 Baths",1199000.0,sale,1199000.0,"7, Place Raymond, Kirkland, Neighbourhood Nort...",-73.892341,45.443783,...,,,,,,,,,,2018-07-25
1,815.0,Condo for sale,Condo,"2 Beds, 1 Bath",174900.0,sale,174900.0,"6801, boulevard des Roseraies, apt. 303, Anjou...",-73.555285,45.597026,...,,,,,,,,,,2018-07-25
2,843.0,Condo for sale,Condo,"2 Beds, 2 Baths",749000.0,sale,749000.0,"1210, boulevard De Maisonneuve Ouest, apt. 21A...",-73.575657,45.499809,...,,,,,,,,,,2018-07-25
3,836.0,Condo for sale,Condo,"2 Beds, 2 Baths",289000.0,sale,289000.0,"120, Place Donnacona, apt. 302, Dollard-Des Or...",-73.809583,45.48685,...,,,,,,,,,,2018-07-25
4,845.0,Triplex for sale,Triplex,"3 Beds, 1 Bath",498000.0,sale,498000.0,"2555 - 2559, Rue Lyall, Mercier/Hochelaga-Mais...",-73.530962,45.583558,...,,,,,,,,,,2018-07-25


In [4]:
# drop duplications
openhouse = openhouse.drop_duplicates(['category','sales_type','address'])

# keep openhouse with muncipality id
openhouse = openhouse[~openhouse.muncipality_id.isna()]

# keep only the useful columns
openhouse = openhouse[['muncipality_id','category','listing_price','sales_type','address','longitude',
                       'latitude','year_built','retrieved_at']]

# split the address
openhouse.address = openhouse.address.apply(lambda x: [i.replace(' - ','-').rstrip(' ') for i in x.split(',')])

print 'Lenth of openhouse dataset:',len(openhouse)

Lenth of openhouse dataset: 32568


In [5]:
# For every municipality with more than N_muncipality listings, the address piece
# that occurs most often across its listings is taken as the municipality's name.
muncipality_name = dict()

for muncipality_id, listings in openhouse.groupby('muncipality_id'):

    # too few listings to trust the most frequent piece
    if len(listings) <= N_muncipality:
        continue

    piece_counts = collections.Counter(piece for pieces in listings.address for piece in pieces)

    muncipality_name[muncipality_id] = piece_counts.most_common()[0][0]

In [6]:
# Strip the municipality name and every 'Neighbourhood ...' piece out of each
# address list (the lists are edited in place) and keep the neighbourhood in its
# own column.
neighbourhood = []
for municipality, pieces in zip(openhouse['muncipality_id'], openhouse['address']):

    # Municipalities with too few listings have no inferred name: nothing to strip.
    name = muncipality_name.get(municipality)
    if name is not None and name in pieces:
        pieces.remove(name)

    # Collect the matches first and rewrite the list afterwards. Removing items
    # from a list while iterating over it skips the element that follows each
    # removal, so adjacent 'Neighbourhood' pieces used to be missed.
    found = [piece for piece in pieces if 'Neighbourhood' in piece]
    pieces[:] = [piece for piece in pieces if 'Neighbourhood' not in piece]

    # When several pieces match, the last one is recorded.
    neighbourhood.append(found[-1] if found else np.nan)

openhouse['neighbourhood'] = neighbourhood

In [7]:
# Number of pieces left in each address list, then how often each length occurs.
len_address = openhouse['address'].apply(len)
collections.Counter(len_address)

Counter({1: 122, 2: 17407, 3: 15017, 4: 18, 5: 3, 6: 1})

# consider cases when len_address==2 or 3

In [8]:
# cases when len_address == 3: number, street and an apartment/suite designator.
# Three-piece addresses whose third piece contains neither 'apt.' nor 'suite'
# are left out.

unit_markers = set(['apt.', 'suite'])
three_part = openhouse[len_address == 3]
has_unit = three_part.address.apply(lambda x: len(unit_markers.intersection(x[2].split(' '))) > 0)

# .copy() gives an independent frame, so the column assignments below no longer
# write to a slice of `openhouse` (the cause of the SettingWithCopyWarning).
openhouse_3 = three_part[has_unit].copy()

openhouse_3['no.'] = openhouse_3.address.apply(lambda x: x[0])

openhouse_3['street'] = openhouse_3.address.apply(lambda x: x[1].rstrip(' '))

# the unit id is the third piece without the 'apt.'/'suite' marker and without spaces
openhouse_3['apt'] = openhouse_3.address.apply(lambda x: x[2].replace('apt.','').replace('suite','').replace(' ',''))

# cases when len_address == 2: number and street only

openhouse_2 = openhouse[len_address == 2].copy()

openhouse_2['street'] = openhouse_2.address.apply(lambda x: x[1].rstrip(' '))

openhouse_2['no.'] = openhouse_2.address.apply(lambda x: x[0])

# concat the two and restore the original row order

house_main = pd.concat([openhouse_2, openhouse_3]).sort_index()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


# property assessment

In [9]:
# One assessment row per (address, neighbourhood unit, taxable building value).
property_assess = property_assess_full.drop_duplicates(
    ['full_address','neighbourhood_unit_number','taxable_value_of_building'])

# character class matching any of ( ) # & :
rx = '[' + re.escape(''.join(['(',')','#','&',':'])) + ']'


def _split_full_address(raw):
    """Normalise one assessment address string and cut it on ','.

    Characters matched by `rx` become spaces; 'LOT', 'ARR' and '.' are deleted;
    ' - ' is collapsed to '-'; trailing commas and spaces are dropped before the
    split.
    """
    text = re.sub(rx,' ',raw)
    for unwanted in ('LOT', 'ARR', '.'):
        text = text.replace(unwanted,'')
    text = text.replace(' - ','-')
    return text.rstrip(', ').split(',')


property_assess.loc[:,'full_address'] = property_assess.full_address.apply(_split_full_address)

In [10]:
# Preview the assessment rows; `full_address` now holds lists of address pieces.
property_assess.head()

Unnamed: 0,city,full_address,borough,lot_number,serial_number,predominant_use,neighbourhood_unit_number,file_no,owner_name,owner_status_for_education_tax,...,reference_date_of_contract,land_value,building_value,total_value,reference_date_of_contract_at_previous_role,value_of_property_at_previous_role,taxable_value_of_building,non_taxable_value_of_building,source_url,retrieved_at
0,Montreal,[6937-6951 Avenue Mousseau],Arrondissement d'Anjou,1111111,0152-14-4347-1-000-0000,Logement,709.0,1 - F00206600,"CALABRESE, FRANCO",Personne physique,...,2015-01-07,146800.0,168400.0,315200.0,2012-01-07,303100.0,315200.0,0.0,https://servicesenligne2.ville.montreal.qc.ca/...,2018-07-11
1,Montreal,[Boulevard Henri-Bourassa Est],Arrondissement de Rivière-des-Prairies - Point...,13386581345162169733716973391870578,9652-86-8832-6-000-0000,Espace de terrain non aménagé et non exploité ...,1937.0,30 - F89999057,"AZZOUZ, GABRIEL",Personne physique,...,2015-01-07,483700.0,0.0,483700.0,2012-01-07,432800.0,483700.0,0.0,https://servicesenligne2.ville.montreal.qc.ca/...,2018-07-11
2,Montreal,[9229 Rue Pierre-Bonne],Arrondissement de Rivière-des-Prairies - Point...,14405751505829,9955-69-7452-7-000-0000,Autres industries de produits manufacturés,1928.0,30 - F86610432,VENTILATION MAXIMUM LTEE,Personne morale,...,2015-01-07,1078700.0,4421300.0,5500000.0,2012-01-07,5300000.0,5500000.0,0.0,https://servicesenligne2.ville.montreal.qc.ca/...,2018-07-11
3,Montreal,[4330-4336 Rue de Verdun],Arrondissement de Verdun,11837121623462,9935-26-2157-9-000-0000,Logement,1088.0,28 - F00318300,"OLANICK, NATALIE",Personne physique,...,2015-01-07,65300.0,337200.0,402500.0,2012-01-07,365900.0,402500.0,0.0,https://servicesenligne2.ville.montreal.qc.ca/...,2018-07-11
4,Montreal,[438-440 Rue Saint-Pierre],Arrondissement de Ville-Marie,1692936,0040-21-8599-5-000-0000,Logement,2042.0,30 - F14015100,9368-2839 QUÉBEC INC.,Personne morale,...,2015-01-07,1485300.0,1460400.0,2945700.0,2012-01-07,2805400.0,2945700.0,0.0,https://servicesenligne2.ville.montreal.qc.ca/...,2018-07-11


In [11]:
# Distribution of the number of comma-separated pieces per assessment address.
pieces_per_address = property_assess.full_address.apply(len)
collections.Counter(pieces_per_address)

Counter({1: 356002, 2: 128582, 3: 2160, 4: 35, 5: 16, 6: 1, 8: 1})

In [12]:
# Two-piece addresses: the first piece is stored as 'street_no.', the second as 'apt'.
# .copy() detaches the subset from `property_assess`, so the column assignments
# here and in the following cells write to an independent frame instead of a
# slice of the parent (the source of the SettingWithCopyWarning on `property2`).
property2 = property_assess[property_assess.full_address.apply(lambda x: len(x)==2)].copy()

property2['street_no.'] = property2['full_address'].apply(lambda x: x[0])

property2['apt'] = property2['full_address'].apply(lambda x: x[1])

In [13]:
# Count every non-empty, space-separated token that appears in the unit piece.
L = [j for i in property2.apt for j in i.split(' ') if len(j)>0]

c = collections.Counter(L).most_common()

# Alphabetic tokens that are part of a unit identifier and must be kept.
exceptions = ['SUD','NORD','PENTHOUSE','EST','OUEST','PH','PH-A','PH-B','PH-C','PH-D','PH-K','PH-H','PH-N','PH-G','PH-E','PH-P','PH-T']

# Tokens to strip from `apt`: they occur more than once, are longer than one
# character, contain no numeric character and are not listed in `exceptions`.
# The empty string is included so that runs of spaces collapse.
list_of_words = [''] + [word for word, count in c
                        if count != 1
                        and len(word) != 1
                        and word not in exceptions
                        and not any(ch.isnumeric() for ch in word)]

# Set copy for constant-time membership tests in the per-row filter below.
words_to_drop = set(list_of_words)

# Item assignment instead of attribute assignment: it cannot silently create a
# plain attribute if the column name is ever mistyped.
property2['apt'] = property2.apt.apply(lambda x: ' '.join([i for i in x.rstrip(' ')
                                                           .split(' ') if i not in words_to_drop]))

# units that end up empty are treated as missing
property2.loc[property2['apt'].apply(lambda x: len(x)==0),'apt'] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [14]:
def get_number(x):
    """Split a leading civic number off an address string.

    The text is cut at its first space. If the part before it consists only of
    numeric characters once every '-' is removed (e.g. '438-440'), that part is
    returned as the number and everything after the space as the rest.
    Otherwise the number is np.nan and the whole text is returned as the rest.

    Returns a (number, rest) tuple.
    """
    head, _, tail = x.partition(' ')
    if head.replace('-','').isnumeric():
        return head, tail
    return np.nan, x

In [15]:
# Split the first address piece of every two-piece address into a
# (civic number, street name) pair.
number_and_street = property2['street_no.'].apply(get_number)

# civic number (np.nan when the piece does not start with a number)
property2.loc[:,'no.'] = number_and_street.apply(lambda pair: pair[0])

# street name without trailing spaces
property2.loc[:,'street'] = number_and_street.apply(lambda pair: pair[1].rstrip(' '))

In [16]:
# One-piece addresses: a single string holding an optional civic number and the
# street, with no unit piece.
# .copy() detaches the subset from `property_assess`, so the column assignments
# below write to an independent frame instead of a slice of the parent.
property1 = property_assess[property_assess['full_address'].apply(lambda x: len(x)==1)].copy()

# (civic number, street name) pairs from the only address piece
temp = property1['full_address'].apply(lambda x: get_number(x[0]))

property1['no.'] = temp.apply(lambda x: x[0])

property1['street'] = temp.apply(lambda x: x[1].rstrip(' '))

# no unit information for one-piece addresses
property1['apt'] = np.nan

In [17]:
# Stack the one-piece and two-piece address frames and restore the original
# row order; then show how many properties are kept.
property_parts = [property1, property2]
prop_main = pd.concat(property_parts).sort_index()
prop_main.shape[0]

484584

In [18]:
# Count how often every space-separated word occurs across all property street names.
street_tokens = prop_main.street.apply(lambda x: x.split(' '))

c = collections.Counter(token for tokens in street_tokens for token in tokens)

# The least frequent word of each street name becomes its matching keyword.
prop_main.loc[:,'street_ind'] = street_tokens.apply(
    lambda tokens: tokens[np.argsort([c[t] for t in tokens])[0]])

# an empty keyword is rewritten as u''
prop_main.loc[:,'street_ind'] = prop_main.street_ind.apply(lambda x: u'' if len(x)==0 else x)

In [19]:
# Count how often every space-separated word occurs across all listing street names.
listing_tokens = house_main.street.apply(lambda x: x.split(' '))

c = collections.Counter(token for tokens in listing_tokens for token in tokens)

# The least frequent word of each street name becomes its matching keyword.
house_main.loc[:,'street_ind'] = listing_tokens.apply(
    lambda tokens: tokens[np.argsort([c[t] for t in tokens])[0]])

# an empty keyword is rewritten as u''
house_main.loc[:,'street_ind'] = house_main.street_ind.apply(lambda x: u'' if len(x)==0 else x)

In [20]:
# Build candidate (listing, property) pairs keyed on `street_ind` and time it.
# NOTE(review): sorted-neighbourhood indexing presumably pairs records whose keys
# are close in sort order rather than only equal ones; confirm the default
# window in the recordlinkage documentation.
start = time.time()

pc = recordlinkage.SortedNeighbourhoodIndex(on='street_ind')

# house_main is the left side of every pair, prop_main the right side
pairs2 = pc.index(house_main,prop_main)

print 'Index takes:',time.time()-start

# number of candidate pairs to compare
print len(pairs2)

Index takes: 20.5508220196
29843121


In [28]:
# Comparison features computed for every candidate pair; the labels become the
# column names 'street', 'apt' and 'no.' of the feature table.
compare_house = recordlinkage.Compare()

# street keyword: Jaro-Winkler string comparison with a 0.95 threshold
compare_house.string('street_ind','street_ind', method='jarowinkler', threshold=0.95, label='street')

# unit: exact comparison
compare_house.exact('apt','apt',label='apt')

# civic number: exact comparison
compare_house.exact('no.','no.',label='no.')

<Compare>

In [29]:
# Score every candidate pair and time the computation.
start = time.time()

# Missing values are replaced by -1 on both sides before comparing.
# NOTE(review): presumably so that two missing units compare as equal in the
# exact comparison; confirm this is intended.
feature = compare_house.compute(pairs2, house_main.fillna(value=-1), prop_main.fillna(value=-1))

print 'Take',time.time()-start

Take 1312.79974508


In [32]:
# Keep the candidate pairs whose comparison scores sum to more than 2.
feature_total = feature.sum(axis=1)
indexs = feature[feature_total > 2]

In [183]:
# Empty frame for the listing -> property matches; its columns are added once
# the matching loop has produced the house_ind / pro_ind / similarity lists.
map_house_prop = pd.DataFrame()

In [180]:
# Turn the accepted candidate pairs into listing -> property matches.
house_ind = []
pro_ind = []
similarity = []

level1_ind = indexs.index.get_level_values(0)

# level1_ind repeats a listing id once per accepted pair. Visiting each listing
# once avoids redoing the same lookups and appending the same rows several times
# (rows that the later drop_duplicates had to remove again).
for i in level1_ind.unique():

    # property ids paired with listing i, looked up once
    candidates = indexs.loc[i].index
    listing_key = house_main.loc[i].street_ind

    # Jaro-Winkler similarity between the listing keyword and each candidate keyword
    scores = [jellyfish.jaro_winkler(listing_key, prop_main.loc[j].street_ind) for j in candidates]

    if len(candidates)==1:
        # a single candidate is accepted whatever its similarity
        keep = [0]
    else:
        # several candidates: keep only those whose keyword is (numerically) identical
        keep = [k for k in range(len(scores)) if scores[k]>0.999999999]

    for k in keep:
        house_ind.append(i)
        pro_ind.append(candidates[k])
        similarity.append(scores[k])

In [187]:
# Store the matches as columns, in this order, then drop repeated rows in place.
match_columns = (('house_ind', house_ind),
                 ('property_ind', pro_ind),
                 ('similarity', similarity))

for column_name, column_values in match_columns:
    map_house_prop[column_name] = column_values

map_house_prop.drop_duplicates(inplace=True)

In [228]:
# Listings that are not yet present in map_house_prop, in their original order.
# A set lookup replaces list.remove() inside a loop (a linear scan per match) and
# the bare `except: pass` that hid every error raised there.
matched_houses = set(map_house_prop.house_ind)
all_ind = [i for i in house_main.index if i not in matched_houses]
    
def find_ind(x,N):
    """Return True when civic number N lies inside the range described by x.

    x : a two-item list [low, high] of number strings, such as the result of
        splitting '120-128' on '-'. Anything else (a plain string, NaN, a list
        of another length) is never a match.
    N : the civic number to look up; it is converted with float().

    Both bounds are inclusive. Returns False instead of raising when N or
    either bound cannot be converted to a number (e.g. '9400F' or an empty bound).
    """
    if not isinstance(x, list) or len(x) != 2:
        return False
    try:
        number = float(N)
        low, high = float(x[0]), float(x[1])
    except (TypeError, ValueError):
        # non-numeric input can never fall inside a numeric range
        return False
    return low <= number <= high

In [353]:
ind_found_inrange = []
prop_ind = []

start = time.time()
for (i,r) in house_main.loc[all_ind][['street_ind','no.','apt']].iterrows():
    
    processed_no = prop_main[(prop_main.street_ind==r.street_ind)]['no.'].apply(lambda x: 
                                                                            x.split('-') if isinstance(x, unicode) else x)

    found_ind = processed_no[processed_no.apply(lambda x: find_ind(x,r['no.']))].index
    
    for j in found_ind:
        print i,'found'
        ind_found_inrange.append(i)
        prop_ind.append(j)
        
print 'Took', time.time()-start

5 found
14 found
14 found
18 found
18 found
31 found
45 found
54 found
55 found
61 found
64 found
69 found
70 found
72 found
83 found
86 found
87 found
94 found
139 found
143 found
155 found
155 found
166 found
166 found
174 found
176 found
176 found
176 found
178 found
179 found
196 found
196 found
222 found
226 found
231 found
233 found
233 found
236 found
239 found
239 found
244 found
248 found
255 found
262 found
282 found
282 found
283 found
292 found
292 found
294 found
296 found
340 found
352 found
358 found
358 found
358 found
391 found
392 found
395 found
396 found
398 found
402 found
409 found
412 found
413 found
426 found
464 found
466 found
470 found
470 found
479 found
513 found
513 found
516 found
516 found
554 found
555 found
557 found
562 found
565 found
566 found
568 found
569 found
570 found
572 found
576 found
578 found
580 found
581 found
582 found
586 found
660 found
662 found
668 found
673 found
673 found
674 found
675 found
675 found
681 found
691 found
691 found

3978 found
3981 found
3997 found
4000 found
4026 found
4026 found
4026 found
4026 found
4026 found
4026 found
4026 found
4033 found
4040 found
4041 found
4047 found
4048 found
4048 found
4048 found
4048 found
4048 found
4048 found
4048 found
4048 found
4052 found
4054 found
4054 found
4057 found
4058 found
4068 found
4071 found
4074 found
4076 found
4077 found
4078 found
4078 found
4084 found
4084 found
4098 found
4108 found
4116 found
4122 found
4173 found
4177 found
4178 found
4187 found
4193 found
4201 found
4208 found
4210 found
4211 found
4211 found
4213 found
4222 found
4222 found
4226 found
4226 found
4246 found
4252 found
4262 found
4268 found
4269 found
4270 found
4301 found
4301 found
4304 found
4329 found
4334 found
4334 found
4336 found
4342 found
4345 found
4346 found
4348 found
4357 found
4416 found
4423 found
4423 found
4440 found
4463 found
4463 found
4498 found
4551 found
4563 found
4568 found
4568 found
4569 found
4577 found
4577 found
4579 found
4579 found
4588 found

7627 found
7627 found
7631 found
7635 found
7639 found
7639 found
7647 found
7647 found
7734 found
7755 found
7760 found
7772 found
7773 found
7777 found
7783 found
7784 found
7788 found
7798 found
7800 found
7803 found
7807 found
7809 found
7809 found
7814 found
7818 found
7818 found
7823 found
7824 found
7824 found
7824 found
7826 found
7856 found
7867 found
7879 found
7879 found
7894 found
7925 found
7932 found
7941 found
7944 found
7945 found
7946 found
7947 found
7953 found
7961 found
7967 found
7967 found
7971 found
7972 found
7973 found
7973 found
7988 found
7988 found
7989 found
7990 found
7994 found
8046 found
8078 found
8078 found
8081 found
8083 found
8085 found
8086 found
8087 found
8092 found
8093 found
8093 found
8095 found
8095 found
8104 found
8104 found
8109 found
8110 found
8114 found
8115 found
8115 found
8116 found
8117 found
8119 found
8119 found
8120 found
8120 found
8120 found
8126 found
8126 found
8142 found
8147 found
8157 found
8163 found
8174 found
8190 found

10853 found
10855 found
10857 found
10859 found
10870 found
10874 found
10874 found
10892 found
10892 found
10925 found
10943 found
10961 found
10961 found
10961 found
10961 found
10961 found
10961 found
10961 found
10961 found
10981 found
10982 found
10983 found
10986 found
10987 found
10987 found
10988 found
10990 found
10990 found
10991 found
10994 found
10998 found
11001 found
11003 found
11015 found
11016 found
11016 found
11022 found
11030 found
11035 found
11041 found
11081 found
11086 found
11149 found
11153 found
11160 found
11161 found
11162 found
11165 found
11165 found
11168 found
11171 found
11177 found
11178 found
11178 found
11185 found
11185 found
11186 found
11190 found
11190 found
11193 found
11196 found
11196 found
11198 found
11200 found
11201 found
11204 found
11204 found
11207 found
11218 found
11220 found
11224 found
11227 found
11233 found
11234 found
11235 found
11238 found
11241 found
11241 found
11245 found
11292 found
11292 found
11306 found
11308 found
1130

14229 found
14233 found
14235 found
14242 found
14244 found
14245 found
14249 found
14250 found
14251 found
14251 found
14254 found
14256 found
14256 found
14264 found
14271 found
14271 found
14273 found
14273 found
14279 found
14306 found
14310 found
14310 found
14330 found
14335 found
14336 found
14336 found
14338 found
14338 found
14338 found
14346 found
14354 found
14367 found
14389 found
14389 found
14392 found
14392 found
14417 found
14430 found
14466 found
14466 found
14470 found
14471 found
14473 found
14474 found
14475 found
14475 found
14477 found
14481 found
14481 found
14488 found
14491 found
14493 found
14494 found
14500 found
14503 found
14504 found
14515 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14520 found
14537 found
14537 found
14571 found
14610 found
14614 found
14617 found
14619 found
14625 found
14625 found
1462

18398 found
18416 found
18419 found
18420 found
18423 found
18427 found
18429 found
18429 found
18450 found
18453 found
18454 found
18485 found
18507 found
18509 found
18510 found
18519 found
18527 found
18530 found
18560 found
18569 found
18591 found
18603 found
18638 found
18649 found
18649 found
18650 found
18650 found
18652 found
18655 found
18657 found
18659 found
18659 found
18663 found
18663 found
18664 found
18666 found
18668 found
18670 found
18670 found
18670 found
18672 found
18672 found
18681 found
18684 found
18692 found
18692 found
18693 found
18696 found
18697 found
18698 found
18698 found
18699 found
18746 found
18758 found
18799 found
18819 found
18822 found
18823 found
18830 found
18837 found
18837 found
18842 found
18845 found
18846 found
18851 found
18851 found
18851 found
18851 found
18851 found
18851 found
18851 found
18851 found
18853 found
18856 found
18856 found
18856 found
18858 found
18865 found
18870 found
18871 found
18872 found
18874 found
18880 found
1891

21857 found
21858 found
21867 found
21871 found
21876 found
21882 found
21882 found
21882 found
21885 found
21887 found
21894 found
21894 found
21898 found
21898 found
21954 found
21959 found
21968 found
21976 found
21976 found
21988 found
21989 found
22017 found
22047 found
22050 found
22050 found
22054 found
22055 found
22067 found
22121 found
22121 found
22135 found
22140 found
22169 found
22174 found
22187 found
22202 found
22202 found
22207 found
22210 found
22210 found
22214 found
22227 found
22229 found
22231 found
22237 found
22252 found
22258 found
22261 found
22262 found
22262 found
22291 found
22332 found
22332 found
22353 found
22356 found
22358 found
22367 found
22370 found
22379 found
22384 found
22384 found
22392 found
22398 found
22400 found
22406 found
22411 found
22412 found
22414 found
22426 found
22460 found
22462 found
22467 found
22477 found
22489 found
22492 found
22500 found
22500 found
22500 found
22500 found
22500 found
22500 found
22500 found
22500 found
2250

25493 found
25493 found
25496 found
25498 found
25504 found
25505 found
25506 found
25511 found
25514 found
25516 found
25519 found
25537 found
25537 found
25537 found
25537 found
25540 found
25543 found
25550 found
25552 found
25553 found
25554 found
25559 found
25562 found
25565 found
25566 found
25568 found
25571 found
25587 found
25587 found
25599 found
25616 found
25622 found
25628 found
25631 found
25632 found
25633 found
25637 found
25640 found
25643 found
25643 found
25646 found
25647 found
25648 found
25653 found
25653 found
25659 found
25660 found
25661 found
25666 found
25672 found
25673 found
25674 found
25676 found
25682 found
25692 found
25692 found
25694 found
25699 found
25699 found
25699 found
25699 found
25700 found
25704 found
25704 found
25705 found
25706 found
25707 found
25712 found
25718 found
25718 found
25721 found
25732 found
25734 found
25736 found
25738 found
25746 found
25746 found
25746 found
25746 found
25746 found
25746 found
25746 found
25746 found
2574

31058 found
31076 found
31094 found
31123 found
31157 found
31161 found
31202 found
31211 found
31242 found
31242 found
31242 found
31242 found
31242 found
31242 found
31242 found
31242 found
31244 found
31244 found
31247 found
31253 found
31275 found
31282 found
31313 found
31313 found
31316 found
31325 found
31325 found
31325 found
31325 found
31325 found
31342 found
31347 found
31372 found
31379 found
31400 found
31400 found
31435 found
31453 found
31461 found
31493 found
31494 found
31510 found
31510 found
31518 found
31526 found
31542 found
31542 found
31570 found
31598 found
31618 found
31622 found
31623 found
31624 found
31631 found
31633 found
31637 found
31639 found
31641 found
31644 found
31654 found
31655 found
31661 found
31671 found
31674 found
31674 found
31678 found
31678 found
31679 found
31680 found
31682 found
31682 found
31684 found
31684 found
31689 found
31691 found
31697 found
31707 found
31707 found
31708 found
31713 found
31715 found
31723 found
31732 found
3173

33768 found
33776 found
33776 found
33783 found
33785 found
33794 found
33804 found
33816 found
33816 found
33836 found
33842 found
33842 found
33844 found
33848 found
33848 found
33850 found
33850 found
33850 found
33856 found
33856 found
33858 found
33858 found
33858 found
33858 found
33858 found
33858 found
33858 found
33858 found
33861 found
33866 found
33873 found
33877 found
33878 found
33878 found
33885 found
33885 found
33885 found
33893 found
33896 found
33898 found
33900 found
33909 found
33912 found
33914 found
33919 found
33933 found
33933 found
33937 found
33940 found
33948 found
33948 found
33948 found
33948 found
33948 found
33948 found
33948 found
33951 found
33951 found
33953 found
33964 found
33965 found
33987 found
33991 found
33995 found
33995 found
34006 found
34012 found
34012 found
34026 found
34026 found
34026 found
34026 found
34026 found
34026 found
34028 found
34030 found
34030 found
34034 found
34034 found
34037 found
34037 found
34039 found
34039 found
3403

In [371]:
# Range-based matches as a two-column frame: listing id and matched property id.
house_range = pd.DataFrame().assign(house_ind=ind_found_inrange,
                                    property_ind=prop_ind)

In [372]:
# Number of distinct listings that received at least one range-based match.
house_range['house_ind'].nunique()

4493

In [375]:
# Appending returns a new frame and leaves map_house_prop untouched, so the
# combined mapping is kept under its own name instead of only being displayed.
# Rows coming from house_range have no 'similarity' value.
house_prop_all = pd.concat([map_house_prop, house_range], ignore_index=True)
house_prop_all

Unnamed: 0,house_ind,pro_ind,similarity
0,3600,138930,1.000000
1,12376,13985,1.000000
2,16480,427778,1.000000
3,19306,209213,1.000000
4,20778,14055,1.000000
5,24480,209221,1.000000
6,15939,174099,1.000000
7,34162,205520,1.000000
8,9072,85407,1.000000
9,9945,109901,0.971429


In [376]:
# Drop the listings matched by range from the still-unmatched list, in place.
# A set lookup replaces list.remove() inside a loop and the bare `except: pass`
# that was needed because house_range repeats a listing id once per match.
range_matched = set(house_range.house_ind)
all_ind[:] = [i for i in all_ind if i not in range_matched]

In [380]:
# Listings whose ids are still in all_ind, i.e. not removed by either matching pass.
house_main.loc[all_ind]

Unnamed: 0,address,apt,category,latitude,listing_price,longitude,muncipality_id,neighbourhood,no.,retrieved_at,sales_type,street,year_built,street_ind
27,"[300, Avenue des Sommets, apt. 1916]",1916,Condo,45.450053,859000.0,-73.548180,819.0,Neighbourhood Île-des-Soeurs,300,2018-07-25,sale,Avenue des Sommets,2005.0,Sommets
37,"[424-424A, Rue Bourbonnais]",,Duplex,45.430223,410000.0,-73.638041,820.0,,424-424A,2018-07-25,sale,Rue Bourbonnais,1959.0,Bourbonnais
48,"[12620, Rue Sherbrooke Est]",,Condo,45.651538,139900.0,-73.510712,846.0,Neighbourhood Pointe-aux-Trembles,12620,2018-07-25,sale,Rue Sherbrooke Est,1985.0,Sherbrooke
51,"[9400F, Rue Sherbrooke Est]",,4plex,45.611054,0.0,-73.526552,814.0,,9400F,2018-07-25,sale,Rue Sherbrooke Est,0.0,Sherbrooke
52,"[300, Avenue des Sommets, apt. 1916]",1916,Condo / Apartment,45.450053,0.0,-73.548180,819.0,Neighbourhood Île-des-Soeurs,300,2018-07-25,rent,Avenue des Sommets,,Sommets
57,"[1310, boulevard René-Lévesque Ouest, apt. 907]",907,Condo / Apartment,45.496688,0.0,-73.571646,843.0,Neighbourhood Golden Square Mile,1310,2018-07-25,rent,boulevard René-Lévesque Ouest,2018.0,René-Lévesque
66,"[3683, Rue Hutchison, apt. C]",C,Condo / Apartment,45.509809,0.0,-73.576268,842.0,Neighbourhood Le Plateau-Mont-Royal,3683,2018-07-25,rent,Rue Hutchison,2015.0,Hutchison
75,"[1288, Avenue des Canadiens-de-Montréal, apt...",PH4804,Condo / Apartment,45.495975,0.0,-73.570748,843.0,Neighbourhood Central West,1288,2018-07-25,rent,Avenue des Canadiens-de-Montréal,2016.0,Canadiens-de-Montréal
91,"[817, 16e Avenue (P.-a.-T.)]",,House,45.643964,425000.0,-73.494404,846.0,Neighbourhood Pointe-aux-Trembles,817,2018-07-24,sale,16e Avenue (P.-a.-T.),2018.0,16e
95,"[4800, Rue Resther, apt. 402]",402,Condo,45.527564,0.0,-73.585767,842.0,Neighbourhood Le Plateau-Mont-Royal,4800,2018-07-24,sale,Rue Resther,0.0,Resther


In [447]:
# Spot check: distinct civic numbers recorded for properties whose street
# keyword is 'Deslauriers'.
np.unique(prop_main[(prop_main.street_ind==u'Deslauriers')]['no.'])

array([nan, nan, nan, nan, u'10', u'103', u'115', u'119', u'12',
       u'120-128', u'123', u'1245', u'1345', u'14', u'1544', u'1546',
       u'1548', u'1549', u'1550', u'1552', u'1554', u'1556', u'1558',
       u'1560', u'1562', u'1564', u'1566', u'1568', u'1570', u'1572',
       u'16', u'160', u'165', u'19', u'2-6', u'20', u'200-222', u'23',
       u'24', u'250', u'255', u'27', u'290', u'295-371', u'31', u'32',
       u'33', u'34', u'36', u'375-455', u'38', u'380', u'39', u'390',
       u'400', u'42', u'430-450', u'45', u'457-485', u'46', u'470-472',
       u'48', u'491-533', u'500-530', u'52', u'55', u'550', u'56',
       u'604-664', u'605-607', u'61', u'611-625', u'62', u'666-678',
       u'67', u'68', u'690-700', u'7', u'706-730', u'71', u'74', u'75',
       u'79', u'8', u'80', u'800-828', u'825', u'83', u'835-855', u'86',
       u'865-873', u'87', u'91', u'95'], dtype=object)

In [448]:
# Number of listings that remain unmatched.
len(all_ind)

5966

In [None]:
# Ad-hoc lookup: raw assessment rows whose owner_address equals this exact string.
property_assess_full[property_assess_full.owner_address=='303 NOTRE-DAME E 3.500, MONTREAL QUEBEC, H2Y 3Y8']