# Computing distances

In [1]:
import os
import time

import pandas as pd
from geopy.distance import great_circle as GRC


In [2]:
# Input files, read relative to the notebook's working directory.
dvf_csv_path = 'immo_lyon17_22_last.csv'
interest_csv_path = 'final_interest_dataframe_filtered.csv'

# df_dvf: one row per property sale; df_interest: one row per point of interest.
df_dvf = pd.read_csv(dvf_csv_path)
df_interest = pd.read_csv(interest_csv_path)

In [3]:
# Preview the first two rows of df_dvf to check that the CSV loaded as expected.
df_dvf.head(2)

Unnamed: 0.1,Unnamed: 0,index,valeur_fonciere,adresse_numero,adresse_nom_voie,nom_commune,id_parcelle,type_local,surface_reelle_bati,nombre_pieces_principales,longitude,latitude,year,month,trim,index_prix
0,10026,24429,6196748.0,71.0,RUE MONTAGNY,Lyon 8e Arrondissement,69388000CK0097,Appartement,4.0,1.0,4.850547,45.733555,2018,10,2018-T4,3.4
1,14588,94779,26138036.0,7.0,RUE DE LA REPUBLIQUE,Lyon 1er Arrondissement,69381000AS0064,Appartement,20.0,1.0,4.836266,45.766464,2019,9,2019-T3,4.0


In [4]:
# (rows, columns) of df_dvf.
df_dvf.shape

(51125, 16)

In [5]:
# Preview the first two rows of df_interest (longitude, latitude, name, category).
df_interest.head(2)

Unnamed: 0,longitude,latitude,name,category
0,4.854419,45.74687,Crèche Blandan,kindergarten
1,4.869292,45.757747,L'Arc en ciel,kindergarten


In [6]:
# (rows, columns) of df_interest.
df_interest.shape

(1589, 4)

### Adding one new column to each dataframe:
- `tuple_gps`: the coordinates stored as a `(latitude, longitude)` tuple, so that distances can be computed more easily

In [7]:
# Pair each row's coordinates as (latitude, longitude). Note the swap: the
# dataframe stores the longitude column before the latitude column.
list_gps = list(zip(df_dvf.latitude, df_dvf.longitude))
    

In [8]:
# Store the (latitude, longitude) tuples built from df_dvf as a new column.
df_dvf['tuple_gps'] = list_gps

In [9]:
# Check that the new tuple_gps column is present on the first five rows.
df_dvf.head(5)

Unnamed: 0.1,Unnamed: 0,index,valeur_fonciere,adresse_numero,adresse_nom_voie,nom_commune,id_parcelle,type_local,surface_reelle_bati,nombre_pieces_principales,longitude,latitude,year,month,trim,index_prix,tuple_gps
0,10026,24429,6196748.0,71.0,RUE MONTAGNY,Lyon 8e Arrondissement,69388000CK0097,Appartement,4.0,1.0,4.850547,45.733555,2018,10,2018-T4,3.4,"(45.733555, 4.850547)"
1,14588,94779,26138036.0,7.0,RUE DE LA REPUBLIQUE,Lyon 1er Arrondissement,69381000AS0064,Appartement,20.0,1.0,4.836266,45.766464,2019,9,2019-T3,4.0,"(45.766464, 4.836266)"
2,26384,187992,25025000.0,18.0,RUE DE L ARBRE SEC,Lyon 1er Arrondissement,69381000AS0078,Appartement,27.0,1.0,4.835651,45.766366,2020,12,2020-T4,6.4,"(45.766366, 4.835651)"
3,17591,102152,23332406.0,4.0,RUE ETIENNE RICHERAND,Lyon 3e Arrondissement,69383000EI0081,Appartement,26.0,1.0,4.866821,45.763442,2019,4,2019-T2,4.3,"(45.763442, 4.866821)"
4,14388,94209,17473294.0,24.0,RUE NEUVE,Lyon 2e Arrondissement,69382000AC0045,Appartement,21.0,1.0,4.836262,45.765118,2019,9,2019-T3,4.0,"(45.765118, 4.836262)"


In [10]:
# Same (latitude, longitude) pairing as for df_dvf, applied to the points of
# interest and stored directly as a new column.
list_gps_2 = [
    (lat, lon)
    for lon, lat in zip(df_interest.longitude, df_interest.latitude)
]
df_interest['tuple_gps'] = list_gps_2

In [11]:
# List the distinct values of the category column available for filtering.
df_interest['category'].unique()

array(['kindergarten', 'school', 'college', 'university', 'doctors',
       'dentist', 'clinic', 'pharmacy', 'hospital', 'monument',
       'station_metro_A', 'station_metro_B', 'station_metro_C',
       'station_metro_D', 'bakery', 'supermarket',
       'educational_institution', 'medical_supply', 'arret_tram_T1',
       'arret_tram_T2', 'arret_tram_T3', 'arret_tram_T4', 'arret_tram_T5',
       'arret_tram_T6'], dtype=object)

### Creating one dataframe per category group

In [None]:
# Holds one single-entry dict {group name: filtered dataframe} per category group.
list_dataframes = []

In [14]:
def select_categories(df, categories):
    """Return the rows of ``df`` whose ``category`` value is one of ``categories``."""
    return df.loc[df['category'].isin(categories), :]


# School group: only the 'school' category is used. The related categories
# 'kindergarten', 'college', 'university' and 'educational_institution' are
# currently excluded.
df_school = select_categories(df_interest, ['school'])

df_bakery = select_categories(df_interest, ['bakery'])

df_supermarket = select_categories(df_interest, ['supermarket'])

# All metro lines are merged into a single "metro" group.
df_metro = select_categories(
    df_interest,
    ['station_metro_A', 'station_metro_B', 'station_metro_C', 'station_metro_D'],
)

# All tram lines are merged into a single "tram" group.
df_tram = select_categories(
    df_interest,
    ['arret_tram_T1', 'arret_tram_T2', 'arret_tram_T3',
     'arret_tram_T4', 'arret_tram_T5', 'arret_tram_T6'],
)

# Groups currently disabled (no distance is computed for them):
#   health   -> 'doctors', 'dentist', 'pharmacy', 'hospital',
#               'medical_supply', 'clinic'
#   monument -> 'monument'

# Build the list from scratch instead of appending to it, so that re-running
# this cell never leaves duplicate entries behind (each entry triggers a full,
# expensive distance computation later on).
list_dataframes = [
    {"school": df_school},
    {"bakery": df_bakery},
    {"supermarket": df_supermarket},
    {"metro": df_metro},
    {"tram": df_tram},
]

### Computing the distance between each property and the closest point of interest in each category

In [15]:
def calcul_distances(df_biens_immo, dict_df_interest):
    """Add the distance to, and position of, the nearest point of interest.

    For every row of ``df_biens_immo``, the great-circle distance in metres
    (rounded to 2 decimals) to each point of the given category is computed
    and the smallest one is kept.

    Parameters
    ----------
    df_biens_immo : pandas.DataFrame
        Properties. Must have a ``tuple_gps`` column holding
        ``(latitude, longitude)`` tuples. Modified in place.
    dict_df_interest : dict
        Mapping ``{category_name: dataframe}``; only its first entry is used.
        The dataframe must have a ``tuple_gps`` column as well.

    Returns
    -------
    pandas.DataFrame
        The same ``df_biens_immo`` object, with two new columns:
        ``closest_<category_name>_(m)`` and ``closest_<category_name>_position``.

    Raises
    ------
    ValueError
        If there are properties to process but the category has no point.
    """
    columname = list(dict_df_interest)[0]
    print(columname)
    df_interest_name = dict_df_interest[columname]
    print(df_interest_name.shape)

    # Materialise the points once: iterating a plain list is much cheaper than
    # re-iterating the Series for every single property.
    interest_points = list(df_interest_name.tuple_gps)
    if not interest_points and len(df_biens_immo) > 0:
        raise ValueError(
            f"no point of interest available for category '{columname}'"
        )

    list_closest_computed_distance = []
    list_closest_gps_position = []

    for point_property in df_biens_immo.tuple_gps:
        closest_distance = None
        closest_position = None

        for point_interest in interest_points:
            distance = round(GRC(point_property, point_interest).m, 2)
            # Strict "<" keeps the first point encountered when several points
            # share the same rounded distance.
            if closest_distance is None or distance < closest_distance:
                closest_distance = distance
                closest_position = point_interest

        list_closest_computed_distance.append(closest_distance)
        list_closest_gps_position.append(closest_position)

    # Write to the dataframe that was passed in, not to a notebook global, so
    # the function works on any properties dataframe.
    df_biens_immo[f'closest_{columname}_(m)'] = list_closest_computed_distance
    df_biens_immo[f'closest_{columname}_position'] = list_closest_gps_position

    return df_biens_immo

In [16]:
# Add the closest-distance columns for every category group, one group at a
# time. calcul_distances adds the columns to df_dvf and returns that same
# object, so df_global and df_dvf refer to the same dataframe afterwards.
for dict_df in list_dataframes:
    df_global = calcul_distances(df_dvf, dict_df)

school
(366, 5)
bakery
(279, 5)
supermarket
(154, 5)
metro
(44, 5)
tram
(116, 5)


### Saving our final dataframe

In [17]:
# Timestamp the file name so that successive runs do not overwrite each other.
timestr = time.strftime("%Y%m%d-%H%M%S")
# Create the output folder if needed: to_csv does not create missing directories.
os.makedirs('save_output', exist_ok=True)
df_dvf.to_csv(f'save_output/{timestr}-full.csv', index=False)

### Checking our results

In [18]:
# Read back the file that was just written (same timestr) to check the export.
# NOTE: columns holding tuples (tuple_gps, closest_*_position) come back from
# the CSV as plain strings, not tuples.
df_full = pd.read_csv(f'save_output/{timestr}-full.csv')

In [19]:
# Inspect the first five rows, including the new closest_* columns.
df_full.head(5)

Unnamed: 0.1,Unnamed: 0,index,valeur_fonciere,adresse_numero,adresse_nom_voie,nom_commune,id_parcelle,type_local,surface_reelle_bati,nombre_pieces_principales,...,closest_school_(m),closest_school_position,closest_bakery_(m),closest_bakery_position,closest_supermarket_(m),closest_supermarket_position,closest_metro_(m),closest_metro_position,closest_tram_(m),closest_tram_position
0,10026,24429,6196748.0,71.0,RUE MONTAGNY,Lyon 8e Arrondissement,69388000CK0097,Appartement,4.0,1.0,...,280.7,"(45.7346644003816, 4.8537957)",277.67,"(45.7353340003816, 4.8530576)",312.26,"(45.7357634003815, 4.8530323)",1120.01,"(45.737541, 4.837294)",490.44,"(45.72953, 4.853131)"
1,14588,94779,26138036.0,7.0,RUE DE LA REPUBLIQUE,Lyon 1er Arrondissement,69381000AS0064,Appartement,20.0,1.0,...,63.83,"(45.767023968382, 4.83644685592725)",143.52,"(45.7672747003771, 4.8348262)",119.18,"(45.7653965003773, 4.8364032)",148.2,"(45.767785, 4.836012)",971.12,"(45.75888, 4.842474)"
2,26384,187992,25025000.0,18.0,RUE DE L ARBRE SEC,Lyon 1er Arrondissement,69381000AS0078,Appartement,27.0,1.0,...,95.73,"(45.767023968382, 4.83644685592725)",119.59,"(45.7672747003771, 4.8348262)",110.04,"(45.7664184003772, 4.8342344)",160.25,"(45.767785, 4.836012)",986.43,"(45.75888, 4.842474)"
3,17591,102152,23332406.0,4.0,RUE ETIENNE RICHERAND,Lyon 3e Arrondissement,69383000EI0081,Appartement,26.0,1.0,...,242.84,"(45.7656024197006, 4.86636338305401)",214.96,"(45.7638234003775, 4.8641044)",37.08,"(45.7635565003776, 4.8663721)",699.31,"(45.767025, 4.859412)",384.09,"(45.764314, 4.86203)"
4,14388,94209,17473294.0,24.0,RUE NEUVE,Lyon 2e Arrondissement,69382000AC0045,Appartement,21.0,1.0,...,97.82,"(45.7652058788412, 4.8375167949069)",241.73,"(45.7661227003772, 4.8334984)",14.1,"(45.7650821003774, 4.8360876)",194.91,"(45.763393, 4.835816)",844.6,"(45.75888, 4.842474)"
