# Lecture des données de géolocalisation des Locations
> Pour des raisons de performance de plotly express, l'affichage est limité à un échantillon de 2 % des données.

> Au-delà de cette limite, l'affichage plante car il y a trop de points à tracer.

In [3]:
import plotly.express as px
import os
import pandas as pd

# Plot a price-coloured scatter map of a 2% random sample of the cleaned
# rentals dataset, after discarding 'prix_bien' outliers (1.5 * IQR rule).

# Location of the cleaned, merged rentals CSV.
folder_path = '../data/processed/Rentals'
output_file = os.path.join(folder_path, 'merged_rentals_data_clean.csv')

# Read the CSV in fixed-size chunks, then stitch them back into one DataFrame.
chunksize = 100000  # Number of rows per chunk
chunks = pd.read_csv(output_file, sep=',', chunksize=chunksize, index_col=None, low_memory=False)
df_rentals = pd.concat(chunks)

# Keep a reproducible 2% random sample so the map stays renderable.
df_rentals = df_rentals.sample(frac=0.02, random_state=42)

# Interquartile range of 'prix_bien', computed on the sample.
Q1 = df_rentals["prix_bien"].quantile(0.25)  # First quartile (25th percentile)
Q3 = df_rentals["prix_bien"].quantile(0.75)  # Third quartile (75th percentile)
IQR = Q3 - Q1  # Interquartile range

# Outlier fences: anything further than 1.5 * IQR from the quartiles is dropped.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Keep only rows whose price lies within the fences. Rows with a missing
# 'prix_bien' are dropped too, because NaN fails both comparisons.
df_rentals = df_rentals[df_rentals["prix_bien"].between(lower_bound, upper_bound)]

# One marker per rental; both colour and marker size encode the price.
fig = px.scatter_map(
    df_rentals,
    lat="mapCoordonneesLatitude",
    lon="mapCoordonneesLongitude",
    color="prix_bien",
    size="prix_bien",
    size_max=15,
    hover_name="date",
    hover_data=["typedebien"],
    height=600,
    width=800,
    zoom=6,
    center={"lat": 48.866667, "lon": 2.333333},
)
# NOTE: no update_geos() call here. It only affects `geo` subplots, and a
# scatter_map figure draws on a `map` subplot, so it had no effect.
fig.update_layout(title="Scatter Map Prices")
fig.show()