In [1]:
import boto3
import pandas as pd
import os
import plotly.express as px
from dotenv import load_dotenv, find_dotenv

#### Configuration AWS S3, récupération du fichier et objectif de visualisation

Dans cette section, on configure l’accès à AWS S3 pour pouvoir accéder au fichier final 
(`hotels_weather_final_ter.csv`) stocké dans un bucket, puis le télécharger en local.  

Les étapes sont les suivantes :
1. Charger les variables d’environnement (clés AWS) depuis le fichier `.env`.  
2. Définir les constantes utiles : nom du bucket, chemin du fichier sur S3, fichier local.  
3. Créer un client S3 avec la librairie `boto3` pour interagir avec le service.  

L’objectif final est de disposer d’un fichier centralisé sur S3, qui servira de source unique pour 
réaliser des visualisations interactives (ex. carte des meilleures destinations selon la météo, 
ou classement des hôtels) directement à partir de ce jeu de données nettoyé et enrichi.

In [None]:
# --- S3 locations (plain constants, independent of the environment) ---
S3_BUCKET = "flodussartprojectkayak"                 # bucket that holds the dataset
S3_FILE_KEY = "projectKayack/src/final_ter.csv"      # object key inside the bucket
LOCAL_FILE = "data/hotels_weather_final_ter.csv"     # local path the S3 object is downloaded to

# --- Credentials ---
# Locate the .env file and load its variables; override=True makes values
# from .env take precedence over variables already set in the environment.
env_path = find_dotenv()
load_dotenv(dotenv_path=env_path, override=True)

# Read the AWS key pair from the environment (None when a variable is unset).
AWS_ACCESS_KEY_ID = os.getenv("AWS_KEY")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_KEY")


In [None]:
# Create the S3 client used by the following cells.
# The credentials come from the constants set in the configuration cell rather
# than from a second environment lookup, so the key pair has a single source.
s3 = boto3.client(
    's3',
    aws_access_key_id=AWS_ACCESS_KEY_ID,               # AWS access key
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,       # AWS secret key
    region_name='eu-north-1'                           # AWS region the client targets
)

In [None]:
# Download the file from S3 and save it locally.
# download_file does not create missing parent folders, so make sure the
# destination directory exists first (e.g. on a fresh checkout).
local_dir = os.path.dirname(LOCAL_FILE)
if local_dir:  # empty string when LOCAL_FILE has no directory component
    os.makedirs(local_dir, exist_ok=True)

s3.download_file(S3_BUCKET, S3_FILE_KEY, LOCAL_FILE)
print(f"File downloaded: {LOCAL_FILE}")

Fichier téléchargé : hotels_weather_final_ter.csv


In [None]:
# Load the downloaded file with pandas, reading from LOCAL_FILE so the path
# always matches the one used by the download step.
# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if present,
# which keeps accented characters and the first column name intact.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# saved index; consider index_col=0 if nothing downstream relies on it.
df = pd.read_csv(LOCAL_FILE, encoding='utf-8-sig')
df.head()

Unnamed: 0,hotel_id,hotel_name,hotel_url,hotel_rating,hotel_description,hotel_latitude,hotel_longitude,city_id,city_name,city_latitude,city_longitude,temp_max,humidity,wind_speed,clouds,pop,good_weather_score
0,1,Aigues Marines,https://www.booking.com/hotel/fr/aigues-marine...,9.5,L’établissement Aigues Marines vous accueille ...,43.55972,4.218698,1,Aigues-Mortes,43.566152,4.19154,19.544286,53.714286,5.901429,29.571429,1.0,6.610714
1,2,Appartements 3 étoiles terrasse ou patio intra...,https://www.booking.com/hotel/fr/appartement-3...,9.6,L’établissement Appartements 3 étoiles terrass...,43.565358,4.19275,1,Aigues-Mortes,43.566152,4.19154,19.544286,53.714286,5.901429,29.571429,1.0,6.610714
2,3,Artemia Aigues-Mortes - Hotel avec piscine,https://www.booking.com/hotel/fr/le-royal-hote...,9.1,Featuring free WiFi and a seasonal outdoor swi...,43.576396,4.197818,1,Aigues-Mortes,43.566152,4.19154,19.544286,53.714286,5.901429,29.571429,1.0,6.610714
3,4,Au Cœur des Remparts,https://www.booking.com/hotel/fr/au-coeur-des-...,9.9,L’hébergement Au Cœur des Remparts se trouve à...,43.565401,4.192973,1,Aigues-Mortes,43.566152,4.19154,19.544286,53.714286,5.901429,29.571429,1.0,6.610714
4,5,Chez Céline et Sébastien,https://www.booking.com/hotel/fr/chez-celine-e...,9.4,L’hébergement Chez Céline et Sébastien se situ...,43.570192,4.195081,1,Aigues-Mortes,43.566152,4.19154,19.544286,53.714286,5.901429,29.571429,1.0,6.610714


In [None]:
# Build the top-5 table in a single chain:
#   1. average the weather score per city (the coordinates are part of the
#      group key so they come back as columns for plotting),
#   2. order by score, best first, and keep the first five rows,
#   3. add a one-decimal copy of the score for a cleaner hover label.
top5_cities = (
    df.groupby(['city_name', 'city_latitude', 'city_longitude'])['good_weather_score']
    .mean()
    .reset_index()
    .sort_values(by='good_weather_score', ascending=False)
    .head(5)
    .assign(score=lambda top: top['good_weather_score'].round(1))
)

# Scatter map of the five cities: marker size and colour both encode the score.
fig = px.scatter_mapbox(
    top5_cities,
    lat="city_latitude",
    lon="city_longitude",
    size="good_weather_score",
    color="good_weather_score",
    hover_name="city_name",
    # Show the rounded score on hover and hide the raw one.
    hover_data=dict(score=True, good_weather_score=False),
    color_continuous_scale=px.colors.sequential.Rainbow,
    zoom=4,
    height=600,
)

# Layout: base map style, centred title, colour-bar styling, tight margins.
fig.update_layout(
    mapbox_style="carto-positron",
    title=dict(text="Top 5 Cities by Weather Score", x=0.5, xanchor='center'),
    coloraxis_colorbar={
        'title': "Weather Score",
        'tickformat': ".0f",
        'ticks': "outside",
    },
    margin={'l': 0, 'r': 0, 't': 50, 'b': 0},  # keep only room for the title
)

# Render the interactive map
fig.show()
