In [17]:
from dataset import WeatherDataset, HappinessDataset
import logging
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import os

In [18]:
# Path of the CSV cache holding the per-city mean temperatures (and their
# geocoded coordinates); the weather cell reads it when present and writes it
# after a rebuild.
WEATHER_DATA_PATH = 'data/temperature_means.csv'

In [19]:
# Processed data for weather
#
# Loads the per-city mean temperature table from the CSV cache at
# WEATHER_DATA_PATH when it exists. Otherwise the table is rebuilt from the raw
# weather data (restricted to the years covered by the happiness dataset),
# every city is geocoded, and the result is written to the cache for next time.

if os.path.exists(WEATHER_DATA_PATH):
    temperature_means = pd.read_csv(WEATHER_DATA_PATH)
else:
    # Imported lazily: geopy is only needed when the cache has to be rebuilt.
    from geopy.extra.rate_limiter import RateLimiter
    from geopy.geocoders import Nominatim

    # Preprocess
    # The happiness years are loaded right here instead of being read from
    # HAPPY_DATA: that name is only assigned in a later cell, so relying on it
    # raises NameError on a fresh kernel whenever the cache file is missing.
    happiness_years = [int(year) for year in HappinessDataset.from_kaggle().get_years()]

    WEATHER_DATA = WeatherDataset.from_kaggle()
    # Derive the year as a standalone Series so the dataset's private frame is
    # not modified; it is only needed as a row filter.
    observation_years = pd.to_datetime(WEATHER_DATA._data['date']).dt.year
    weather_data = WEATHER_DATA._data[observation_years.isin(happiness_years)]
    temperature_means = weather_data.groupby(['city_name'])['avg_temp_c'].mean().reset_index()

    geolocator = Nominatim(user_agent="happiness_analysis_cuni")
    # The public Nominatim service allows at most one request per second;
    # RateLimiter spaces the calls out (and retries transient geocoder errors).
    geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

    def get_location_data(city_name):
        """Geocode ``city_name`` and return ``(latitude, longitude, altitude)``.

        Best-effort lookup: returns ``(None, None, None)`` when the geocoder
        finds no match or the lookup fails, so one bad city never aborts the
        whole run. Failures are logged instead of being silently discarded.
        """
        try:
            location = geocode(city_name)
        except Exception as exc:  # not a bare except: Ctrl-C must still stop the loop
            logging.warning("Geocoding failed for %r: %s", city_name, exc)
            return None, None, None
        if location:
            return location.latitude, location.longitude, location.altitude
        return None, None, None

    city_coords = [get_location_data(city) for city in temperature_means['city_name'].values]
    temperature_means[['latitude', 'longitude', 'altitude']] = city_coords

    # Make sure the cache directory exists so the expensive geocoding result
    # is not lost to a missing-folder error on write.
    cache_dir = os.path.dirname(WEATHER_DATA_PATH)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    temperature_means.to_csv(WEATHER_DATA_PATH, index=False)

# Work on a copy so later cells cannot alter the cached/loaded table.
city_df = temperature_means.copy()

In [20]:
# Preview the first rows of the per-city temperature table.
city_df.head()

Unnamed: 0,city_name,avg_temp_c,latitude,longitude,altitude
0,Aalborg,9.608488,57.046263,9.921526,0.0
1,Abakan,3.174583,53.72068,91.440602,0.0
2,Abha,19.940576,18.216428,42.50436,0.0
3,Abidjan,27.337729,5.320357,-4.016107,0.0
4,Aboisso,26.82087,5.712626,-3.20761,0.0


In [21]:
# Load the happiness dataset and stack its per-year tables into one long
# DataFrame, tagging every row with the integer year it belongs to so the
# result can later be merged or filtered by year.
HAPPY_DATA = HappinessDataset.from_kaggle()

happiness_data_frames = [
    # .assign returns a new frame, leaving the dataset's own table untouched.
    HAPPY_DATA[survey_year].assign(year=int(survey_year))
    for survey_year in HAPPY_DATA.get_years()
]
happiness_data_df = pd.concat(happiness_data_frames, ignore_index=True)




In [22]:
# Preview the first rows of the stacked (all years) happiness table.
happiness_data_df.head()

Unnamed: 0,country,region,happiness_score,gdp_per_capita,social_support,healthy_life_expectancy,freedom_to_make_life_choices,generosity,perceptions_of_corruption,year
0,Switzerland,Western Europe,7.587,1.39651,1.34951,0.94143,0.66557,0.29678,0.41978,2015
1,Iceland,Western Europe,7.561,1.30232,1.40223,0.94784,0.62877,0.4363,0.14145,2015
2,Denmark,Western Europe,7.527,1.32548,1.36058,0.87464,0.64938,0.34139,0.48357,2015
3,Norway,Western Europe,7.522,1.459,1.33095,0.88521,0.66973,0.34699,0.36503,2015
4,Canada,North America and ANZ,7.427,1.32629,1.32261,0.90563,0.63297,0.45811,0.32957,2015


In [23]:
import plotly.express as px

# Interactive map with one marker per city, coloured by its mean temperature.
scatter_options = dict(
    lat="latitude",
    lon="longitude",
    color="avg_temp_c",
    hover_name="city_name",
    # Coordinates are already encoded by the marker position; show only altitude.
    hover_data={"latitude": False, "longitude": False, "altitude": True},
    color_continuous_scale="RdYlBu_r",  # reversed RdYlBu colour scale
    size_max=15,
    zoom=1,  # low zoom level for a world-wide view
)
fig = px.scatter_mapbox(city_df, **scatter_options)

# OpenStreetMap tiles as the base layer, plus the figure title.
fig.update_layout(mapbox_style="open-street-map", title="Average Temperature by City")

fig.show()


### Temperature and happiness


In [24]:
import plotly.express as px
import plotly.graph_objects as go

# Country layer: choropleth coloured by happiness score, with the remaining
# survey indicators shown on hover.
# NOTE(review): happiness_data_df holds every survey year stacked together, so
# each country appears several times in this single map - confirm that is
# intended rather than one selected year.
happiness_hover_columns = [
    "gdp_per_capita",
    "social_support",
    "healthy_life_expectancy",
    "freedom_to_make_life_choices",
    "generosity",
    "perceptions_of_corruption",
    "year",
]
fig = px.choropleth(
    happiness_data_df,
    locations="country",
    locationmode="country names",
    color="happiness_score",
    hover_name="country",
    hover_data={column: True for column in happiness_hover_columns},
    color_continuous_scale="YlGnBu",
    title="Global Happiness Score and Average Temperature by City (Bigger red circle, the higher temperature)"
)

# City layer: red circles whose size grows with the mean temperature.
# Sizes are floored at 5, so cold (including sub-zero) cities still get a
# small visible marker instead of a tiny or negative size.
city_labels = city_df['city_name'] + '<br>Avg Temp: ' + city_df['avg_temp_c'].astype(str) + "°C"
city_marker_sizes = city_df['avg_temp_c'].div(1.4).clip(lower=5)
temperature_trace = go.Scattergeo(
    lon=city_df['longitude'],
    lat=city_df['latitude'],
    text=city_labels,
    marker={"size": city_marker_sizes, "color": 'red', "opacity": 0.7},
    name="Temperature"
)
fig.add_trace(temperature_trace)

# Shared layout for the combined figure: frameless globe with coastlines,
# a titled legend and tight margins that leave room for the title.
fig.update_layout(
    geo={"showframe": False, "showcoastlines": True},
    legend={"title": "Legend"},
    margin={"r": 0, "t": 50, "l": 0, "b": 0}
)

fig.show()
