In [29]:
import os

import kagglehub
import numpy as np
import pandas as pd
import plotly.graph_objects as go


In [30]:

# Download the latest version of the dataset; `path` holds whatever
# kagglehub returns for it and is reused by the loading cell.
DATASET_HANDLE = "berkeleyearth/climate-change-earth-surface-temperature-data"
path = kagglehub.dataset_download(DATASET_HANDLE)

print(f"Path to dataset files: {path}")

Path to dataset files: /Users/danielhung/.cache/kagglehub/datasets/berkeleyearth/climate-change-earth-surface-temperature-data/versions/2


In [25]:
# Earliest calendar year kept in the *_clean frames (inclusive).
START_YEAR = 1900


def _load_temperature_csv(csv_path):
    """Read one dataset CSV and convert its 'dt' column to datetime.

    Parameters
    ----------
    csv_path : str
        Location of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The file's contents, with 'dt' converted by ``pd.to_datetime``.
    """
    frame = pd.read_csv(csv_path)
    frame['dt'] = pd.to_datetime(frame['dt'])
    return frame


def _clean_temperatures(frame, temperature_column, start_year=START_YEAR):
    """Keep rows that have a temperature value and are dated ``start_year`` or later.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a datetime 'dt' column, as returned by
        ``_load_temperature_csv``.
    temperature_column : str
        Column whose missing values cause a row to be dropped.
    start_year : int, optional
        First year (inclusive) to keep. Defaults to ``START_YEAR``.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified.
    """
    with_reading = frame.dropna(subset=[temperature_column])
    return with_reading[with_reading['dt'].dt.year >= start_year]


# `path` is the dataset location returned by kagglehub in the download cell.
city_file = os.path.join(path, "GlobalLandTemperaturesByCity.csv")
country_file = os.path.join(path, "GlobalLandTemperaturesByCountry.csv")
global_file = os.path.join(path, "GlobalTemperatures.csv")

# Raw frames: full contents of each file, with 'dt' as datetime.
df_city = _load_temperature_csv(city_file)
df_country = _load_temperature_csv(country_file)
df_global = _load_temperature_csv(global_file)

# Cleaned frames: no missing temperature values, START_YEAR onwards.
df_city_clean = _clean_temperatures(df_city, 'AverageTemperature')
df_country_clean = _clean_temperatures(df_country, 'AverageTemperature')
df_global_clean = _clean_temperatures(df_global, 'LandAverageTemperature')

In [26]:
import plotly.graph_objects as go

# Build the figure with its single line trace directly in the constructor.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_global_clean['dt'],
            y=df_global_clean['LandAverageTemperature'],
            mode='lines',
            name='Global Avg Temp',
        )
    ]
)

# Title, axis labels and the dark theme.
fig.update_layout(
    title='🌍 Global Land Average Temperature Over Time',
    xaxis_title='Year',
    yaxis_title='Temperature (°C)',
    template='plotly_dark',
)

fig.show()

In [31]:
# Preview the first five rows of the cleaned global frame.
df_global_clean.head(n=5)

Unnamed: 0,dt,LandAverageTemperature,LandAverageTemperatureUncertainty,LandMaxTemperature,LandMaxTemperatureUncertainty,LandMinTemperature,LandMinTemperatureUncertainty,LandAndOceanAverageTemperature,LandAndOceanAverageTemperatureUncertainty
1800,1900-01-01,1.461,0.276,7.193,0.465,-4.102,0.395,13.142,0.142
1801,1900-02-01,3.098,0.416,9.181,0.604,-2.814,0.626,13.777,0.173
1802,1900-03-01,5.492,0.261,11.377,0.327,-0.68,0.61,14.4,0.141
1803,1900-04-01,8.223,0.292,13.972,0.342,2.131,0.394,15.17,0.151
1804,1900-05-01,11.385,0.357,17.415,0.329,5.179,0.379,15.955,0.159


In [27]:
# Rows of the cleaned country frame whose 'Country' is exactly 'United States'.
usa_df = df_country_clean.loc[df_country_clean['Country'] == 'United States']

# add_trace and update_layout each return the figure, so the build is chained.
fig = (
    go.Figure()
    .add_trace(
        go.Scatter(
            x=usa_df['dt'],
            y=usa_df['AverageTemperature'],
            mode='lines',
            name='USA Avg Temp',
        )
    )
    .update_layout(
        title='🇺🇸 Average Temperature in the United States',
        xaxis_title='Year',
        yaxis_title='Temperature (°C)',
        template='plotly_white',
    )
)

fig.show()

In [28]:
# Cities to compare; edit this list to change both the traces and the title.
# NOTE(review): rows are matched on the 'City' name alone. If the dataset
# lists the same name under more than one country, those rows would all land
# in a single trace — confirm, and filter on country as well if that is so.
cities = ['London', 'New York', 'Tokyo']
filtered = df_city_clean[df_city_clean['City'].isin(cities)]

fig = go.Figure()

# One line trace per city, added in the order given in `cities`.
for city in cities:
    city_data = filtered[filtered['City'] == city]
    fig.add_trace(go.Scatter(
        x=city_data['dt'],
        y=city_data['AverageTemperature'],
        mode='lines',
        name=city
    ))

fig.update_layout(
    # Built from `cities` so the title cannot drift from what is plotted.
    title=f"🏙️ Temperature Comparison: {', '.join(cities)}",
    xaxis_title='Year',
    yaxis_title='Temperature (°C)',
    template='plotly'
)

fig.show()