In [1]:
pip install mrjob

Defaulting to user installation because normal site-packages is not writeable
Collecting mrjob
  Downloading mrjob-0.7.4-py2.py3-none-any.whl.metadata (7.3 kB)
Downloading mrjob-0.7.4-py2.py3-none-any.whl (439 kB)
Installing collected packages: mrjob
Successfully installed mrjob-0.7.4
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [29]:
import pandas as pd
from collections import defaultdict

# Load dataset
df = pd.read_csv('weather_data.csv')

# Print columns to debug
print(df.columns)

# Ensure no leading/trailing spaces in column names
df.columns = df.columns.str.strip()

# Extract year and temperature
df['Year'] = pd.to_datetime(df['Date_Time']).dt.year
df['Temperature'] = df['Temperature_C'].astype(float)

# Mapper: Emit (year, temperature)
mapped_data = df[['Year', 'Temperature']].values

# Reducer: Aggregate by year and compute average temperature
yearly_temps = defaultdict(list)
for year, temp in mapped_data:
    yearly_temps[year].append(temp)

avg_temps = {year: sum(temps)/len(temps) for year, temps in yearly_temps.items()}

# Find hottest and coolest years
hottest_year = max(avg_temps, key=avg_temps.get)
coolest_year = min(avg_temps, key=avg_temps.get)

print(f"Coolest Year: {hottest_year} with Min Temp: {df['Temperature'].min()}°C")
print(f"Hottest Year: {coolest_year} with Max Temp: {df['Temperature'].max()}°C")


Index(['Location', 'Date_Time', 'Temperature_C', 'Humidity_pct',
       'Precipitation_mm', 'Wind_Speed_kmh'],
      dtype='object')
Coolest Year: 2024.0 with Min Temp: -19.96931109358452°C
Hottest Year: 2024.0 with Max Temp: 39.99980055990208°C
