# IS 362 - Project 2: Climate Data
## Dataset 3

Source: Climate comparison data for Boston and New York City

## Loading the Data

The data is in wide format: each row is one city–metric pair, and each month is a separate column.

In [None]:
import pandas as pd
import numpy as np

# Boston: each month maps to [Avg Temp, Precipitation], matching 'Metric' order.
boston_data = {
    'City': 'Boston',
    'Metric': ['Avg Temp', 'Precipitation'],
    'Jan': [29, 4.3],
    'Feb': [32, 3.9],
    'Mar': [42, 4.5],
    'Apr': [53, 4.4],
    'May': [64, 3.6],
    'Jun': [74, 3.5],
    'Jul': [80, 3.2],
    'Aug': [78, 3.2],
    'Sep': [71, 3.6],
    'Oct': [60, 4.0],
    'Nov': [47, 4.5],
    'Dec': [37, 4.6],
}

# NYC: same layout as the Boston record.
nyc_data = {
    'City': 'NYC',
    'Metric': ['Avg Temp', 'Precipitation'],
    'Jan': [32, 3.2],
    'Feb': [35, 3.1],
    'Mar': [46, 3.5],
    'Apr': [57, 4.0],
    'May': [68, 4.3],
    'Jun': [78, 3.8],
    'Jul': [84, 4.2],
    'Aug': [83, 4.0],
    'Sep': [76, 3.9],
    'Oct': [65, 3.3],
    'Nov': [50, 3.7],
    'Dec': [39, 3.5],
}


def _wide_frame(record):
    """Return a wide-format DataFrame (one row per metric) for one city record.

    Columns are 'City', 'Metric', then the twelve months in calendar order.
    """
    metric_names = record['Metric']
    # Repeat the city name once per metric row so every row is fully labelled.
    columns = {
        'City': [record['City']] * len(metric_names),
        'Metric': metric_names,
    }
    for label in ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'):
        columns[label] = list(record[label])
    return pd.DataFrame(columns)


df_boston = _wide_frame(boston_data)
df_nyc = _wide_frame(nyc_data)

# Stack both cities into one table with a fresh 0..n-1 index.
df = pd.concat([df_boston, df_nyc], ignore_index=True)

print("Data shape:", df.shape)
print("\nBoston data:")
print(df_boston)
print("\nNYC data:")
print(df_nyc)

## Cleaning the Data

The data has months as columns, so it needs to be converted to long format with each month as a row. Before reshaping, check the value ranges and confirm there are no missing values.

In [None]:
# Work on a copy so the combined wide table itself is left unmodified.
df_clean = df.copy()

# Month columns hold the measurements, in calendar order.
month_cols = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Check ranges: separate the rows by metric so each check compares like with like.
is_temp = df_clean['Metric'] == 'Avg Temp'
is_precip = df_clean['Metric'] == 'Precipitation'
temp_data = df_clean.loc[is_temp, month_cols]
precip_data = df_clean.loc[is_precip, month_cols]

# First reduction is per month column, second collapses those to one overall value.
temp_low = temp_data.min().min()
temp_high = temp_data.max().max()
precip_low = precip_data.min().min()
precip_high = precip_data.max().max()

print("Temperature range:", temp_low, "to", temp_high, "°F")
print("Precipitation range:", precip_low, "to", precip_high, "inches")

# Count missing cells across the whole table; zero means nothing to impute.
missing_cells = df_clean.isnull().sum().sum()
print("\nNo null values:", missing_cells == 0)

## Converting to Long Format

In [None]:
# Reshape wide -> long: City and Metric stay as identifiers, and every month
# column is unpivoted into a (Month, Value) pair, giving one row per
# city / metric / month combination.
df_long = df_clean.melt(
    id_vars=['City', 'Metric'],
    value_vars=month_cols,
    var_name='Month',
    value_name='Value',
)

print("New shape:", df_long.shape)
print("\nFirst 15 rows:")
print(df_long.head(15))

## Analysis

In [None]:
# Temperature comparison: select the temperature rows once and reuse them.
temp_rows = df_long[df_long['Metric'] == 'Avg Temp']

print("Average temperature by city:")
temp_by_city = temp_rows.groupby('City')['Value'].mean()
print(temp_by_city)

# Coldest and warmest monthly value for each city.
print("\nTemperature range by city:")
for city in ['Boston', 'NYC']:
    city_temp = temp_rows.loc[temp_rows['City'] == city, 'Value']
    print(f"{city}: {city_temp.min()}°F to {city_temp.max()}°F")

In [None]:
# Precipitation: select the precipitation rows once and reuse them.
precip_rows = df_long[df_long['Metric'] == 'Precipitation']

# Summing the twelve monthly values gives the annual total per city.
print("Total annual precipitation:")
for city in ['Boston', 'NYC']:
    precip = precip_rows.loc[precip_rows['City'] == city, 'Value'].sum()
    print(f"{city}: {precip:.1f} inches")

print("\nAverage monthly precipitation:")
precip_by_city = precip_rows.groupby('City')['Value'].mean()
print(precip_by_city)

In [None]:
# Pivot to see side-by-side: one row per month, one column per city.
#
# pivot() returns its index sorted, which for month-name strings means
# alphabetical order (Apr, Aug, Dec, ...). Reindexing with month_cols puts
# the rows back in calendar order so seasonal trends read top to bottom.
print("Temperature comparison by month:")
temp_pivot = (
    df_long[df_long['Metric'] == 'Avg Temp']
    .pivot(index='Month', columns='City', values='Value')
    .reindex(month_cols)
)
print(temp_pivot)

print("\n" + "-"*40)
print("\nPrecipitation comparison by month:")
precip_pivot = (
    df_long[df_long['Metric'] == 'Precipitation']
    .pivot(index='Month', columns='City', values='Value')
    .reindex(month_cols)
)
print(precip_pivot)

## Summary

**Transformations:**
- Converted from wide format (months as columns) to long format (months as rows)
- Kept `City` and `Metric` as identifier columns and added a `Month` identifier alongside a single `Value` column
- Data spans both temperature and precipitation

**Key Differences:**
- NYC is warmer on average than Boston (about 59.4°F vs. 55.6°F across the year)
- Boston gets more precipitation annually (47.3 vs. 44.5 inches)
- Both cities follow similar seasonal patterns (cold in winter, hot in summer)
- Precipitation is fairly consistent throughout the year for both cities

**Notes:**
- Temperature in Fahrenheit, Precipitation in inches
- Data represents climate normals (long-term averages)
- All values within expected ranges