# ✅ Checkpoint 02 — pandas Basics

**Goal**
- Load/create DataFrames, filter & sort, add computed columns, groupby/aggregate, and merge.

**Rules**
- Fill only where marked as `# TODO`
- Do not change test cells (🔒)
- Restart the kernel and run all cells from top to bottom before submitting (later questions depend on earlier ones)

**References**
- pandas docs: https://pandas.pydata.org/docs/


In [None]:
# 🔧 Setup
import numpy as np
import pandas as pd
from utils.grader import (
    check_array, check_value, check_dataframe_columns,
    check_series_index_values, check_len
)
# Seed NumPy's legacy global random generator so any random draws repeat across runs.
# NOTE(review): no cell visible in this notebook draws random numbers — confirm the seed is still needed.
np.random.seed(42)


In [None]:
# In-memory sample shared by the questions below: five cities, all observed on one date.
# Each key becomes a column name; every sequence has length 5.
data = dict(
    city=['Ann Arbor', 'Kalamazoo', 'Detroit', 'Grand Rapids', 'Lansing'],
    temp_f=[68, 77, 59, 90, 82],  # Fahrenheit readings (converted to Celsius in Q3)
    rain=[False, True, False, False, True],
    date=pd.to_datetime(['2025-08-20'] * 5),  # the same date repeated for every city
)


In [None]:
# Q1) Create a DataFrame 'df' from the dict 'data' with columns in order: city, temp_f, rain, date
# 'df' is the base table for the later questions. Once filled in, re-running this cell
# rebuilds 'df' from 'data', which also discards the 'temp_c' column that Q3 adds.
# TODO: assign to variable 'df'
df = ...  # TODO

# 🔒 Test
# The check_* helpers come from utils.grader (not shown here); presumably they report or
# raise on a mismatch — confirm in that module.
check_dataframe_columns(df, ['city','temp_f','rain','date'])  # the column list the grader expects
check_value(df.iloc[0]['city'], 'Ann Arbor')  # first row should hold the first entry of data['city']
check_len(df, 5)  # one row per city in 'data'


In [None]:
# Q2) Filter rows where rain == False, sort by temp_f descending, reset index → 'df_dry'
# After the reset, row labels should run 0..2 and the column list should be unchanged.
# Run this cell before Q3: the column check below expects 'df' without 'temp_c', so
# re-running it after Q3 has added that column will presumably fail the last check.
# TODO: assign to variable 'df_dry'
df_dry = ...  # TODO

# 🔒 Test
check_len(df_dry, 3)
check_value(df_dry.iloc[0]['temp_f'], 90)
# The coldest dry city must come last; with three rows this pins the full descending order.
check_value(int(df_dry.iloc[-1]['temp_f']), 59)
# Label-based lookup: label 0 points at the hottest row only if the index was reset after
# sorting (without a reset, label 0 still belongs to Ann Arbor at 68 °F).
check_value(int(df_dry.loc[0, 'temp_f']), 90)
check_dataframe_columns(df_dry, ['city','temp_f','rain','date'])


In [None]:
# Q3) Add a Celsius column: temp_c = round((temp_f - 32) * 5/9, 1)
# This is the only question that asks you to change 'df' itself; Q4 averages the new
# column and Q5's column check expects it to be present.
# TODO: create 'temp_c' column on df
...

# 🔒 Test
# Grand Rapids is 90 °F in 'data', so the expected Celsius value is round((90-32)*5/9, 1) == 32.2.
check_value(float(df.loc[df['city']=='Grand Rapids','temp_c'].iloc[0]), round((90-32)*5/9,1))
check_dataframe_columns(df, ['city','temp_f','rain','date','temp_c'])  # the original four columns plus 'temp_c'


In [None]:
# Q4) Group by 'rain' and compute mean temp_c → 'avg_temp_by_rain' (Series indexed by rain boolean)
# Requires the 'temp_c' column created in Q3.
# TODO: assign to variable 'avg_temp_by_rain'
avg_temp_by_rain = ...  # TODO

# 🔒 Test (values checked approximately)
check_series_index_values(avg_temp_by_rain, {False, True})  # one entry expected per distinct 'rain' value
mean_false = avg_temp_by_rain.loc[False]
mean_true = avg_temp_by_rain.loc[True]
# The expected means are computed from the unrounded Celsius values, while 'temp_c' was
# rounded to 1 decimal in Q3. Both sides are rounded to 1 decimal here, and for this data
# they agree: 22.4 for the dry cities and 26.4 for the rainy ones.
check_value(round(float(mean_false),1), round(((68-32)*5/9 + (59-32)*5/9 + (90-32)*5/9)/3, 1))
check_value(round(float(mean_true),1), round(((77-32)*5/9 + (82-32)*5/9)/2, 1))


In [None]:
# Q5) Merge: left-merge the 'city_region' lookup (columns city and region, provided below) onto df → 'df_merged'
# 'df' must already contain 'temp_c' from Q3, because the column check below expects it.
city_region = pd.DataFrame({
    'city': ['Ann Arbor','Kalamazoo','Detroit','Grand Rapids','Lansing'],
    'region': ['SE','SW','SE','W','C']
})
# TODO: left-merge on 'city' to produce df_merged
df_merged = ...  # TODO

# 🔒 Test
check_dataframe_columns(df_merged, ['city','temp_f','rain','date','temp_c','region'])  # df's columns followed by 'region'
# Compared as a set: 'SE' occurs twice (Ann Arbor and Detroit), so only the distinct regions are checked.
check_value(set(df_merged['region']), {'SE','SW','W','C'})


### ✅ Submit
- All tests above passed
- Save notebook and commit to your repo
