# Module 2 – Python for Data Work Assessment (Instructor Version)

**Instructor / Grading Template**

This notebook contains **hidden assessment logic** and must NOT be shared with students.

Purpose:
- Inject student code programmatically
- Run automated tests
- Produce authoritative scores

**Assessment Data Sources:**
Students must download real data from:
1. DJIA from WSJ or Yahoo Finance -> `djia_data.csv`
2. USD/GBP from FRED (DEXUSUK) -> `fx_usd_gbp.csv`  
3. Federal Funds Rate from FRED (FEDFUNDS) -> `fed_funds_rate.csv`

**Total Points:** 100 (20 points per task)

In [None]:
# === HIDDEN: SCORING SETUP ===
# Registries filled in by record_score() and read by the results cell.
__assessment_scores = {}    # exercise name -> (points, max_points)
__assessment_feedback = {}  # exercise name -> feedback object passed to record_score

def record_score(exercise, points, max_points, feedback=None):
    """Record the score, and optionally the feedback, for one exercise.

    Recording the same exercise again overwrites its previous score.

    Args:
        exercise: Key identifying the exercise (e.g. 'Task 1').
        points: Points awarded.
        max_points: Maximum points available for the exercise.
        feedback: Optional feedback (the test cells pass a list of strings).
            A falsy value (None, empty list) means "no feedback".
    """
    __assessment_scores[exercise] = (points, max_points)
    if feedback:
        __assessment_feedback[exercise] = feedback
    else:
        # Drop feedback left over from an earlier recording of this exercise
        # so a re-run never pairs a new score with stale messages.
        __assessment_feedback.pop(exercise, None)

In [None]:
# === HIDDEN: IMPORTS FOR TESTING ===
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set pandas' 'display.max_columns' option to 10.
pd.set_option('display.max_columns', 10)
# Use seaborn's 'whitegrid' plot style.
sns.set_style('whitegrid')

## Task 1 — Load & Inspect DJIA (20 points)

In [None]:
# === HIDDEN TEST: Task 1 ===
# Tests loading REAL DJIA data from djia_data.csv
points = 0
feedback = []

# Helper to find column by normalized name
def find_col(df, name):
    """Return the label in ``df.columns`` that matches ``name``, or None.

    Matching ignores case and leading/trailing whitespace on the column
    label. Labels are converted with ``str()`` before normalizing so that
    non-string labels (for example integer labels) are compared as text
    instead of raising AttributeError on ``.strip()``.

    Args:
        df: Object exposing an iterable ``columns`` attribute.
        name: Column name to look for (compared lower-cased).

    Returns:
        The first matching label exactly as it appears in ``df.columns``,
        or None when no label matches.
    """
    target = name.lower()  # loop-invariant, so compute it once
    for label in df.columns:
        if str(label).strip().lower() == target:
            return label
    return None

# Check 1 (5 pts): a variable named djia_df exists.
try:
    # Check djia_df exists
    assert 'djia_df' in globals() or 'djia_df' in dir(), "djia_df not defined"
    points += 5
    feedback.append("✓ djia_df variable created")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Error checking djia_df: {type(e).__name__}")

# Check 2 (5 pts): all required price columns are present.
try:
    # Check required columns (flexible: strip whitespace, case-insensitive)
    required_cols = {'date', 'open', 'high', 'low', 'close'}
    # str() keeps non-string column labels from raising AttributeError here.
    actual_cols_normalized = {str(c).strip().lower() for c in djia_df.columns}
    missing = required_cols - actual_cols_normalized
    assert not missing, f"Missing columns: {missing}. Have: {list(djia_df.columns)}"
    points += 5
    feedback.append("✓ All required columns present (Date, Open, High, Low, Close)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except NameError:
    feedback.append("✗ Cannot check columns - djia_df not defined")
except Exception as e:
    feedback.append(f"✗ Column check error: {type(e).__name__}")

# Check 3 (5 pts): rows are ordered oldest-first by date.
try:
    # Check data is sorted by date (oldest first)
    date_col = find_col(djia_df, 'date')
    # Report a missing column explicitly instead of a KeyError from djia_df[None].
    assert date_col is not None, "Date column not found - cannot check sort order"
    dates = pd.to_datetime(djia_df[date_col])
    assert dates.is_monotonic_increasing, "Data not sorted by date (oldest first)"
    points += 5
    feedback.append("✓ Data sorted by date (oldest first)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Date sorting check failed: {type(e).__name__}")

# Check 4 (5 pts): Date has a datetime dtype and at least 100 rows were loaded.
try:
    # Check Date is datetime type and data has reasonable size
    date_col = find_col(djia_df, 'date')
    # Report a missing column explicitly instead of a KeyError from djia_df[None].
    assert date_col is not None, "Date column not found - cannot check datetime type"
    assert pd.api.types.is_datetime64_any_dtype(djia_df[date_col]), "Date column not converted to datetime"
    assert len(djia_df) >= 100, f"Only {len(djia_df)} rows - expected 100+ rows of data"
    points += 5
    feedback.append(f"✓ Date is datetime type, {len(djia_df)} rows loaded")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Date type check failed: {type(e).__name__}")

record_score('Task 1', points, 20, feedback)

## Task 2 — Cleaning & Feature Engineering (20 points)

In [None]:
# === HIDDEN TEST: Task 2 ===
# Tests cleaning and feature engineering on real DJIA data
points = 0
feedback = []

# Helper to find column by normalized name
def find_col(df, name):
    """Return the label in ``df.columns`` that matches ``name``, or None.

    Matching ignores case and leading/trailing whitespace on the column
    label. Labels are converted with ``str()`` before normalizing so that
    non-string labels (for example integer labels) are compared as text
    instead of raising AttributeError on ``.strip()``.

    Args:
        df: Object exposing an iterable ``columns`` attribute.
        name: Column name to look for (compared lower-cased).

    Returns:
        The first matching label exactly as it appears in ``df.columns``,
        or None when no label matches.
    """
    target = name.lower()  # loop-invariant, so compute it once
    for label in df.columns:
        if str(label).strip().lower() == target:
            return label
    return None

# Check 1 (5 pts): the Date column has a datetime dtype.
try:
    # Check Date is datetime
    date_col = find_col(djia_df, 'date')
    # Report a missing column explicitly instead of a KeyError from djia_df[None].
    assert date_col is not None, "Date column not found"
    assert pd.api.types.is_datetime64_any_dtype(djia_df[date_col]), "Date column not datetime type"
    points += 5
    feedback.append("✓ Date column is datetime type")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except NameError:
    feedback.append("✗ djia_df not defined - complete Task 1 first")
except Exception as e:
    feedback.append(f"✗ Date type check failed: {type(e).__name__}")

# Check 2 (5 pts): a Daily_Return column exists.
try:
    # Check Daily_Return column exists
    dr_col = find_col(djia_df, 'daily_return')
    assert dr_col is not None, "Daily_Return column not found - did you create it?"
    points += 5
    feedback.append("✓ Daily_Return column created")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Daily_Return check failed: {type(e).__name__}")

# Check 3 (5 pts): at most 5 Daily_Return values are missing.
try:
    # Check Daily_Return has values
    dr_col = find_col(djia_df, 'daily_return')
    # Report a missing column explicitly instead of a KeyError from djia_df[None].
    assert dr_col is not None, "Daily_Return column not found - cannot check its values"
    non_null_count = djia_df[dr_col].notna().sum()
    total_rows = len(djia_df)
    assert non_null_count >= total_rows - 5, f"Daily_Return has too many NaN values ({total_rows - non_null_count} missing)"
    points += 5
    feedback.append(f"✓ Daily_Return has {non_null_count} valid values")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Daily_Return values check failed: {type(e).__name__}")

# Check 4 (5 pts): the mean absolute Daily_Return lies strictly between 0.01 and 5.
try:
    # Check Daily_Return is reasonable
    dr_col = find_col(djia_df, 'daily_return')
    # Report a missing column explicitly instead of a KeyError from djia_df[None].
    assert dr_col is not None, "Daily_Return column not found - cannot validate its values"
    mean_abs_return = djia_df[dr_col].abs().mean()
    assert 0.01 < mean_abs_return < 5, f"Daily_Return values seem wrong (mean abs = {mean_abs_return:.2f}%)"
    points += 5
    feedback.append(f"✓ Daily_Return values reasonable (mean abs = {mean_abs_return:.2f}%)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Daily_Return validation failed: {type(e).__name__}")

record_score('Task 2', points, 20, feedback)

## Task 3 — Visual Analysis (20 points)

In [None]:
# === HIDDEN TEST: Task 3 ===
# Visual tasks - award points for having the data ready for plotting
points = 0
feedback = []

# Helper to find column by normalized name
def find_col(df, name):
    """Return the label in ``df.columns`` that matches ``name``, or None.

    Matching ignores case and leading/trailing whitespace on the column
    label. Labels are converted with ``str()`` before normalizing so that
    non-string labels (for example integer labels) are compared as text
    instead of raising AttributeError on ``.strip()``.

    Args:
        df: Object exposing an iterable ``columns`` attribute.
        name: Column name to look for (compared lower-cased).

    Returns:
        The first matching label exactly as it appears in ``df.columns``,
        or None when no label matches.
    """
    target = name.lower()  # loop-invariant, so compute it once
    for label in df.columns:
        if str(label).strip().lower() == target:
            return label
    return None

# Check 1 (10 pts): djia_df exists, has a Close column and is not empty.
# No plot is inspected here; only the data a time-series plot needs.
try:
    # Check djia_df exists with Close column for plotting
    assert 'djia_df' in globals(), "djia_df not defined"
    close_col = find_col(djia_df, 'close')
    assert close_col is not None, "Close column not found for time-series plot"
    assert len(djia_df) > 0, "djia_df is empty"
    points += 10
    feedback.append("✓ Close price data available for time-series plot")
except AssertionError as e:
    # The assertion message becomes the feedback line shown for this check.
    feedback.append(f"✗ {e}")
except Exception as e:
    # Any other error is reported by exception type name only.
    feedback.append(f"✗ Close column check failed: {type(e).__name__}")

# Check 2 (10 pts): Daily_Return exists and has at least one non-null value.
try:
    # Check Daily_Return exists for histogram
    dr_col = find_col(djia_df, 'daily_return')
    assert dr_col is not None, "Daily_Return column not found for histogram"
    valid_count = djia_df[dr_col].notna().sum()
    assert valid_count > 0, "Daily_Return has no valid values for histogram"
    points += 10
    feedback.append(f"✓ Daily_Return data available for histogram ({valid_count} values)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    # A missing djia_df also lands here, reported as NameError.
    feedback.append(f"✗ Daily_Return check failed: {type(e).__name__}")

# Store the Task 3 result (out of 20) together with the collected feedback.
record_score('Task 3', points, 20, feedback)

## Task 4 — Multi-Dataset Analysis (20 points)

In [None]:
# === HIDDEN TEST: Task 4 ===
# Tests loading REAL FX data from FRED (fx_usd_gbp.csv)
points = 0
feedback = []

# Helper to find column by normalized name
def find_col(df, name):
    """Return the label in ``df.columns`` that matches ``name``, or None.

    Matching ignores case and leading/trailing whitespace on the column
    label. Labels are converted with ``str()`` before normalizing so that
    non-string labels (for example integer labels) are compared as text
    instead of raising AttributeError on ``.strip()``.

    Args:
        df: Object exposing an iterable ``columns`` attribute.
        name: Column name to look for (compared lower-cased).

    Returns:
        The first matching label exactly as it appears in ``df.columns``,
        or None when no label matches.
    """
    target = name.lower()  # loop-invariant, so compute it once
    for label in df.columns:
        if str(label).strip().lower() == target:
            return label
    return None

# Check 1 (5 pts): a variable named fx_df exists.
try:
    # Check fx_df exists
    assert 'fx_df' in globals(), "fx_df not defined - did you load fx_usd_gbp.csv?"
    points += 5
    feedback.append("✓ fx_df variable created")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ fx_df check failed: {type(e).__name__}")

# Check 2 (5 pts): Date, USD_GBP and FX_Return columns are all present.
try:
    # Check required columns
    date_col = find_col(fx_df, 'date')
    usd_col = find_col(fx_df, 'usd_gbp')
    fx_ret_col = find_col(fx_df, 'fx_return')
    missing = []
    if date_col is None: missing.append('Date')
    if usd_col is None: missing.append('USD_GBP')
    if fx_ret_col is None: missing.append('FX_Return')
    assert not missing, f"Missing columns: {missing}"
    points += 5
    feedback.append("✓ All required columns present (Date, USD_GBP, FX_Return)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except NameError:
    feedback.append("✗ fx_df not defined")
except Exception as e:
    feedback.append(f"✗ Column check failed: {type(e).__name__}")

# Check 3 (5 pts): the mean USD_GBP value lies strictly between 1.0 and 1.6.
try:
    # Check USD_GBP values are reasonable
    usd_col = find_col(fx_df, 'usd_gbp')
    # Report a missing column explicitly instead of a KeyError from fx_df[None].
    assert usd_col is not None, "USD_GBP column not found - cannot validate its values"
    mean_rate = fx_df[usd_col].mean()
    assert 1.0 < mean_rate < 1.6, f"USD_GBP mean={mean_rate:.3f} outside expected range (1.0-1.6)"
    points += 5
    feedback.append(f"✓ USD_GBP values reasonable (mean = {mean_rate:.3f})")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ USD_GBP validation failed: {type(e).__name__}")

# Check 4 (5 pts): Date has a datetime dtype and at least 50 rows were loaded.
try:
    # Check Date is datetime and data size
    date_col = find_col(fx_df, 'date')
    # Report a missing column explicitly instead of a KeyError from fx_df[None].
    assert date_col is not None, "Date column not found - cannot check datetime type"
    assert pd.api.types.is_datetime64_any_dtype(fx_df[date_col]), "Date column not datetime type"
    assert len(fx_df) >= 50, f"Only {len(fx_df)} rows - expected 50+ rows of FX data"
    points += 5
    feedback.append(f"✓ Date is datetime type, {len(fx_df)} rows loaded")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Date/size check failed: {type(e).__name__}")

record_score('Task 4', points, 20, feedback)

## Task 5 — Macro Insight (20 points)

In [None]:
# === HIDDEN TEST: Task 5 ===
# Tests loading REAL Fed Funds Rate data from FRED (fed_funds_rate.csv)
points = 0
feedback = []

# Helper to find column by normalized name
def find_col(df, name):
    """Return the label in ``df.columns`` that matches ``name``, or None.

    Matching ignores case and leading/trailing whitespace on the column
    label. Labels are converted with ``str()`` before normalizing so that
    non-string labels (for example integer labels) are compared as text
    instead of raising AttributeError on ``.strip()``.

    Args:
        df: Object exposing an iterable ``columns`` attribute.
        name: Column name to look for (compared lower-cased).

    Returns:
        The first matching label exactly as it appears in ``df.columns``,
        or None when no label matches.
    """
    target = name.lower()  # loop-invariant, so compute it once
    for label in df.columns:
        if str(label).strip().lower() == target:
            return label
    return None

# Check 1 (5 pts): a variable named rates_df exists.
try:
    # Check rates_df exists
    assert 'rates_df' in globals(), "rates_df not defined - did you load fed_funds_rate.csv?"
    points += 5
    feedback.append("✓ rates_df variable created")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ rates_df check failed: {type(e).__name__}")

# Check 2 (5 pts): FEDFUNDS exists and its maximum lies strictly between 0 and 10.
try:
    # Check FEDFUNDS column exists and has valid values
    ff_col = find_col(rates_df, 'fedfunds')
    assert ff_col is not None, "FEDFUNDS column not found"
    max_rate = rates_df[ff_col].max()
    assert 0 < max_rate < 10, f"FEDFUNDS max={max_rate:.2f}% outside expected range (0-10%)"
    points += 5
    feedback.append(f"✓ FEDFUNDS column present (max rate = {max_rate:.2f}%)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except NameError:
    feedback.append("✗ rates_df not defined")
except Exception as e:
    feedback.append(f"✗ FEDFUNDS check failed: {type(e).__name__}")

# Check 3 (5 pts): Date has a datetime dtype and at least 12 rows were loaded.
try:
    # Check Date is datetime and data size
    date_col = find_col(rates_df, 'date')
    # Report a missing column explicitly instead of a KeyError from rates_df[None].
    assert date_col is not None, "Date column not found - cannot check datetime type"
    assert pd.api.types.is_datetime64_any_dtype(rates_df[date_col]), "Date column not datetime type"
    assert len(rates_df) >= 12, f"Only {len(rates_df)} rows - expected 12+ months of data"
    points += 5
    feedback.append(f"✓ Date is datetime type, {len(rates_df)} months of data")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Date/size check failed: {type(e).__name__}")

# Check 4 (5 pts): analysis_text is longer than 200 characters (after
# stripping) and contains no placeholder text.
try:
    # Check analysis_text has substantive content
    assert 'analysis_text' in globals(), "analysis_text variable not defined"
    # Give a readable message instead of an AttributeError from .strip() on a non-string.
    assert isinstance(analysis_text, str), f"analysis_text must be a string (got {type(analysis_text).__name__})"
    text_length = len(analysis_text.strip())
    assert text_length > 200, f"Analysis too short ({text_length} chars) - need 5-8 meaningful sentences"
    assert "YOUR ANALYSIS HERE" not in analysis_text, "Replace placeholder text with your analysis"
    assert "YOUR ANSWER HERE" not in analysis_text, "Replace placeholder text with your analysis"
    points += 5
    feedback.append(f"✓ Analysis text provided ({text_length} characters)")
except AssertionError as e:
    feedback.append(f"✗ {e}")
except Exception as e:
    feedback.append(f"✗ Analysis text check failed: {type(e).__name__}")

record_score('Task 5', points, 20, feedback)

In [None]:
# === HIDDEN: WRITE RESULTS ===
import json
import datetime

# Bundle everything the test cells recorded, stamped with the current
# (naive, local) time in ISO format.
result = dict(
    scores=__assessment_scores,
    feedback=__assessment_feedback,
    timestamp=datetime.datetime.now().isoformat(),
)

# Persist the bundle as JSON in the working directory.
with open('assessment_result.json', 'w') as f:
    json.dump(result, fp=f, indent=2)

# Print a per-task breakdown followed by the overall total.
print("Assessment Results:")
total = sum(earned for earned, _ in __assessment_scores.values())
max_total = sum(available for _, available in __assessment_scores.values())
for task, (pts, max_pts) in __assessment_scores.items():
    print(f"  {task}: {pts}/{max_pts}")
    # Tasks without recorded feedback print only their score line.
    for fb in __assessment_feedback.get(task, ()):
        print(f"    {fb}")
print(f"\nTotal: {total}/{max_total}")