## Start by loading the data you are interested in

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Default size (width, height in inches) for every figure created in this notebook.
plt.rcParams.update({"figure.figsize": (10, 4)})

# If you see errors about missing packages in Colab:
# !pip -q install pandas numpy matplotlib plotly

In [None]:
# Explanation:
# - Goal: Run and understand the steps below
# - What you should check after running:
#   1) The output has the expected shape/columns
#   2) The values look reasonable (no obvious NaNs or impossible values)
#   3) Any figures have clear titles/labels and are saved to disk when required
#
# How to read this code:
# - Imports / configuration come first
# - Then we compute intermediate variables (feature engineering)
# - Then we summarize / visualize
# - Finally, we write a short interpretation in Markdown below the figure/table

from pandas_datareader import data as pdr

def fetch_fred_series(series_id: str, start="1990-01-01", end=None) -> pd.DataFrame:
    """Download a single FRED series via pandas_datareader.

    Args:
        series_id: FRED series identifier; also used as the column name.
        start: First date to request.
        end: Last date to request. When None, today's date is used.

    Returns:
        A one-column DataFrame named ``series_id`` whose index has been
        passed through ``pd.to_datetime``.
    """
    stop = end if end is not None else pd.Timestamp.today().strftime("%Y-%m-%d")
    frame = pdr.DataReader(series_id, "fred", start, stop)
    # Name the single column after the requested ID so that frames from
    # several series can be placed side by side without clashing.
    frame.columns = [series_id]
    frame.index = pd.to_datetime(frame.index)
    return frame

def fetch_many(series_ids, start="1990-01-01", end=None):
    """Fetch several FRED series and combine them column-wise.

    Args:
        series_ids: Iterable of FRED series IDs. A single ID given as a
            plain string is treated as a one-element list instead of being
            iterated character by character.
        start: First date to request for every series.
        end: Last date to request for every series; None is forwarded to
            ``fetch_fred_series`` unchanged.

    Returns:
        A DataFrame with one column per series, aligned on the union of
        all dates (missing observations are NaN) and sorted by date.

    Raises:
        ValueError: If ``series_ids`` is empty (raised by ``pd.concat``).
    """
    if isinstance(series_ids, str):
        series_ids = [series_ids]
    frames = [fetch_fred_series(sid, start=start, end=end) for sid in series_ids]
    return pd.concat(frames, axis=1).sort_index()

def infer_freq(index: pd.DatetimeIndex) -> str:
    """Classify a datetime index as quarterly, monthly, or unknown.

    Args:
        index: The dates to inspect.

    Returns:
        "Q" for quarterly data, "M" for monthly data, and "U" for anything
        else (daily, weekly, annual, sub-daily, irregular, or too few dates
        to infer a frequency).
    """
    try:
        alias = pd.infer_freq(index)
    except ValueError:
        # pandas refuses to infer a frequency from fewer than 3 dates.
        return "U"
    if alias is None:
        return "U"

    # Reduce e.g. "QS-OCT" -> "QS", "W-MON" -> "W", "2MS" -> "MS".
    # Matching the whole base alias (case-sensitively) avoids the false
    # positives of a substring test: "W-MON", "A-MAR", "min" and "ms" all
    # contain an M but are not monthly.
    base = alias.split("-", 1)[0].lstrip("0123456789")

    # Both the legacy aliases and the pandas >= 2.2 spellings are listed.
    if base in {"Q", "QS", "QE", "BQ", "BQS", "BQE"}:
        return "Q"
    if base in {"M", "MS", "ME", "BM", "BMS", "BME", "CBM", "CBMS", "CBME"}:
        return "M"
    return "U"

def to_period_end(df: pd.DataFrame, target: str) -> pd.DataFrame:
    """Resample to month-end or quarter-end, keeping the last observation.

    Args:
        df: Frame with a datetime index.
        target: "M" for month-end or "Q" for (December-anchored) quarter-end.

    Returns:
        A frame indexed by period-end dates; each row holds the last
        non-missing value of every column within that period.

    Raises:
        ValueError: If ``target`` is neither "M" nor "Q".
    """
    # Offset objects are used instead of the string aliases because the
    # spelling of those aliases differs between pandas releases
    # ("M"/"Q" before 2.2, "ME"/"QE" afterwards); the objects work on both.
    if target == "M":
        rule = pd.offsets.MonthEnd()
    elif target == "Q":
        # startingMonth=12 matches the plain "Q" alias (quarters ending in
        # March, June, September and December).
        rule = pd.offsets.QuarterEnd(startingMonth=12)
    else:
        raise ValueError("target must be 'M' or 'Q'")
    return df.resample(rule).last()

def add_common_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* extended with simple time-series features.

    For every original column ``c`` four columns are appended, in this order:
    ``c_lag1`` (previous row), ``c_diff1`` (change from the previous row),
    ``c_pct1`` (percent change from the previous row) and ``c_roll3``
    (mean over a 3-row rolling window). The input frame is not modified.
    """
    out = df.copy()
    for name in out.columns:
        derived = {
            "lag1": out[name].shift(1),
            "diff1": out[name].diff(1),
            "pct1": out[name].pct_change(1) * 100,
            "roll3": out[name].rolling(3).mean(),
        }
        for suffix, values in derived.items():
            out[f"{name}_{suffix}"] = values
    return out

In [None]:
# ===========================
# STUDENT CHOICE (EDIT HERE)
# ===========================
# Choose 3–6 FRED series IDs relevant to your question.
# Search on https://fred.stlouisfed.org and copy the series ID.

series_ids = [
    "UNRATE",
    "CPIAUCSL",
    "FEDFUNDS",
]

# Choose your target variable (must be one of the series_ids)
target_id = "CPIAUCSL"

# Fail here, with a clear message, rather than later with a KeyError when
# the target column is looked up in the downloaded data.
if target_id not in series_ids:
    raise ValueError(
        f"target_id {target_id!r} must be one of series_ids: {series_ids}"
    )

# Earliest date to download for every series (YYYY-MM-DD).
start_date = "1990-01-01"

In [None]:
df_raw = fetch_many(series_ids, start=start_date)

# Infer each column's native frequency from the dates on which that column
# actually has a value; rows that exist only because another series was
# observed on that date are dropped first.
freqs = {
    col: infer_freq(df_raw.loc[:, col].dropna().index)
    for col in df_raw.columns
}
freqs

In [None]:
# Rule: if any series is quarterly, use quarterly for everything (safe when mixing).
# Series whose frequency could not be classified ("U") do not affect the
# choice; without a quarterly series everything is aligned to month-end.
use_freq = "Q" if any(v == "Q" for v in freqs.values()) else "M"
print("Using frequency:", use_freq)

df = to_period_end(df_raw, use_freq)

# Drop rows where the target is missing: they cannot be used to model it.
df = df.dropna(subset=[target_id])

# Missing-value strategies:
df_complete = df.dropna()   # simplest: keep only rows where every series is present
# Common for time series: carry the last known value forward.
# DataFrame.ffill() replaces the deprecated fillna(method="ffill").
df_ffill = df.ffill()

df_use = df_complete   # or df_ffill
df_use.head()

In [None]:
# Explanation:
# - Goal: Run and understand the steps below
# - What you should check after running:
#   1) The output has the expected shape/columns
#   2) The values look reasonable (no obvious NaNs or impossible values)
#   3) Any figures have clear titles/labels and are saved to disk when required
#
# How to read this code:
# - Imports / configuration come first
# - Then we compute intermediate variables (feature engineering)
# - Then we summarize / visualize
# - Finally, we write a short interpretation in Markdown below the figure/table

def missing_report(d):
    """Return the number of missing values per column of *d*, largest first."""
    nan_counts = d.isna().sum()
    return nan_counts.sort_values(ascending=False)

# Show the per-column missing-value counts together with the first rows of
# the transposed summary statistics (one row per variable).
(missing_report(df_use), df_use.describe().transpose().head())

## ASSIGNMENT:

For this assignment, please complete each of the following:

Make one plot in matplotlib that shows multiple variables as lines, with dates on the x-axis.

Make a scatter plot with a linear regression line.

Make a plot of the regression residuals against the date.

Make one figure that contains multiple subplots.

Then recreate each of the plots above in Plotly.