# Practical Exercise 2.02: Handling missing values

In [None]:
import pandas as pd
import numpy as np
import pandas_datareader.data as web
import datetime

# Define date range: 1 January 2010 up to the moment this cell is executed.
start_date = datetime.datetime(2010, 1, 1)
end_date = datetime.datetime.today()

# Attempt to get S&P 500 from FRED.
# Only the download call sits inside `try`; the success message lives in
# `else` so it is printed only when `sp500` was actually assigned.
try:
    sp500 = web.DataReader("SP500", "fred", start_date, end_date)
except Exception as e:
    print(f"Error retrieving S&P 500 from FRED: {e}")
    # If the download fails, `sp500` is never bound and every statement that
    # follows would die with an unrelated NameError. Re-raise so the run stops
    # here with the original cause visible.
    raise
else:
    print("S&P 500 data retrieved from FRED")

# Report how many observations are missing in each column of the raw data.
missing_before = sp500.isna().sum()
print("Missing values before treatment:\n", missing_before)

# Three alternative treatments, each producing a new object; `sp500` itself
# is not modified.

# Option 1: Forward-fill -- carry the last valid value forward over each gap.
sp500_ffill = sp500.ffill()

# Option 2: Linear interpolation ("linear" is the default method).
sp500_interp = sp500.interpolate()

# Option 3: Drop every row that contains at least one missing value.
sp500_dropna = sp500.dropna(axis=0, how="any")

# Show results: total count of remaining missing cells for each treatment.
treatment_reports = (
    ("Missing values after forward-fill:", sp500_ffill),
    ("Missing values after interpolation:", sp500_interp),
    ("Missing values after dropping rows:", sp500_dropna),
)
for report_label, treated in treatment_reports:
    print(report_label, treated.isna().sum().sum())
