# Market Expectations In The Cross-Section Of Present Values
## FINM 32900 Final Project - Group 6

## Imports

In [None]:
import numpy as np
import pandas as pd
import sys
import os

from matplotlib import pyplot as plt
from settings import config
from pathlib import Path
import load_data
import regressions
import pull_CRSP_index
import pull_ken_french_data
from pandas_datareader.famafrench import get_available_datasets
import pandas_datareader.data as web

# Load environment variables
# Each value is looked up by key through `config` from the project-local
# `settings` module; the two directory entries are wrapped in `Path`, the
# remaining values are kept exactly as `config` returns them.
DATA_DIR = Path(config("DATA_DIR"))
# NOTE(review): OUTPUT_DIR and WRDS_USERNAME are not referenced in the visible
# part of this notebook — confirm they are still needed here.
OUTPUT_DIR = Path(config("OUTPUT_DIR"))
WRDS_USERNAME = config("WRDS_USERNAME")
# Sample window forwarded to the Ken French pulls below.
START_DATE = config("START_DATE")
END_DATE = config("END_DATE")

## Acquire Data

### Step 1: Fama-French Data

We can initially investigate the available datasets with the following code:

In [None]:
# List the Fama-French dataset names that pandas_datareader can fetch
# (rendered as the cell output).
get_available_datasets()

This gives us the full list of available Fama-French data sets. From the paper we are instructed that the "sets of predictor variables are 6, 25, and 100 book-to-market ratios of size- and value-sorted portfolios", which would be:

* 6_Portfolios_2x3
* 25_Portfolios_5x5
* 100_Portfolios_10x10

#### Pulling Ken French data

Using the `pull_ken_french_excel` function from the `pull_ken_french_data.py` file, we can pull the data and save it as Excel files to the "_data" directory:

In [None]:
# Keyword arguments shared by all three pulls; only the dataset name differs.
_shared_pull_kwargs = {
    "data_dir": DATA_DIR,
    "log": True,
    "start_date": START_DATE,
    "end_date": END_DATE,
}

# 6 portfolios (2x3 sort)
bm_6_portfolios = pull_ken_french_data.pull_ken_french_excel(
    dataset_name="6_Portfolios_2x3",
    **_shared_pull_kwargs,
)

# 25 portfolios (5x5 sort)
bm_25_portfolios = pull_ken_french_data.pull_ken_french_excel(
    dataset_name="25_Portfolios_5x5",
    **_shared_pull_kwargs,
)

# 100 portfolios (10x10 sort)
bm_100_portfolios = pull_ken_french_data.pull_ken_french_excel(
    dataset_name="100_Portfolios_10x10",
    **_shared_pull_kwargs,
)

We can then load the data using the load_ken_french function:

In [None]:
# Rebind `bm_6_portfolios` to whatever `load_data.load_ken_french` returns.
# NOTE(review): no dataset name is passed, so which file is read is decided by
# load_ken_french's defaults — confirm that this is the 6-portfolio data.
bm_6_portfolios = load_data.load_ken_french()
# Preview the first rows (rendered as the cell output).
bm_6_portfolios.head()

### Step 2: CRSP Market Capitalization Weighted Index Data

Next, we need to acquire the CRSP return data for the market capitalization weighted index data:

In [None]:
# Pull the CRSP value-weighted index through the project helper.
# NOTE(review): presumably this queries WRDS and needs credentials from the
# project settings — confirm before running offline.
crsp = pull_CRSP_index.pull_crsp_value_weighted_index()

And then load that data to see the data set:

In [None]:
# Load the CRSP index data through the project loader, kept separate from the
# `crsp` object returned by the pull above.
crsp_loaded = load_data.load_crsp_index()
# Preview the first rows (rendered as the cell output).
crsp_loaded.head()

# Partial Least Squares Regressions For Valuations vs Returns

Once we have the data sources identified and the data acquired, we need to set up the partial least squares regressions to regress the valuations (book-to-market ratios) vs the CRSP value weighted returns. We start with the in-sample series of regressions for the 6 portfolio monthly and annual data followed by the recursive forecast, as follows.

The academic paper provides guidance on how to set up the partial least squares (PLS) regressions, specifically:

"Let time indices represent months. Consider a forecast for the return $r_{\tau+12}$ that is realized over the 12-month period $t+1$ to $t+12$. First-stage regressions have annual returns on the right-hand side, so the regression takes the form:

$$v_{i,\tau} = \hat{\phi}_{i,0} + \hat{\phi}_{i}\, r_{\tau+12} + e_{i,\tau}$$"

Then:

"The second-stage cross-section regressions are run for months $1, \ldots, t-12$, and $t$. The data for these regressions are value ratios up to date $t$, and $\hat{\phi}_i$, which is $t$-measurable. The factor estimates $\hat{F}_1, \ldots, \hat{F}_{t-12}$ are used for the third-stage regression, and the factor estimate for month $t$, $\hat{F}_t$, is used to construct the out-of-sample forecast (not as an observation in the third-stage regression). The third-stage regression takes the form

$$r_{\tau+12} = \beta_0 + \beta\, \hat{F}_{\tau} + u_{\tau+12}$$"

### 6 Portfolios

We can then run the regressions for the 6 portfolio monthly values as follows:

#### Monthly Regressions

```python
def regress(portfolio_datasets=None):
    """Run the monthly in-sample and recursive PLS regressions per portfolio set.

    Parameters
    ----------
    portfolio_datasets : dict[str, str] | None
        Mapping of display label -> Ken French dataset name. ``None`` (the
        default) reproduces the original behaviour and processes only
        ``{"6-Portfolios": "6_Portfolios_2x3"}``. An empty mapping runs no
        regression and returns ``{}``.

    Returns
    -------
    dict
        One entry per label, each a dict with the keys ``"R2 In-Sample"``
        (``in_sample["third_model"].rsquared``) and ``"R2 Out-of-Sample"``
        (``recursive["R2_oos"]``).
    """
    # Use None as the sentinel so callers can pass their own mapping (e.g. the
    # 25- or 100-portfolio sets) without a shared mutable default argument.
    if portfolio_datasets is None:
        portfolio_datasets = {
            "6-Portfolios": "6_Portfolios_2x3"
        }

    # Both regression helpers are run with the same weighting and horizon.
    weighting = "BE_FYt-1_to_ME_June_t"
    horizon = 1

    results = {}
    for label, dataset_name in portfolio_datasets.items():
        print(f"\nProcessing {label} Data")
        in_sample = regressions.run_in_sample_pls(
            dataset_name=dataset_name,
            weighting=weighting,
            h=horizon,
            end_date='1980-01-01')
        recursive = regressions.run_recursive_forecast(
            dataset_name=dataset_name,
            weighting=weighting,
            h=horizon,
            start_train_date='1930-01-01',
            end_train_date='1980-01-01',
            end_forecast_date='2011-01-01')
        regressions.display_results(label, in_sample, recursive)

        results[label] = {
            "R2 In-Sample": in_sample["third_model"].rsquared,
            "R2 Out-of-Sample": recursive["R2_oos"]
        }

    return results

# Run function and extract summary DataFrame.
# `results` maps each portfolio label to its dict of R2 values; the transpose
# puts one row per label and one column per R2 metric.
results = regress()
summary_df_6_monthly = pd.DataFrame(results).T

# Display the DataFrame (the trailing expression is rendered as the cell output)
summary_df_6_monthly
```

Our function outputs the plot of the predicted out-of-sample values vs the observed values, as well as a table which is included in the "project.pdf" project summary paper.

```python
# Folder that receives the LaTeX tables
summary_tex_dir = "../reports/tables"

# Create the folder up front; an already existing folder is not an error
os.makedirs(summary_tex_dir, exist_ok=True)

# Full path of the monthly 6-portfolio summary table
summary_tex_path = os.path.join(summary_tex_dir, "summary_table_6_monthly.tex")

# Write the table to LaTeX, keeping the index and printing floats with 6 decimals
summary_df_6_monthly.to_latex(
    buf=summary_tex_path,
    index=True,
    float_format="%.6f",
)
```

### 6 Portfolios

And in a similar manner, the regressions for the 6 portfolio annual values:

#### Annual Regressions

```python
def regress_annual(portfolio_datasets=None):
    """Run the annual in-sample and recursive PLS regressions per portfolio set.

    Parameters
    ----------
    portfolio_datasets : dict[str, str] | None
        Mapping of display label -> Ken French dataset name. ``None`` (the
        default) reproduces the original behaviour and processes only
        ``{"6-Portfolios": "6_Portfolios_2x3"}``. An empty mapping runs no
        regression and returns ``{}``.

    Returns
    -------
    dict
        One entry per label, each a dict with the keys ``"R2 In-Sample"``
        (``in_sample_results["third_model"].rsquared``) and
        ``"R2 Out-of-Sample"`` (the ``R2_oos`` value returned by
        ``regressions.run_recursive_forecast_annual``).
    """
    # Use None as the sentinel so callers can pass their own mapping (e.g. the
    # 25- or 100-portfolio sets) without a shared mutable default argument.
    if portfolio_datasets is None:
        portfolio_datasets = {
            "6-Portfolios": "6_Portfolios_2x3"
        }

    # Both regression helpers are run with the same weighting and horizon.
    weighting = "BE_FYt-1_to_ME_June_t"
    horizon = 1

    results = {}
    for label, dataset_name in portfolio_datasets.items():
        print(f"\n[Annual] Processing {label} Data")
        in_sample_results = regressions.run_in_sample_pls_annual(
            dataset_name=dataset_name,
            weighting=weighting,
            h=horizon,
            end_date='1980-01-01'
        )
        forecast_series, actual_series, R2_oos = regressions.run_recursive_forecast_annual(
            dataset_name=dataset_name,
            weighting=weighting,
            h=horizon,
            start_train_year=1930,
            end_train_year=1980,
            end_forecast_year=2010,
            n_components=1
        )
        regressions.display_results_annual(label, in_sample_results, forecast_series, actual_series, R2_oos)

        results[label] = {
            "R2 In-Sample": in_sample_results["third_model"].rsquared,
            "R2 Out-of-Sample": R2_oos
        }

    return results

# Run function and extract summary DataFrame.
# `results_annual` maps each portfolio label to its dict of R2 values; the
# transpose puts one row per label and one column per R2 metric.
results_annual = regress_annual()
summary_df_6_annual = pd.DataFrame(results_annual).T

# Display the DataFrame (the trailing expression is rendered as the cell output)
summary_df_6_annual
```


Once again, the plot with the predicted vs observed return values, as well as the final in-sample and out-of-sample values, are exported for inclusion in the final summary paper.

```python
# Folder that receives the LaTeX tables
summary_tex_dir = "../reports/tables"

# Create the folder up front; an already existing folder is not an error
os.makedirs(summary_tex_dir, exist_ok=True)

# Full path of the annual 6-portfolio summary table
summary_tex_path = os.path.join(summary_tex_dir, "summary_table_6_annual.tex")

# Write the table to LaTeX, keeping the index and printing floats with 6 decimals
summary_df_6_annual.to_latex(
    buf=summary_tex_path,
    index=True,
    float_format="%.6f",
)
```

### 25 Portfolios

We continue with the regressions for the 25 portfolio monthly and annual values:

#### Monthly Regressions

```python
# Display label -> Ken French dataset name for the 25-portfolio (5x5) set.
# NOTE(review): this cell only defines the mapping; no monthly regression call
# or export for it is visible here — confirm the run code was not dropped.
portfolio_datasets = {
    "25-Portfolios": "25_Portfolios_5x5"
}
```

#### Annual Regressions

```python
# Display label -> Ken French dataset name for the 25-portfolio (5x5) set.
# NOTE(review): this cell only defines the mapping; no annual regression call
# or export for it is visible here — confirm the run code was not dropped.
portfolio_datasets = {
    "25-Portfolios": "25_Portfolios_5x5"
}
```

### 100 Portfolios

Finally, the regressions for the 100 portfolio monthly and annual values:

#### Monthly Regressions

```python
# Display label -> Ken French dataset name for the 100-portfolio (10x10) set.
# NOTE(review): this cell only defines the mapping; no monthly regression call
# or export for it is visible here — confirm the run code was not dropped.
portfolio_datasets = {
    "100-Portfolios": "100_Portfolios_10x10"
}
```

#### Annual Regressions

```python
# Display label -> Ken French dataset name for the 100-portfolio (10x10) set.
# NOTE(review): this cell only defines the mapping; no annual regression call
# or export for it is visible here — confirm the run code was not dropped.
portfolio_datasets = {
    "100-Portfolios": "100_Portfolios_10x10"
}
```