# In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import *

def Pure_Factor_Returns(t, Style, industry, industry_date='2022-12-30'):
    """Estimate one cross-section of pure factor returns by constrained WLS.

    The exposure matrix ``X`` is ``[Country | industry dummies | style
    exposures]``.  Because the industry dummies sum to the country column,
    the regression is made identifiable by forcing the weighted sum of
    industry returns to zero; the constraint is encoded in ``R`` and the
    factor-portfolio weights are ``W = R (R'X'VX R)^+ R'X'V`` with ``V`` the
    diagonal matrix of normalised square-root market caps.

    Besides its arguments the function reads three module-level tables:

    * ``dates``     -- column ``0`` holds the trading calendar
      (assumed to be datetimes; TODO confirm against the loader).
    * ``dqmv_long`` -- long table with ``date``, ``code`` and
      ``'Circulating Market Cap'`` columns.
    * ``cl_long``   -- long table with ``date``, ``code``, ``close`` and
      ``pre_close`` columns.

    Parameters
    ----------
    t : str or datetime-like
        Exposure date.  Need not be a trading day.
    Style : pandas.DataFrame
        Style exposures with a MultiIndex that has a level named ``'date'``;
        the remaining level is matched against ``industry['code']``.
    industry : pandas.DataFrame
        Industry membership with ``date``, ``code`` and ``industry`` columns
        (Shenwan classification according to the original author).
    industry_date : str or datetime-like, optional
        Snapshot of ``industry`` used to build the dummies.  Defaults to the
        previously hard-coded ``'2022-12-30'``.

    Returns
    -------
    pandas.DataFrame
        One row labelled ``t``; one column per factor in ``X``.

    Raises
    ------
    ValueError
        If the calendar has no trading day on/before ``t`` or after ``t``,
        or if no stock in the cross-section has a positive market cap.
    """
    t_ts = pd.to_datetime(t)
    trading_days = dates[0]

    # ---- Industry dummy matrix (one column per industry) -----------------
    snapshot = industry['date'] == pd.to_datetime(industry_date)
    # .copy() so that adding 'cons' does not write into a view of `industry`.
    ind = industry.loc[snapshot, ['code', 'industry']].copy()
    ind['cons'] = 1
    Industry = ind.pivot_table(index='code', columns='industry', values='cons').fillna(0.)

    # ---- Style exposures for date t --------------------------------------
    Style = Style.loc[Style.index.get_level_values('date') == t]
    # Drop the level by name: the filter above already relies on that name.
    Style = Style.droplevel('date').sort_index()

    # Keep only stocks present in both tables, then add the country column
    # (exposure 1 for every stock) on the aligned index.
    Industry, Style = Industry.align(Style, join='inner', axis=0)
    Country = pd.Series(1., index=Style.index, name='Country')
    X = pd.concat([Country, Industry, Style], axis=1).dropna()

    # ---- WLS weights: sqrt(circulating cap), normalised to sum to 1 ------
    # The cap window is (previous trading day, t], i.e. it holds the last
    # trading day on or before t.  Working with date values (not positions)
    # avoids the wrap-around to the end of the calendar that
    # ``iloc[index - 1]`` produces when t is the first calendar entry.
    on_or_before = trading_days[trading_days <= t_ts]
    if on_or_before.empty:
        raise ValueError(f"no trading day on or before {t}")
    earlier = trading_days[trading_days < on_or_before.max()]

    cap = dqmv_long
    in_window = cap['date'] <= t_ts
    if not earlier.empty:
        in_window = in_window & (cap['date'] > earlier.max())
    val = cap.loc[in_window, ['code', 'Circulating Market Cap']].set_index('code')

    # Stocks without a cap record get weight 0.
    sqrt_cap = np.sqrt(val['Circulating Market Cap'].reindex(X.index).fillna(0.))
    sqrt_cap_total = sqrt_cap.sum()
    if not sqrt_cap_total > 0:
        raise ValueError(f"no positive circulating market cap available for {t}")
    w = (sqrt_cap / sqrt_cap_total).to_numpy()

    # ---- Cap-weighted industry weights -----------------------------------
    # Computed over every stock in the industry snapshot, not only over X.
    ind = pd.merge(ind, val.reset_index(), on='code', how='outer')
    industry_weights = (
        ind.groupby('industry')['Circulating Market Cap'].sum()
        / ind['Circulating Market Cap'].sum()
    )
    industry_weights = industry_weights[Industry.columns]

    # ---- Constraint matrix R ---------------------------------------------
    # Columns of X are [Country, ind_1 .. ind_n, styles...].  The last
    # industry (row/column n) is eliminated: its return is expressed as
    # -sum_i (w_i / w_n) * f_i over the other industries.
    k = X.shape[1]
    n_ind = len(industry_weights)
    R = np.delete(np.eye(k), n_ind, axis=1)
    R[n_ind, 1:n_ind] = -(industry_weights.iloc[:-1] / industry_weights.iloc[-1]).to_numpy()

    # ---- Factor-portfolio weights ----------------------------------------
    # V is diagonal, so X'V is a column scaling of X'; this avoids
    # materialising the N x N matrix.  Plain ndarrays are used throughout and
    # the labels are attached once at the end.
    XR = X.to_numpy(dtype=float) @ R
    XRtV = XR.T * w
    W = pd.DataFrame(
        R @ np.linalg.pinv(XRtV @ XR) @ XRtV,
        index=X.columns,
        columns=X.index,
    )

    # ---- Realised returns on the first trading day after t ---------------
    start_date = t_ts + pd.Timedelta(days=1)
    upcoming = trading_days[trading_days >= start_date]
    if upcoming.empty:
        raise ValueError(f"no trading day after {t}")
    start_date = upcoming.min()
    end_date = start_date + pd.Timedelta(days=1)

    price = cl_long
    day = price[(price['date'] >= start_date) & (price['date'] < end_date)].set_index('code')
    r = day['close'] / day['pre_close'] - 1
    # Stocks without a price on that day contribute a zero return.
    r = r.reindex(X.index).fillna(0.)

    factor_return = W.dot(r).to_frame(name=t).T

    return factor_return
