In [1]:
import pandas as pd
import statsmodels.api as sm
import numpy as np
from scipy.optimize import minimize
import pulp
from function_helper import *

# Load the factor file and prepare its month key.
# - skipfooter is only implemented by the Python parser, so request that engine
#   explicitly instead of relying on a warned fallback from the C parser.
# - The whole preparation is one chain, so the 'date' column is never assigned
#   on a filtered slice of another frame.
df = (
    pd.read_csv('project data.csv', skipfooter=2, engine='python')
    .dropna()
    # The header of the first column may carry mojibake from a byte-order mark.
    .rename(columns={'ï»¿date': 'date'})
    # Keep rows strictly after 199612 (the key is still numeric here).
    .loc[lambda frame: frame['date'] > 199612]
    # Store the key as text so it can be merged with the price file's month strings.
    .assign(date=lambda frame: frame['date'].astype(str))
)

In [2]:
# Read only the three columns used downstream, then cut every date down to its
# first six characters (the YYYYMM prefix, assuming dates are stored as YYYYMMDD).
prices = pd.read_csv(
    'monthly port prices.gz',
    compression='gzip',
    usecols=['date', 'TICKER', 'RETX'],
).assign(date=lambda frame: frame['date'].apply(lambda stamp: str(stamp)[:6]))

In [3]:
# Widen the factor table with one return column per ticker, matched on the month key.
temp_merge = df
# Group the price rows once and walk the (ticker, rows) pairs; rebuilding the
# groupby for every ticker just to call get_group repeats the same work each pass.
for key, ticker_rows in prices.groupby('TICKER'):
    # Left merge keeps every factor row even when the ticker has no price that month.
    # NOTE(review): a ticker with repeated months would duplicate factor rows here.
    temp_merge = temp_merge.merge(ticker_rows[['date', 'RETX']], on='date', how='left')
    # Name the freshly merged return column after its ticker.
    temp_merge = temp_merge.rename(columns={'RETX': key})


In [4]:
# Columns kept for the analysis: the month key, T-Bill, the four factor series
# and the selected tickers.
final_df = temp_merge[['date','T-Bill','RMRF','SMB','HML','UMD','AMGN','PFE','TECH','REGN','GILD','BMY','IART','MRK','LLY','ALXN','PRGO','MYL','IDXX','IMGN','BMRN','TEVA']]
final_df = final_df.dropna(how='all')
# 'C' is a non-numeric placeholder (presumably a data-vendor code); turn it into
# a missing value so the float cast below succeeds.
final_df = final_df.replace({'C': np.nan})
# Parse the month key while it is still 'YYYYMM' text. Casting the whole frame
# to float first would turn it into e.g. 199701.0, which no longer matches '%Y%m'.
final_df['date'] = pd.to_datetime(final_df['date'], format='%Y%m').dt.to_period('M')
# With the key moved into the index, only the value columns are cast to float.
final_df = final_df.set_index('date').astype(float)

In [21]:
import numpy as np
# Portfolio holdings (presumably one row per year); the saved index column is dropped.
port_holds = pd.read_csv('final_port.csv').drop(columns='Unnamed: 0')
FF_factors = final_df[['RMRF', 'SMB', 'HML', 'UMD']]
# For every holdings row, skip its first entry, use the remaining entries as
# column labels of final_df, and subtract the T-Bill series row by row.
excess_returns = [
    final_df[holdings[1:]].subtract(final_df['T-Bill'], axis=0)
    for holdings in port_holds.values
]

# Pair the i-th holdings row with the i-th year of 2006..2016 and keep, for both
# the excess returns and the factors, all history up to December of that year.
tables = [
    [held_returns.loc[:'{}-12'.format(year)], FF_factors.loc[:'{}-12'.format(year)]]
    for held_returns, year in zip(excess_returns, range(2006, 2017))
]


In [22]:
# Fit for the first table only: its two entries (excess returns, factors) are
# passed to ols in that order.
ols_list = ols(*tables[0])

In [23]:
def optimize_1(returns_df,no_securities,risk_factors_df,ols_list, target_return=0.1):
    """Find the weights that minimise ``factor_portfolio_variance``.

    Runs SLSQP from an equal-weight starting point under a single equality
    constraint: the weights must sum to one. No per-weight bounds and no
    return target are passed to the solver.

    Parameters
    ----------
    returns_df
        Not used by the optimisation; kept so existing callers keep working.
    no_securities : int
        Number of weights to solve for.
    risk_factors_df, ols_list
        Forwarded unchanged as the extra arguments of
        ``factor_portfolio_variance``.
    target_return : float, default 0.1
        Not used by the optimisation; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        The ``x`` vector of the solver result. It is returned even when the
        solver reports failure; a ``RuntimeWarning`` is emitted in that case.
    """
    import warnings

    # Equal weights as the starting point.
    init_guess = np.array([1.0 / no_securities] * no_securities)

    result = minimize(
        factor_portfolio_variance,
        init_guess,
        args=(risk_factors_df, ols_list),
        method='SLSQP',
        options={'disp': False},
        # Fully invested: the weights sum to exactly one.
        constraints={'type': 'eq', 'fun': lambda inputs: np.sum(inputs) - 1},
    )

    # Surface a failed optimisation instead of silently handing back whatever
    # point the solver stopped at.
    if not result.success:
        warnings.warn(
            'optimize_1: SLSQP did not converge: {}'.format(result.message),
            RuntimeWarning,
        )
    return result.x

# Weights for the first table, using the regression already stored in ols_list.
opt_weights = optimize_1(
    tables[0][0],        # returns_df
    4,                   # no_securities
    tables[0][1],        # risk_factors_df
    ols_list,
    target_return=1/12,
)

In [24]:
def get_answer(table):
    """Optimise one table and return ``display_answer``'s result for it.

    Parameters
    ----------
    table
        Two-item sequence: ``table[0]`` is passed as the returns frame and
        ``table[1]`` as the risk-factor frame.

    Returns
    -------
    Whatever ``display_answer`` returns for this table.
    """
    returns_df, risk_factors_df = table[0], table[1]
    # Fit once per table and reuse it for both the optimisation and the report.
    # The notebook-level ``ols_list`` is fitted on the first table only, so
    # using it here would pair this table's weights with another table's fit.
    table_ols = ols(returns_df, risk_factors_df)
    weights = optimize_1(returns_df=returns_df,
                         no_securities=4,
                         risk_factors_df=risk_factors_df,
                         ols_list=table_ols,
                         target_return=1/12)
    # NOTE(review): the risk-factor frame is passed as both the first and the
    # third argument, exactly as before; confirm against display_answer's signature.
    return display_answer(risk_factors_df, table_ols, risk_factors_df, weights)

In [25]:
# One ols result per table, in the same order as `tables`.
ols_tables = [ols(held_returns, factors) for held_returns, factors in tables]

In [27]:
# One get_answer result per table becomes one row; the rows are placed next to
# the holdings columns and the combined frame is written to disk.
pd.concat(
    [port_holds, pd.DataFrame(list(map(get_answer, tables)))],
    axis=1,
).to_csv('fama_french_analysis.csv')