In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import cvxpy as cp
import plotly.graph_objects as go
from tqdm.auto import tqdm
import statsmodels.api as sm
import warnings
from datetime import datetime, timedelta
from joblib import Parallel, delayed
import requests
from bs4 import BeautifulSoup
from tqdm.auto import tqdm
import time
import csv
from alpha_vantage.fundamentaldata import FundamentalData
from alpha_vantage.timeseries import TimeSeries
import pickle


# Plot styling. plt.style.available lists every Matplotlib style; 'ggplot' is
# an alternative to the Seaborn theme applied here.
# Apply the Seaborn "whitegrid" theme first, then the Matplotlib defaults.
sns.set(style="whitegrid")

# Default figure size [16, 9] and resolution 100 dpi for every figure.
plt.rcParams.update({'figure.figsize': [16, 9], 'figure.dpi': 100})

# Hide FutureWarning messages from here on.
warnings.simplefilter('ignore', category=FutureWarning)

In [2]:

# Ticker list for the fund being analysed.
activos = ['ARSVX']

# Download the price history at monthly frequency and keep the adjusted close.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which omits the 'Adj Close' column and would make the
# lookup below raise KeyError.
precios_fondo = yf.download(activos, interval='1mo', auto_adjust=False)['Adj Close']


[*********************100%%**********************]  1 of 1 completed


In [3]:

# Log returns: first difference of the log prices. The first row has no
# predecessor, so it is NaN and removed by dropna() with any other NaN rows.
# NOTE(review): these are later combined with the Fama-French series, which
# look like simple returns - confirm that mixing conventions is intended.
retornos_fondo = precios_fondo.pipe(np.log).diff().dropna()


In [4]:
# Load the monthly factor file (first column parsed as the date index) and
# divide every value by 100 (presumably percent -> decimal; confirm against the file).
ff_factors = (
    pd.read_csv('../data/F-F_Research_Data_Factors_monthly.csv', index_col=0, parse_dates=True)
    .div(100)
)
ff_factors

Unnamed: 0,Mkt-RF,SMB,HML,RF
1926-07-01,0.0296,-0.0256,-0.0243,0.0022
1926-08-01,0.0264,-0.0117,0.0382,0.0025
1926-09-01,0.0036,-0.0140,0.0013,0.0023
1926-10-01,-0.0324,-0.0009,0.0070,0.0032
1926-11-01,0.0253,-0.0010,-0.0051,0.0031
...,...,...,...,...
2023-07-01,0.0321,0.0208,0.0411,0.0045
2023-08-01,-0.0239,-0.0316,-0.0106,0.0045
2023-09-01,-0.0524,-0.0251,0.0152,0.0043
2023-10-01,-0.0319,-0.0387,0.0019,0.0047


In [5]:
# Put the factors and the fund returns side by side, keeping only the dates
# present in both (inner join on the index).
datos_df = pd.concat((ff_factors, retornos_fondo), axis='columns', join='inner')
# Label the appended fund-return column 'FONDO', whatever name it arrived with.
datos_df.columns = [*ff_factors.columns, 'FONDO']
datos_df.head()

Unnamed: 0,Mkt-RF,SMB,HML,RF,FONDO
2005-08-01,-0.0122,-0.0097,0.0132,0.003,0.007576
2005-09-01,0.0049,-0.0065,0.0071,0.0029,0.000943
2005-10-01,-0.0202,-0.0125,0.0042,0.0027,-0.031597
2005-11-01,0.0361,0.0099,-0.0116,0.0031,0.036298
2005-12-01,-0.0025,-0.0042,0.002,0.0032,0.023181


In [6]:
# Dependent variable: FONDO minus RF.
Y = datos_df['FONDO'] - datos_df['RF']

# Regressors: the three factor columns plus an intercept column.
X = sm.add_constant(datos_df[['Mkt-RF', 'SMB', 'HML']])

# Ordinary least squares fit; print the text summary.
modelo = sm.OLS(endog=Y, exog=X).fit()
print(modelo.summary())


                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.576
Method:                 Least Squares   F-statistic:                     100.1
Date:                Sun, 07 Apr 2024   Prob (F-statistic):           1.17e-40
Time:                        17:21:57   Log-Likelihood:                 404.88
No. Observations:                 220   AIC:                            -801.8
Df Residuals:                     216   BIC:                            -788.2
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0009      0.003     -0.353      0.7

In [7]:
def dateparse(x):
    """Parse date labels written as year+month (format ``%Y%m``) into pandas datetimes."""
    return pd.to_datetime(x, format='%Y%m')


# Read the industry-portfolio CSV with its first column as the index, then
# convert that index explicitly. The `date_parser` argument of `read_csv` is
# deprecated (pandas 2.0+), so the parsing is done after loading, which
# behaves the same on old and new pandas versions.
industries_ff_df = pd.read_csv('../data/5_Industry_Portfolios.csv', index_col=0)
# Labels are cast to str (they may load as integers) and stripped before parsing.
industries_ff_df.index = dateparse(industries_ff_df.index.astype(str).str.strip())

In [8]:
# Divide every value by 100 (presumably percent -> decimal; confirm against the file).
# NOTE: the name is rebound to the scaled frame, so re-running this cell
# without reloading the data divides by 100 again.
industries_ff_df = industries_ff_df.div(100)
industries_ff_df

Unnamed: 0,Cnsmr,Manuf,HiTec,Hlth,Other
1926-07-01,0.0543,0.0273,0.0183,0.0177,0.0213
1926-08-01,0.0276,0.0233,0.0241,0.0425,0.0435
1926-09-01,0.0216,-0.0044,0.0106,0.0069,0.0029
1926-10-01,-0.0390,-0.0242,-0.0226,-0.0057,-0.0284
1926-11-01,0.0370,0.0250,0.0307,0.0542,0.0211
...,...,...,...,...,...
2023-08-01,-0.0182,-0.0182,-0.0158,-0.0022,-0.0335
2023-09-01,-0.0494,-0.0395,-0.0582,-0.0471,-0.0341
2023-10-01,-0.0343,-0.0305,-0.0168,-0.0458,-0.0253
2023-11-01,0.0788,0.0522,0.1165,0.0587,0.1041


In [9]:
# Step 1: industry returns next to the fund returns, on their common dates.
datos_ind_df = pd.concat((industries_ff_df, datos_df['FONDO']), axis='columns', join='inner')
datos_ind_df.columns = [*industries_ff_df.columns, 'FONDO']
# Step 2: append the RF column from the factor table, again on common dates.
datos_ind_df = pd.concat((datos_ind_df, ff_factors['RF']), axis='columns', join='inner')
datos_ind_df.head()

Unnamed: 0,Cnsmr,Manuf,HiTec,Hlth,Other,FONDO,RF
2005-08-01,-0.0333,0.0147,-0.0056,-0.0031,-0.0168,0.007576,0.003
2005-09-01,-0.0129,0.0371,0.0017,-0.0192,0.0128,0.000943,0.0029
2005-10-01,-0.0021,-0.0611,-0.0205,-0.0327,0.0136,-0.031597,0.0027
2005-11-01,0.0276,0.0281,0.0564,0.0158,0.0493,0.036298,0.0031
2005-12-01,-0.0028,0.0107,-0.0186,0.0239,0.0012,0.023181,0.0032


In [10]:
# Dependent variable: FONDO minus RF.
Y = datos_ind_df['FONDO'] - datos_ind_df['RF']

# Regressors: the five industry-portfolio columns. Column labels are stripped
# of surrounding whitespace before selecting, because the selection used to
# depend on a trailing space in the 'Hlth ' label and would break as soon as
# the header was cleaned.
# NOTE(review): Y has RF subtracted while the regressors do not - confirm
# that this asymmetry is intended.
X = datos_ind_df.rename(columns=str.strip)[['Cnsmr', 'Manuf', 'HiTec', 'Hlth', 'Other']]

# Add the intercept column.
X = sm.add_constant(X)

modelo = sm.OLS(Y, X).fit()

print(modelo.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.562
Model:                            OLS   Adj. R-squared:                  0.552
Method:                 Least Squares   F-statistic:                     54.92
Date:                Sun, 07 Apr 2024   Prob (F-statistic):           1.55e-36
Time:                        17:22:15   Log-Likelihood:                 399.81
No. Observations:                 220   AIC:                            -787.6
Df Residuals:                     214   BIC:                            -767.3
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0023      0.003     -0.813      0.4

In [11]:
# Tickers of the individual stocks to analyse.
activos = ['WTM', 'ATKR', 'BJ', 'MGRC', 'UNF']

# Download the price history at monthly frequency and keep the adjusted close.
# auto_adjust=False is passed explicitly: recent yfinance releases default to
# auto_adjust=True, which omits the 'Adj Close' column and would make the
# lookup below raise KeyError.
precios_act = yf.download(activos, interval='1mo', auto_adjust=False)['Adj Close']

[*********************100%%**********************]  5 of 5 completed


In [12]:
# Log returns per ticker: first difference of the log prices. dropna() then
# removes every row in which at least one ticker has no value.
retornos_act = precios_act.pipe(np.log).diff().dropna()

In [19]:
# Stock returns next to the RF column, restricted to the dates both share.
datos_df_act = pd.concat((retornos_act, datos_ind_df['RF']), axis='columns', join='inner')
datos_df_act.columns = [*retornos_act.columns, 'RF']
datos_df_act.head()

Unnamed: 0,ATKR,BJ,MGRC,UNF,WTM,RF
2018-08-01,0.146027,0.17395,-0.018012,-0.010474,0.016209,0.0016
2018-09-01,-0.031537,-0.096735,-0.062793,-0.064394,0.008499,0.0015
2018-10-01,-0.320246,-0.189818,-0.020027,-0.150466,-0.054004,0.0019
2018-11-01,0.058484,0.053188,0.007621,0.033654,0.046367,0.0018
2018-12-01,-0.028815,-0.052736,-0.037179,-0.076277,-0.079597,0.002


In [20]:
# Dependent variable: ATKR minus RF.
Y = datos_df_act['ATKR'] - datos_df_act['RF']

# Regressors: the other four tickers' returns plus an intercept column.
X = sm.add_constant(datos_df_act[['BJ', 'MGRC', 'UNF', 'WTM']])

# Ordinary least squares fit; print the text summary.
modelo = sm.OLS(endog=Y, exog=X).fit()
print(modelo.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.411
Model:                            OLS   Adj. R-squared:                  0.371
Method:                 Least Squares   F-statistic:                     10.30
Date:                Sun, 07 Apr 2024   Prob (F-statistic):           2.15e-06
Time:                        17:30:51   Log-Likelihood:                 41.881
No. Observations:                  64   AIC:                            -73.76
Df Residuals:                      59   BIC:                            -62.97
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0111      0.017      0.651      0.5