# Revisión de código para generación de índices

**Fecha:** 4 de Mayo de 2020

**Responsable de revisión:** León Garay

**Código revisado**

In [2]:
#import cupy as cp
import numpy as np
import pandas as pd
import fix_yahoo_finance as yf
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import time

# Extraer los datos de Yahoo Finance

Tomamos 50 acciones en total, de diferentes industrias:

In [4]:
stocks = ['COP','AMT','LIN','LMT','AMZN','WMT','JNJ','VTI','MSFT','GOOG','XOM','CCI','BHP.AX','UNP',
'BABA','NSRGY','RHHBY','VOO','AAPL','FB','CVX','PLD','RIO.L','HON','HD','PG','UNH','BRK-A','V','0700.HK',
'RDSA.AS','0688.HK','AI.PA','RTX','MC.PA','KO','PFE','JPM','005930.KS','VZ','RELIANCE.NS','DLR','2010.SR',
'UPS','7203.T','PEP','MRK','1398.HK','MA','T']
len(stocks)

50

In [5]:
df_o = yf.download(stocks, start='2015-01-01', end='2020-04-30').Open
df_o.shape

[*********************100%***********************]  50 of 50 completed


(1664, 50)

In [6]:
df_c = yf.download(stocks, start='2015-01-01', end='2020-04-30').Close
df_c.shape

[*********************100%***********************]  50 of 50 completed


(1664, 50)

In [7]:
df_c.count().describe()

count      50.000000
mean     1338.080000
std        14.255454
min      1270.000000
25%      1340.000000
50%      1340.000000
75%      1340.000000
max      1364.000000
dtype: float64

Como la mayoría tienen 1340 días informados, tomaremos solo eso 1340 días.

In [8]:
base = df_c['AAPL'].dropna().to_frame()

In [9]:
for i in range(0,50):
    base = base.join(df_c.iloc[:,i].to_frame(), lsuffix='_caller', rsuffix='_other')

In [10]:
base = base.drop(columns=['AAPL_caller'])
base = base.rename(columns={"AAPL_other": "AAPL"})
base.head()

Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,26600.0,24.7048,112.800003,5.77,,,109.330002,89.7864,99.669998,308.519989,...,33.869999,100.779999,118.610001,110.379997,66.254997,188.399994,105.919998,46.959999,85.900002,92.830002
2015-01-05,26660.0,24.951799,113.5,5.8,79.5,7507.0,106.25,87.005997,98.230003,302.190002,...,33.549999,99.120003,114.599998,108.169998,64.792503,185.089996,104.099998,46.57,85.650002,90.290001
2015-01-06,25900.0,24.6059,120.0,5.71,77.0,7300.0,106.260002,86.279999,97.970001,295.290009,...,33.599998,98.919998,112.230003,107.459999,64.375,183.270004,103.080002,47.040001,86.309998,89.809998
2015-01-07,26140.0,24.507099,124.400002,5.75,78.25,7407.0,107.75,86.669601,99.0,298.420013,...,33.169998,99.93,112.849998,108.459999,65.237503,185.559998,104.309998,46.189999,88.599998,90.720001
2015-01-08,26280.0,23.864799,127.300003,5.72,79.25,7554.0,111.889999,90.317703,99.919998,300.459991,...,33.5,104.699997,117.080002,110.410004,66.112503,188.820007,106.150002,47.18,90.470001,92.230003


In [11]:
base.count().describe()

count      50.000000
mean     1324.180000
std        45.854557
min      1051.000000
25%      1340.000000
50%      1340.000000
75%      1340.000000
max      1340.000000
dtype: float64

In [12]:
base = base.fillna(method='ffill')
base = base.fillna(method='bfill')
base.head()

Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,26600.0,24.7048,112.800003,5.77,79.5,7507.0,109.330002,89.7864,99.669998,308.519989,...,33.869999,100.779999,118.610001,110.379997,66.254997,188.399994,105.919998,46.959999,85.900002,92.830002
2015-01-05,26660.0,24.951799,113.5,5.8,79.5,7507.0,106.25,87.005997,98.230003,302.190002,...,33.549999,99.120003,114.599998,108.169998,64.792503,185.089996,104.099998,46.57,85.650002,90.290001
2015-01-06,25900.0,24.6059,120.0,5.71,77.0,7300.0,106.260002,86.279999,97.970001,295.290009,...,33.599998,98.919998,112.230003,107.459999,64.375,183.270004,103.080002,47.040001,86.309998,89.809998
2015-01-07,26140.0,24.507099,124.400002,5.75,78.25,7407.0,107.75,86.669601,99.0,298.420013,...,33.169998,99.93,112.849998,108.459999,65.237503,185.559998,104.309998,46.189999,88.599998,90.720001
2015-01-08,26280.0,23.864799,127.300003,5.72,79.25,7554.0,111.889999,90.317703,99.919998,300.459991,...,33.5,104.699997,117.080002,110.410004,66.112503,188.820007,106.150002,47.18,90.470001,92.230003


In [13]:
base.count().describe()

count      50.0
mean     1340.0
std         0.0
min      1340.0
25%      1340.0
50%      1340.0
75%      1340.0
max      1340.0
dtype: float64

Ya no hay NA's
La función queda como sigue:

In [14]:
def extraer_datos_yahoo(stocks):
    df_c = yf.download(stocks, start='2015-01-01', end='2020-04-30').Close
    base = df_c['AAPL'].dropna().to_frame()
    for i in range(0,50):
        base = base.join(df_c.iloc[:,i].to_frame(), lsuffix='_caller', rsuffix='_other')
    base = base.drop(columns=['AAPL_caller'])
    base = base.rename(columns={"AAPL_other": "AAPL"})
    base = base.fillna(method='ffill')
    base = base.fillna(method='bfill')
    return base

In [15]:
datos = extraer_datos_yahoo(stocks)

[*********************100%***********************]  50 of 50 completed


In [16]:
datos.head()

Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,26600.0,24.7048,112.800003,5.77,79.5,7507.0,109.330002,89.7864,99.669998,308.519989,...,33.869999,100.779999,118.610001,110.379997,66.254997,188.399994,105.919998,46.959999,85.900002,92.830002
2015-01-05,26660.0,24.951799,113.5,5.8,79.5,7507.0,106.25,87.005997,98.230003,302.190002,...,33.549999,99.120003,114.599998,108.169998,64.792503,185.089996,104.099998,46.57,85.650002,90.290001
2015-01-06,25900.0,24.6059,120.0,5.71,77.0,7300.0,106.260002,86.279999,97.970001,295.290009,...,33.599998,98.919998,112.230003,107.459999,64.375,183.270004,103.080002,47.040001,86.309998,89.809998
2015-01-07,26140.0,24.507099,124.400002,5.75,78.25,7407.0,107.75,86.669601,99.0,298.420013,...,33.169998,99.93,112.849998,108.459999,65.237503,185.559998,104.309998,46.189999,88.599998,90.720001
2015-01-08,26280.0,23.864799,127.300003,5.72,79.25,7554.0,111.889999,90.317703,99.919998,300.459991,...,33.5,104.699997,117.080002,110.410004,66.112503,188.820007,106.150002,47.18,90.470001,92.230003


**1.Sobre la documentación del código/de la función**

¿Se encuentran presentes en la implementación los siguientes elementos? Por favor, ingrese explicaciones detalladas.

**a) Descripción concisa y breve de lo que hace el código/la función**

La función no cuenta con descripción.

**b) Descripción de sus argumentos de entrada, su significado y rango de valores que pueden tomar**

Falta mencionar los argumentos, su significado y el rango de valores.

**c) Descripción de los tipos de argumentos de entrada y de salida (por ejemplo, valores enteros, reales, strings, dataframe, matrices, etc)**

Falta mencionar los tipos de argumentos de entrada y salida.

**d) Descripción de la salida de la función, su significado y valores/objetos que deben regresa**

Falta mencionar la salida de la función.


**2. Cumplimiento de objetivos del código/de la función**

Por favor, ingrese explicaciones detalladas.

**a) ¿El código cumple los objetivos para los que fue diseñado?**

Sí.

**b) ¿La salida de la función genera una DataFrame de tamaño #acciones $\times$ #días**

Sí.

**b) ¿La salida de la función genera un DataFrame cuyos elementos son accesibles en coordenadas (x,y)?**

Sí.

**3. Pruebas**

Ocupe la presente sección para hacer diseño de pruebas variando los parámetros que recibe el código la función en diferentes rangos para evaluae su comportamiento y/o detectar posibles fallos

**Test 1**

**Objetivo del test:** Revisar la descarga con distintas conjuntos de acciones y de fechas.

**Implementación del test:**

In [17]:
stocks = ['COP','AMT','LIN','LMT','AMZN','WMT','JNJ','VTI','MSFT','GOOG','XOM','CCI','BHP.AX','UNP',
'BABA','NSRGY','RHHBY','VOO','AAPL','FB','CVX','PLD','RIO.L','HON','HD','PG','UNH','BRK-A','V','0700.HK',
'RDSA.AS','0688.HK','AI.PA','RTX','MC.PA','KO','PFE','JPM','005930.KS','VZ','RELIANCE.NS','DLR','2010.SR',
'UPS','7203.T','PEP','MRK','1398.HK','MA','T']

In [18]:
import random
random.seed(1234)

La función únicamente funciona para las 50 acciones y está definida para un intervalo específico de fechas. Modifcamos la función para seleccionar las acciones de cualquier intervalo de tiempo.

In [19]:
def extraer_datos_yahoo_test1(stocks, start='2015-01-01', end='2020-04-30'):
    df_c = yf.download(stocks,start,end ).Close
    base = df_c['AAPL'].dropna().to_frame()
    for i in range(0,50):
        base = base.join(df_c.iloc[:,i].to_frame(), lsuffix='_caller', rsuffix='_other')
    base = base.drop(columns=['AAPL_caller'])
    base = base.rename(columns={"AAPL_other": "AAPL"})
    base = base.fillna(method='ffill')
    base = base.fillna(method='bfill')
    return base

In [20]:
datos_1 = extraer_datos_yahoo_test1(stocks,start='2015-01-02', end='2020-05-05')
datos_1.head()

[*********************100%***********************]  50 of 50 completed


Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,26600.0,24.7048,112.800003,5.77,79.5,7507.0,109.330002,89.7864,99.669998,308.519989,...,33.869999,100.779999,118.610001,110.379997,66.254997,188.399994,105.919998,46.959999,85.900002,92.830002
2015-01-05,26660.0,24.951799,113.5,5.8,79.5,7507.0,106.25,87.005997,98.230003,302.190002,...,33.549999,99.120003,114.599998,108.169998,64.792503,185.089996,104.099998,46.57,85.650002,90.290001
2015-01-06,25900.0,24.6059,120.0,5.71,77.0,7300.0,106.260002,86.279999,97.970001,295.290009,...,33.599998,98.919998,112.230003,107.459999,64.375,183.270004,103.080002,47.040001,86.309998,89.809998
2015-01-07,26140.0,24.507099,124.400002,5.75,78.25,7407.0,107.75,86.669601,99.0,298.420013,...,33.169998,99.93,112.849998,108.459999,65.237503,185.559998,104.309998,46.189999,88.599998,90.720001
2015-01-08,26280.0,23.864799,127.300003,5.72,79.25,7554.0,111.889999,90.317703,99.919998,300.459991,...,33.5,104.699997,117.080002,110.410004,66.112503,188.820007,106.150002,47.18,90.470001,92.230003


In [21]:
datos_2 = extraer_datos_yahoo_test1(stocks,start='2017-01-01', end='2019-01-01')
datos_2.head()

[*********************100%***********************]  50 of 50 completed


Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-03,36480.0,20.75,189.399994,4.68,92.5,7097.0,116.150002,96.045502,106.150002,753.669983,...,43.02,161.449997,102.519997,115.080002,79.5,206.740005,116.199997,54.580002,68.660004,90.889999
2017-01-04,36160.0,20.700001,189.0,4.65,92.0,7097.0,116.019997,95.954498,106.339996,757.179993,...,42.77,161.910004,103.139999,115.110001,80.150002,207.960007,117.089996,54.52,69.059998,89.889999
2017-01-05,35560.0,21.0,193.300003,4.71,92.5,7049.0,116.610001,96.0,105.970001,780.450012,...,42.650002,162.179993,102.129997,115.169998,81.089996,207.800003,116.860001,54.639999,69.209999,88.550003
2017-01-06,36200.0,21.35,195.100006,4.73,92.5,6930.0,117.910004,96.409103,105.269997,795.98999,...,41.32,162.410004,103.190002,115.400002,82.209999,208.610001,117.230003,53.259998,68.260002,88.5
2017-01-09,37220.0,21.65,195.600006,4.68,92.25,6930.0,118.989998,95.181801,105.019997,796.919983,...,40.799999,161.949997,102.419998,114.769997,81.75,207.949997,116.779999,52.68,68.709999,87.040001


In [22]:
datos_3 = extraer_datos_yahoo_test1(stocks,start='2014-01-01', end='2019-01-01')
datos_3.head()

[*********************100%***********************]  50 of 50 completed


Unnamed: 0_level_0,005930.KS,0688.HK,0700.HK,1398.HK,2010.SR,7203.T,AAPL,AI.PA,AMT,AMZN,...,T,UNH,UNP,UPS,V,VOO,VTI,VZ,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-02,26180.0,21.641399,100.900002,5.2,113.25,6300.0,79.01857,81.261803,79.449997,397.970001,...,34.950001,74.57,83.555,103.32,55.252499,167.630005,95.080002,49.0,78.910004,99.75
2014-01-03,25920.0,21.196699,98.800003,5.06,113.25,6300.0,77.28286,81.744797,79.68,396.440002,...,34.799999,75.099998,83.425003,103.089996,55.290001,167.479996,95.059998,48.419998,78.650002,99.510002
2014-01-06,26140.0,20.6532,99.120003,4.96,113.0,6300.0,77.704285,80.094597,80.190002,393.630005,...,34.959999,74.239998,82.580002,101.75,54.9575,167.059998,94.809998,48.689999,78.209999,99.660004
2014-01-07,26080.0,20.109699,98.800003,4.95,112.75,6270.0,77.148575,78.846901,81.300003,398.029999,...,34.950001,76.510002,83.309998,102.080002,55.377499,168.100006,95.419998,49.299999,78.449997,101.07
2014-01-08,25820.0,21.443701,100.300003,5.04,115.5,6300.0,77.637146,79.506897,81.940002,401.920013,...,34.240002,75.620003,83.464996,102.0,55.555,168.169998,95.489998,48.5,77.830002,100.739998


Principales hallazos del test:

* La función trabaja de manera para intervalos de tiempo distintos.

**4. Resumen detallado de posibles puntos faltantes en implementación**

* La función no tiene documentación que explique los argumentos de extrada, salida ni el rango de valores que puede tomar.
* Modificar la función para aceptar diferentes intervalos de tiempo.

**Sugerencias para resolver los puntos anteriores**

* Para la documentacion es necesario añadir los argumentos que toma de entrada así como el formato en el que tiene estar escrita la fecha. 
* Para modificar los intervalos de tiempo recomiendo utilizar el siguiente código:

```
def extraer_datos_yahoo_test1(stocks, start='2015-01-01', end='2020-04-30'):
    df_c = yf.download(stocks,start,end ).Close
    base = df_c['AAPL'].dropna().to_frame()
    for i in range(0,50):
        base = base.join(df_c.iloc[:,i].to_frame(), lsuffix='_caller', rsuffix='_other')
    base = base.drop(columns=['AAPL_caller'])
    base = base.rename(columns={"AAPL_other": "AAPL"})
    base = base.fillna(method='ffill')
    base = base.fillna(method='bfill')
    return base
```