# Recolección de Datos

In [18]:
# importamos librerias

import os

import pandas as pd
import yfinance as yf
from finta import TA

In [3]:
# Load the NYSE listing table straight from the published CSV on the internet.

tickers = pd.read_csv(
    filepath_or_buffer="https://pkgstore.datahub.io/core/nyse-other-listings/nyse-listed_csv/data/3c88fab8ec158c3cd55145243fe5fcdf/nyse-listed_csv.csv",
)

In [4]:
# Preview the listing table loaded above.
tickers

Unnamed: 0,ACT Symbol,Company Name
0,A,"Agilent Technologies, Inc. Common Stock"
1,AA,Alcoa Inc. Common Stock
2,AA$B,Alcoa Inc. Depository Shares Representing 1/10...
3,AAC,"AAC Holdings, Inc. Common Stock"
4,AAN,"Aaron's, Inc. Common Stock"
...,...,...
3293,ZPIN,"Zhaopin Limited American Depositary Shares, ea..."
3294,ZQK,"Quiksilver, Inc. Common Stock"
3295,ZTR,"Zweig Total Return Fund, Inc. (The) Common Stock"
3296,ZTS,Zoetis Inc. Class A Common Stock


In [5]:
# Keep only the distinct ticker symbols, which is all we need from the table.
# Note: the result is an array of unique values, not a Python list.
symbols = pd.unique(tickers["ACT Symbol"])

In [6]:
# Show the array of unique symbols.
symbols

array(['A', 'AA', 'AA$B', ..., 'ZTR', 'ZTS', 'ZX'], dtype=object)

In [7]:
# Download five years of auto-adjusted price history for the first symbol,
# the same way as in the previous tutorial.
df = yf.download(
    tickers=symbols[0],
    period='5y',
    auto_adjust=True,
)

[*********************100%***********************]  1 of 1 completed


In [8]:
# Convert every column label to lowercase.
df.columns = df.columns.map(str.lower)

# Give the index the name "datetime".
df.index = df.index.set_names("datetime")

In [9]:
# Inspect the downloaded frame after renaming the columns and the index.
df

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-23,65.240311,66.165295,65.066876,65.587181,2260600
2018-04-24,63.968454,65.433017,63.631222,64.209335,3701600
2018-04-25,64.257508,64.633282,62.542434,63.399971,4451600
2018-04-26,63.804650,64.324955,63.496323,63.949181,1978600
2018-04-27,63.920289,64.132266,63.554152,63.862480,1189200
...,...,...,...,...,...
2023-04-17,139.619995,140.210007,138.889999,140.190002,1115500
2023-04-18,140.729996,140.990005,138.210007,139.000000,1218900
2023-04-19,138.369995,139.619995,137.679993,138.929993,1178400
2023-04-20,136.860001,137.050003,134.869995,135.529999,1325900


Guardamos la data como csv en la carpeta price_data

In [10]:
# Make sure the output folder exists first: to_csv does not create missing
# directories, so this cell would fail on a fresh checkout without it.
os.makedirs("price_data", exist_ok=True)

# Save the price history of the first symbol as price_data/<symbol>.csv.
df.to_csv(f"price_data/{symbols[0]}.csv")

In [11]:
# Reload the CSV we just wrote. The index was stored as the regular
# "datetime" column, so it has to be promoted back to the index after reading.

loaded_csv = pd.read_csv(f"price_data/{symbols[0]}.csv").set_index("datetime")

In [12]:
# Check that the reloaded frame matches what was saved.
loaded_csv

Unnamed: 0_level_0,open,high,low,close,volume
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-04-23,65.240311,66.165295,65.066876,65.587181,2260600
2018-04-24,63.968454,65.433017,63.631222,64.209335,3701600
2018-04-25,64.257508,64.633282,62.542434,63.399971,4451600
2018-04-26,63.804650,64.324955,63.496323,63.949181,1978600
2018-04-27,63.920289,64.132266,63.554152,63.862480,1189200
...,...,...,...,...,...
2023-04-17,139.619995,140.210007,138.889999,140.190002,1115500
2023-04-18,140.729996,140.990005,138.210007,139.000000,1218900
2023-04-19,138.369995,139.619995,137.679993,138.929993,1178400
2023-04-20,136.860001,137.050003,134.869995,135.529999,1325900


### Ahora creamos una función para descargar masivamente todo



In [16]:
def download_stock_data(folder="price_data", years=5, symbols=None, min_rows=100):
    """Download price history for many symbols and save one CSV per symbol.

    Parameters
    ----------
    folder : str
        Directory that receives the ``<symbol>.csv`` files. It is created
        (including missing parents) when it does not exist yet.
    years : int
        Length of the history window; passed to yfinance as
        ``period="<years>y"``.
    symbols : iterable of str, optional
        Symbols to download. When omitted, the unique values of the
        "ACT Symbol" column of the NYSE listing CSV are used.
    min_rows : int
        A symbol is written to disk only when its history has more than this
        many rows, which filters out empty or nearly empty downloads.

    Returns
    -------
    None
        The function works through side effects only: it prints progress and
        writes CSV files.

    Notes
    -----
    Any exception raised while handling one symbol is printed and that symbol
    is skipped, so a single failure never aborts the whole run.
    """
    if symbols is None:
        tickers = pd.read_csv("https://pkgstore.datahub.io/core/nyse-other-listings/nyse-listed_csv/data/3c88fab8ec158c3cd55145243fe5fcdf/nyse-listed_csv.csv")
        symbols = tickers["ACT Symbol"].unique()
    print (symbols)

    # to_csv does not create directories; without this every save would raise
    # and be swallowed by the except branch below.
    os.makedirs(folder, exist_ok=True)

    for symbol in symbols:
        print (symbol)
        print (f"Guardando info en: {folder}/{symbol}.csv")
        try:
            df = yf.download(symbol, auto_adjust=True, period=f"{years}y")

            # Newer yfinance releases can return two-level columns
            # (price field, ticker) even for a single symbol; keep only the
            # price-field level so the lowercase rename below keeps working.
            if isinstance(df.columns, pd.MultiIndex):
                df.columns = df.columns.get_level_values(0)

            # Skip symbols whose download is empty or too short to be useful.
            if len(df) > min_rows:
                df.columns = df.columns.str.lower()
                df.index = df.index.rename("datetime")
                df.to_csv(os.path.join(folder, f"{symbol}.csv"))

        except Exception as e:
            # Best effort: report the problem and move on to the next symbol.
            print(f"error with {symbol}")
            print(repr(e))




In [17]:
# Run the bulk download with the default arguments (folder "price_data",
# 5 years of history); it loops over every symbol in the listing.
download_stock_data()

['A' 'AA' 'AA$B' ... 'ZTR' 'ZTS' 'ZX']
A
Guardando info en: price_data/A.csv
[*********************100%***********************]  1 of 1 completed
AA
Guardando info en: price_data/AA.csv
[*********************100%***********************]  1 of 1 completed
AA$B
Guardando info en: price_data/AA$B.csv
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- AA$B: No data found, symbol may be delisted
AAC
Guardando info en: price_data/AAC.csv
[*********************100%***********************]  1 of 1 completed
AAN
Guardando info en: price_data/AAN.csv
[*********************100%***********************]  1 of 1 completed
AAP
Guardando info en: price_data/AAP.csv
[*********************100%***********************]  1 of 1 completed
AAT
Guardando info en: price_data/AAT.csv
[*********************100%***********************]  1 of 1 completed
AAV
Guardando info en: price_data/AAV.csv
[*********************100%***********************]  1 of 1 completed

1 Failed d

Ahora podemos cargar la función de análisis del episodio anterior, pero la modificamos para que acepte data y no para que la descargue.


In [22]:
def wick_rejection(
    df: pd.DataFrame,
    ema_slow: int = 200,
    ema_fast: int = 50,
    target_atr: float = 1.5,
    stop_atr: float = 1.0,
) -> pd.DataFrame:
    """Find long "wick rejection" entries in an OHLC price frame.

    A row is a signal when both conditions hold:

    * ``close`` is above the slow EMA, and
    * ``low`` is below the fast EMA while ``open`` and ``close`` are both
      above it.

    For each signal row the entry price is the NEXT row's ``open``; the target
    is ``entry + target_atr * ATR`` and the stop is ``entry - stop_atr * ATR``.

    Parameters
    ----------
    df : pd.DataFrame
        Price data with lowercase ``open``, ``low`` and ``close`` columns
        (finta's indicators presumably also need ``high`` - confirm against
        the finta docs). The frame is not modified.
    ema_slow, ema_fast : int
        Periods of the slow and fast EMAs.
    target_atr, stop_atr : float
        ATR multiples used for the profit target and the stop loss. The ATR
        itself is computed with finta's default settings.

    Returns
    -------
    pd.DataFrame
        One row per signal, indexed like ``df``, with the columns ``entry``,
        ``target`` and ``stop``. A signal on the last row has no following
        open, so its three values are NaN.
    """
    # Technical indicators, kept as local Series so the caller's frame is
    # not polluted with helper columns.
    slow_line = TA.EMA(df, period=ema_slow)
    fast_line = TA.EMA(df, period=ema_fast)
    atr = TA.ATR(df)

    # Entry conditions.
    above_slow = df["close"] > slow_line
    wick_through_fast = (
        (df["low"] < fast_line)
        & (df["close"] > fast_line)
        & (df["open"] > fast_line)
    )
    long_entry_condition = above_slow & wick_through_fast

    # The position is opened at the following bar's open.
    entry = df["open"].shift(-1)

    levels = pd.DataFrame(
        {
            "entry": entry,
            "target": entry + target_atr * atr,
            "stop": entry - stop_atr * atr,
        }
    )

    # Keep only the rows that triggered an entry.
    entries = levels.loc[long_entry_condition].copy()

    return entries
    

Ahora buscamos leer los archivos csv descargados para saber qué tickers tenemos y luego analizarlos


In [39]:
from os import path

In [48]:
import glob

# Collect every downloaded CSV. The pattern is built with os.path.join so it
# works with the path separator of any operating system, and the list is
# sorted so slices such as filepaths[0:15] always pick the same files.
filepaths = sorted(glob.glob(os.path.join("price_data", "*.csv")))

# Maps a ticker symbol to its DataFrame of entry signals; it is filled by the
# analysis loop further down.
results_dict = {}

filepaths

['price_data\\A.csv',
 'price_data\\AA.csv',
 'price_data\\AAC.csv',
 'price_data\\AAN.csv',
 'price_data\\AAP.csv',
 'price_data\\AAT.csv',
 'price_data\\AB.csv',
 'price_data\\ABB.csv',
 'price_data\\ABBV.csv',
 'price_data\\ABC.csv',
 'price_data\\ABEV.csv',
 'price_data\\ABG.csv',
 'price_data\\ABM.csv',
 'price_data\\ABR.csv',
 'price_data\\ABT.csv',
 'price_data\\ACCO.csv',
 'price_data\\ACE.csv',
 'price_data\\ACI.csv',
 'price_data\\ACM.csv',
 'price_data\\ACN.csv',
 'price_data\\ACP.csv',
 'price_data\\ACRE.csv',
 'price_data\\ACT.csv',
 'price_data\\ADC.csv',
 'price_data\\ADM.csv',
 'price_data\\ADPT.csv',
 'price_data\\ADT.csv',
 'price_data\\ADX.csv',
 'price_data\\AEB.csv',
 'price_data\\AED.csv',
 'price_data\\AEE.csv',
 'price_data\\AEG.csv',
 'price_data\\AEH.csv',
 'price_data\\AEL.csv',
 'price_data\\AEM.csv',
 'price_data\\AEO.csv',
 'price_data\\AEP.csv',
 'price_data\\AER.csv',
 'price_data\\AES.csv',
 'price_data\\AET.csv',
 'price_data\\AFB.csv',
 'price_data\\A

Corremos un bucle que recorra todos los tickers y aplique la función de búsqueda

In [52]:
# Analyse the first 15 downloaded files and store the entries per symbol.
# The loop variable is named `filepath` so it does not overwrite the `path`
# name imported from `os` earlier in the notebook.
for filepath in filepaths[0:15]:
    df = pd.read_csv(filepath, index_col='datetime')
    # Derive the symbol from the file name: drop the directory part and only
    # the final ".csv", so a symbol that itself contains a dot is kept whole.
    symbol = os.path.splitext(os.path.basename(filepath))[0]
    print(symbol)
    print(filepath)
    entry_df = wick_rejection(df)
    results_dict[symbol] = entry_df

A
price_data\A.csv
AA
price_data\AA.csv
AAC
price_data\AAC.csv
AAN
price_data\AAN.csv
AAP
price_data\AAP.csv
AAT
price_data\AAT.csv
AB
price_data\AB.csv
ABB
price_data\ABB.csv
ABBV
price_data\ABBV.csv
ABC
price_data\ABC.csv
ABEV
price_data\ABEV.csv
ABG
price_data\ABG.csv
ABM
price_data\ABM.csv
ABR
price_data\ABR.csv
ABT
price_data\ABT.csv


In [53]:
# Show the entry/target/stop tables collected for each analysed symbol.
results_dict

{'A':                  entry      target        stop
 datetime                                      
 2018-06-07   62.966390   64.748226   61.778500
 2018-06-15   63.091650   64.760957   61.978779
 2018-07-19   61.650379   63.012039   60.742606
 2018-08-16   63.244029   65.168833   61.960826
 2018-11-07   65.807308   68.568921   63.966232
 2018-11-20   65.487903   68.233959   63.657199
 2019-01-08   66.173826   69.498265   63.957534
 2019-06-20   71.112166   72.965393   69.876681
 2019-06-24   71.160773   72.966081   69.957234
 2019-06-25   70.626038   72.441762   69.415555
 2019-06-27   71.228828   73.033091   70.025986
 2019-10-18   73.121164   75.451186   71.567816
 2019-10-25   73.912119   75.970116   72.540121
 2020-02-19   83.080866   85.525900   81.450843
 2020-09-21   96.188815   99.959798   93.674827
 2020-09-23   94.940241   98.668037   92.455043
 2020-10-29  100.232899  103.431352   98.100597
 2020-10-30  101.956452  105.157015   99.822743
 2021-01-28  119.575053  123.274473