In [None]:
!pip install yfinance
!pip install finta
from finta import TA
import pandas as pd
import yfinance as yf
import glob
import numpy as np

In [None]:
# function with a 'folder' and 'years' argument. Purpose is to download and save price data as .csv and also in a dictionary.
def download_stock_price_data_dict(folder='', years=5):

  # tickers is a pandas DataFrame object. pd.read_csv function reads csv file from this website which contains all of the stock ticker symbols under it's "ACT Symbol" column.
  tickers = pd.read_csv('https://pkgstore.datahub.io/core/nyse-other-listings/nyse-listed_csv/data/3c88fab8ec158c3cd55145243fe5fcdf/nyse-listed_csv.csv')
  # symbols is a list of symbols we made by taking all the unique symbols from the tickers .csv
  symbols = tickers['ACT Symbol'].unique()
  # empty dictionary to store all stock price DataFrames.
  price_data_dict = {}

  # loop where we go through each symbol 's' in the symbols list (first 20), and then do some things.
  for s in symbols[0:20]:
    try:

      # creates the DataFrame 'df' from the downloaded yf price data over the last years. auto_adjust accounts for stock splits and such. Takes in each stock symbol 's' one at at time.
      price_data_df = yf.download([s], auto_adjust=True, period=f'{str(years)}y')

      # If the length of a 'df' is over 100, then go ahead and do the iteration.
      if len(price_data_df) > 100:
        # make the columns lowercase, rename the index to 'datetime', save the df's to our folder, and save the df's to our dictionary.
        price_data_df.columns = price_data_df.columns.str.lower()
        price_data_df.index = price_data_df.index.rename('datetime')
        price_data_df.to_csv(f'{folder}{s}.csv')
        price_data_dict[s] = price_data_df

    # error catch
    except Exception as e:
      print(f'error with {s}')
      print(repr(e))
      continue

  # Allows for the 'price_data_dict' dictionary object to be accessed elsewhere in the code as a variable or input in another function.   
  return price_data_dict
# Download and saves price data to our price data dictionary.
price_data_dict = download_stock_price_data_dict()

In [36]:
# takes in 'price_data_df' DataFrames as an argument and transforms it to a new DataFrame later called 'entries'.
def create_entry_df(price_data_df:pd.DataFrame) -> pd.DataFrame:
  # Adding various columns to the original 'df'.
  price_data_df['EMA200'] = TA.EMA(price_data_df, period=200)
  price_data_df['EMA50'] = TA.EMA(price_data_df, period=50)
  price_data_df['ATR'] = TA.ATR(price_data_df)
  price_data_df['entry'] = price_data_df.shift(-1).open
  price_data_df['target'] = 0.0
  price_data_df['stop'] = 0.0
  price_data_df['risk:reward'] = 0.0
  price_data_df['target_pct'] = 0.0
  price_data_df['stop_pct'] = 0.0
  price_data_df['target_profit'] = 150
  price_data_df['position_size'] = 0.0

  # Conditions for long entry based on technical analysis indicators.
  c1 = price_data_df.close > price_data_df.EMA200
  c2 = (price_data_df.low < price_data_df.EMA50) & (price_data_df.close > price_data_df.EMA50) & (price_data_df.open > price_data_df.EMA50)
  long_entry_condition = (c1) & (c2)

  # Accesses and modifies all rows in 'df' where long_entry_condition is true, and modifies the 2nd input '' column in those rows to what is specified.
  price_data_df.loc[long_entry_condition, 'target'] = price_data_df.entry + (1.5*price_data_df.ATR)
  price_data_df.loc[long_entry_condition, 'stop'] = price_data_df.entry - price_data_df.ATR
  price_data_df.loc[long_entry_condition, 'risk:reward'] = (price_data_df.target - price_data_df.entry) / (price_data_df.entry - price_data_df.stop)
  price_data_df.loc[long_entry_condition, 'target_pct'] = (price_data_df.target - price_data_df.entry) / (price_data_df.entry)
  price_data_df.loc[long_entry_condition, 'stop_pct'] = -(price_data_df.entry - price_data_df.stop) / (price_data_df.entry)
  price_data_df.loc[long_entry_condition, 'position_size'] = price_data_df.target_profit / price_data_df.target_pct

  # Selects the subset of rows where the price data in DataFrame 'df' satisfy the long entry condition, and then creates a copy of the modified DataFrame denoted 'entries'.
  entries = price_data_df.loc[long_entry_condition][['entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit', 'position_size']].copy()

  # Allows for the 'entries' DataFrame to be accessed elsewhere in the code as a variable. 
  return entries

In [44]:
# Create empty list and dictionary for future use
filepaths = []
trade_entries_dict = {}

# Uses the 'glob' module to iterate and find all files with .csv, for each file found, the filepath is appended to the filepaths empty list [ ].
for file in glob.glob('*.csv'):
  filepaths.append(file)

# Using a for loop, we iterate through each filepath in 'filepaths' list.
for path in filepaths:
  # Extracts the file's symbol name by splitting and parsing the path at the first index before the period, and saves it to the symbol variable.
  symbol = path.split('.')[0]
  # Reads the file into a DataFrame called 'df', with the index set to 'datetime'. The parse_dates makes it so the datetime index column is properly a datetime64[ns] object type.
  symbols_df = pd.read_csv(path, index_col='datetime', parse_dates=['datetime'])
  # Calls the 'find_entries' function on 'df' to generate an entirely new DataFrame called 'entry_df', which has the trade entries for a single symbol.
  entry_df = create_entry_df(symbols_df)
  # It adds each DataFrame 'entry_df' which was iterated through for every symbol, and adds it to the 'trad_entries_dict' dictionary, with the symbol name as the key, and the value is the entry trade data.
  trade_entries_dict[symbol] = entry_df

In [45]:
# allows for all rows to be seen if = 'None'. If want less, do = 20.
pd.options.display.max_rows = None

In [46]:
trade_entries_dict

{'A':                  entry      target        stop  risk:reward  target_pct  \
 datetime                                                                  
 2018-03-14   67.492883         NaN         NaN          NaN         NaN   
 2018-03-16   67.203990         NaN         NaN          NaN         NaN   
 2018-04-19   65.665071   68.068150   64.063019          1.5    0.036596   
 2018-08-16   63.347046   65.274988   62.061751          1.5    0.030435   
 2018-11-07   65.914511   68.680625   64.070435          1.5    0.041965   
 2018-11-20   65.594605   68.345141   63.760914          1.5    0.041932   
 2019-01-08   66.281633   69.611481   64.061735          1.5    0.050238   
 2019-06-20   71.227991   73.084233   69.990497          1.5    0.026061   
 2019-06-24   71.276695   73.084940   70.071199          1.5    0.025369   
 2019-06-25   70.741072   72.559750   69.528620          1.5    0.025709   
 2019-06-27   71.344866   73.152066   70.140066          1.5    0.025330   
 2019-1

In [47]:
price_data_dict

{'A':                   open        high         low       close    volume
 datetime                                                            
 2018-03-05   64.758095   65.335861   64.045517   65.046982   1890900
 2018-03-06   65.268446   65.884731   64.950672   65.586220   2944600
 2018-03-07   64.864013   67.271377   64.806238   67.117302   3125800
 2018-03-08   67.136569   67.666185   66.626209   67.261749   2453400
 2018-03-09   67.666175   68.850601   67.406183   68.494308   2061200
 2018-03-12   68.561723   68.879497   68.051363   68.089882   2049400
 2018-03-13   68.369139   68.869868   67.810629   68.060997   1728100
 2018-03-14   68.436544   68.523213   67.107682   67.329155   1866300
 2018-03-15   67.492883   68.041769   67.281041   67.579552   2530400
 2018-03-16   67.329169   67.733605   66.491411   67.309914   3378100
 2018-03-19   67.203990   67.704719   66.501037   66.770660   2639400
 2018-03-20   67.184712   67.473598   66.876570   67.261749   2379100
 2018-03-21   6

In [48]:
# merges the price data and trade entries dictionaries. It's shorthand for creating a new dictionary from existing ones.
# for each key 'key' in price_data_dict, it makes new key-value pair in the new dict. Rows are aligned based on their common index.
price_and_trades_dict = {key: pd.concat([price_data_dict[key], trade_entries_dict[key]], axis=1) for key in price_data_dict}

In [49]:
# merges the price data and trade entries dictionaries. It's shorthand for creating a new dictionary from existing ones.
# for each key 'key' in price_data_dict, it makes new key-value pair in the new dict. Rows are aligned based on their common index.
price_and_trades_dict = {key: pd.concat([price_data_dict[key], trade_entries_dict[key]], axis=1) for key in price_data_dict}
# might need this, converts all 'NaN' into 0.
#for key in price_and_trades_dict:
    #price_and_trades_dict[key] = price_and_trades_dict[key].fillna(0)
    
price_and_trades_dict

{'A':                   open        high         low       close    volume  \
 datetime                                                               
 2018-03-05   64.758095   65.335861   64.045517   65.046982   1890900   
 2018-03-06   65.268446   65.884731   64.950672   65.586220   2944600   
 2018-03-07   64.864013   67.271377   64.806238   67.117302   3125800   
 2018-03-08   67.136569   67.666185   66.626209   67.261749   2453400   
 2018-03-09   67.666175   68.850601   67.406183   68.494308   2061200   
 2018-03-12   68.561723   68.879497   68.051363   68.089882   2049400   
 2018-03-13   68.369139   68.869868   67.810629   68.060997   1728100   
 2018-03-14   68.436544   68.523213   67.107682   67.329155   1866300   
 2018-03-15   67.492883   68.041769   67.281041   67.579552   2530400   
 2018-03-16   67.329169   67.733605   66.491411   67.309914   3378100   
 2018-03-19   67.203990   67.704719   66.501037   66.770660   2639400   
 2018-03-20   67.184712   67.473598   66.87657

In [43]:
# .name should yield 'datetime' and .dtype should yield '<M8[ns]' which is numpy's way of saying datetime64[ns] equivilant in pandas.

#price_and_trades_dict[next(iter(price_and_trades_dict.keys()))].index.name
#price_and_trades_dict[next(iter(price_and_trades_dict.keys()))].index.dtype

#price_data_dict[next(iter(price_data_dict.keys()))].index.name
#price_data_dict[next(iter(price_data_dict.keys()))].index.dtype

#trade_entries_dict[next(iter(trade_entries_dict.keys()))].index.name
#trade_entries_dict[next(iter(trade_entries_dict.keys()))].index.dtype

In [None]:
# takes in 'price_data_df' DataFrames as an argument and transforms it to a new DataFrame later called 'entries'.
def create_entry_df(price_data_df:pd.DataFrame) -> pd.DataFrame:
  # Adding various columns to the original 'df'.
  price_data_df['EMA200'] = TA.EMA(price_data_df, period=200)
  price_data_df['EMA50'] = TA.EMA(price_data_df, period=50)
  price_data_df['ATR'] = TA.ATR(price_data_df)
  price_data_df['entry'] = price_data_df.shift(-1).open
  price_data_df['target'] = 0.0
  price_data_df['stop'] = 0.0
  price_data_df['risk:reward'] = 0.0
  price_data_df['target_pct'] = 0.0
  price_data_df['stop_pct'] = 0.0
  price_data_df['target_profit'] = 150
  price_data_df['position_size'] = 0.0

  # Conditions for long entry based on technical analysis indicators.
  c1 = price_data_df.close > price_data_df.EMA200
  c2 = (price_data_df.low < price_data_df.EMA50) & (price_data_df.close > price_data_df.EMA50) & (price_data_df.open > price_data_df.EMA50)
  long_entry_condition = (c1) & (c2)

  # Accesses and modifies all rows in 'df' where long_entry_condition is true, and modifies the 2nd input '' column in those rows to what is specified.
  price_data_df.loc[long_entry_condition, 'target'] = price_data_df.entry + (1.5*price_data_df.ATR)
  price_data_df.loc[long_entry_condition, 'stop'] = price_data_df.entry - price_data_df.ATR
  price_data_df.loc[long_entry_condition, 'risk:reward'] = (price_data_df.target - price_data_df.entry) / (price_data_df.entry - price_data_df.stop)
  price_data_df.loc[long_entry_condition, 'target_pct'] = (price_data_df.target - price_data_df.entry) / (price_data_df.entry)
  price_data_df.loc[long_entry_condition, 'stop_pct'] = -(price_data_df.entry - price_data_df.stop) / (price_data_df.entry)
  price_data_df.loc[long_entry_condition, 'position_size'] = price_data_df.target_profit / price_data_df.target_pct

  # Selects the subset of rows where the price data in DataFrame 'df' satisfy the long entry condition, and then creates a copy of the modified DataFrame denoted 'entries'.
  entries = price_data_df.loc[long_entry_condition][['entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit', 'position_size']].copy()

  # Allows for the 'entries' DataFrame to be accessed elsewhere in the code as a variable. 
  return entries