In [None]:
!pip install yfinance
!pip install finta
from finta import TA
import pandas as pd
import yfinance as yf
import glob
import os

In [None]:
# Downloads price history for NYSE-listed symbols, saves one .csv per symbol and returns all frames in a dictionary.
def download_stock_price_data_dict(folder='', years=5, max_symbols=20):
  """Download price data for NYSE-listed symbols, save it as CSV and return it.

  Args:
    folder: Directory the per-symbol CSV files are written to. The default ''
      writes into the current working directory. The directory is created
      if it does not exist.
    years: Number of years of history requested from yfinance.
    max_symbols: How many symbols (taken from the start of the listing) to
      download. Pass None to download every listed symbol.

  Returns:
    dict mapping symbol -> DataFrame with lowercase column names and an index
    named 'datetime'. Only symbols with more than 100 rows of data are kept.

  Notes:
    A failure for one symbol is printed and that symbol is skipped; it does
    not abort the whole download.
  """
  # The listing file holds every ticker symbol in its "ACT Symbol" column.
  tickers = pd.read_csv('https://pkgstore.datahub.io/core/nyse-other-listings/nyse-listed_csv/data/3c88fab8ec158c3cd55145243fe5fcdf/nyse-listed_csv.csv')
  # Unique symbols; blank rows are dropped so they are never sent to the downloader.
  symbols = tickers['ACT Symbol'].dropna().unique()

  # Make sure the target directory exists before the first to_csv call.
  if folder:
    os.makedirs(folder, exist_ok=True)

  # Collects the price DataFrame of every successfully downloaded symbol.
  price_data_dict = {}

  for s in symbols[:max_symbols]:
    try:
      # auto_adjust=True returns prices adjusted for splits and dividends.
      df = yf.download([s], auto_adjust=True, period=f'{years}y')

      # Depending on the yfinance version the columns may come back as a two-level
      # (price field, ticker) MultiIndex. The .str accessor below does not work on a
      # MultiIndex, so keep only the price-field level.
      if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

      # Skip symbols with too little history to be useful.
      if len(df) > 100:
        df.columns = df.columns.str.lower()
        df.index = df.index.rename('datetime')
        # os.path.join keeps the path correct whether or not 'folder' ends with a separator.
        df.to_csv(os.path.join(folder, f'{s}.csv'))
        price_data_dict[s] = df

    # Report the failing symbol and carry on with the next one.
    except Exception as e:
      print(f'error with {s}')
      print(repr(e))

  return price_data_dict
# Runs the download with its default arguments and keeps the returned
# symbol -> DataFrame dictionary so later cells can use it.
price_data_dict = download_stock_price_data_dict()

In [4]:
# Takes the price data of one symbol and returns a new DataFrame holding only its long trade entries.
def find_entries(df:pd.DataFrame, target_profit:float=150, target_atr_multiple:float=1.5, stop_atr_multiple:float=1.0) -> pd.DataFrame:
  """Find long trade entries in one symbol's price data.

  A row is a long entry signal when the close is above the 200-period EMA and
  the bar dipped below the 50-period EMA (low < EMA50) while both its open and
  close stayed above it.

  Args:
    df: Price data with lowercase 'open', 'low' and 'close' columns (the
      finta indicators presumably also need 'high' - confirm against finta).
      The frame is not modified.
    target_profit: Desired profit per trade, used to size the position.
    target_atr_multiple: The target is placed this many ATRs above the entry.
    stop_atr_multiple: The stop is placed this many ATRs below the entry.

  Returns:
    DataFrame with one row per signal bar and the columns 'entry', 'target',
    'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit' and
    'position_size'. 'entry' is the open of the following bar, so a signal on
    the last row has NaN entry, target and stop.
  """
  # Work on a copy so the caller's price frame does not pick up the helper columns below.
  df = df.copy()

  # Indicator columns and the trade columns, initialised to neutral values.
  df['EMA200'] = TA.EMA(df, period=200)
  df['EMA50'] = TA.EMA(df, period=50)
  df['ATR'] = TA.ATR(df)
  # The trade is entered at the next bar's open.
  df['entry'] = df['open'].shift(-1)
  df['target'] = 0.0
  df['stop'] = 0.0
  df['risk:reward'] = 0.0
  df['target_pct'] = 0.0
  df['stop_pct'] = 0.0
  df['target_profit'] = target_profit
  df['position_size'] = 0.0

  # Conditions for a long entry based on the technical analysis indicators.
  c1 = df.close > df.EMA200
  c2 = (df.low < df.EMA50) & (df.close > df.EMA50) & (df.open > df.EMA50)
  long_entry_condition = (c1) & (c2)

  # Fill the trade columns only on signal rows. The order matters: each line
  # reads columns written by the lines above it.
  df.loc[long_entry_condition, 'target'] = df.entry + (target_atr_multiple*df.ATR)
  df.loc[long_entry_condition, 'stop'] = df.entry - (stop_atr_multiple*df.ATR)
  df.loc[long_entry_condition, 'risk:reward'] = (df.target - df.entry) / (df.entry - df.stop)
  df.loc[long_entry_condition, 'target_pct'] = (df.target - df.entry) / (df.entry)
  df.loc[long_entry_condition, 'stop_pct'] = -(df.entry - df.stop) / (df.entry)
  df.loc[long_entry_condition, 'position_size'] = df.target_profit / df.target_pct

  # Keep only the signal rows and the trade columns.
  entry_columns = ['entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit', 'position_size']
  entries = df.loc[long_entry_condition, entry_columns].copy()

  return entries

In [25]:
# Find every .csv file in the working directory. Sorted so the files are
# processed in the same order on every run.
filepaths = sorted(glob.glob('*.csv'))
# Maps each symbol to the DataFrame of its trade entries.
trade_entries_dict = {}

for path in filepaths:
  # The symbol is the file name without its '.csv' extension. Only the final
  # extension is removed, so a symbol that itself contains a dot (e.g. 'BRK.B.csv')
  # keeps its full name and still matches the keys used for the price data.
  symbol = os.path.splitext(os.path.basename(path))[0]
  # Read the file with 'datetime' as the index; parse_dates turns that column into real datetimes.
  df = pd.read_csv(path, index_col='datetime', parse_dates=['datetime'])
  # find_entries returns a new DataFrame with the trade entries of this one symbol.
  entry_df = find_entries(df)
  # Store the entries under the symbol name.
  trade_entries_dict[symbol] = entry_df

In [75]:
# Show at most 20 rows when pandas displays a DataFrame. Use None instead of 20 to show every row.
pd.set_option('display.max_rows', 20)

In [None]:
# Display the trade entry tables built above, keyed by symbol.
trade_entries_dict

In [None]:
# Display the price DataFrames returned by download_stock_price_data_dict, keyed by symbol.
price_data_dict

In [70]:
# Merges the price data and the trade entries of every symbol into one DataFrame per symbol.
# pd.concat with axis=1 puts the columns side by side and aligns the rows on their common index.
# A symbol can only be merged when it is present in both dictionaries, so the ones without
# trade entries are reported and skipped instead of stopping the cell with a KeyError.
missing_entries = [key for key in price_data_dict if key not in trade_entries_dict]
if missing_entries:
  print(f'no trade entries for: {missing_entries}')

price_and_trades_dict = {
  key: pd.concat([price_data_dict[key], trade_entries_dict[key]], axis=1)
  for key in price_data_dict
  if key in trade_entries_dict
}

In [None]:
# Display the merged price-and-trade-entry DataFrames, keyed by symbol.
price_and_trades_dict