In [None]:
!pip install yfinance
!pip install finta
import glob
import os

import numpy as np
import pandas as pd
import yfinance as yf
from finta import TA

In [None]:
# Function with a 'folder' and 'years' argument. Purpose is to download and save historic price data for each ticker as .csv and also in a dictionary.
def download_stock_price_data_and_save_to_a_dict(folder='', years=5, max_symbols=20):
  """Download price history for NYSE-listed symbols, save each to CSV and return them in a dict.

  Args:
    folder: Directory the per-symbol CSV files are written to. The default ''
      writes into the current working directory. The directory is created
      when it does not exist yet.
    years: Number of years of history requested from yfinance.
    max_symbols: How many symbols, taken from the start of the listing, are
      processed. Pass None to process every symbol.

  Returns:
    dict mapping symbol -> DataFrame with lowercase column names and an index
    named 'datetime'. Only symbols with more than 100 rows are included.
  """
  # The listing's "ACT Symbol" column holds the ticker symbols.
  tickers = pd.read_csv('https://pkgstore.datahub.io/core/nyse-other-listings/nyse-listed_csv/data/3c88fab8ec158c3cd55145243fe5fcdf/nyse-listed_csv.csv')
  symbols = tickers['ACT Symbol'].unique()
  price_data_dict = {}

  # Without this, every to_csv() below would fail when the folder is missing.
  if folder:
    os.makedirs(folder, exist_ok=True)

  for s in symbols[:max_symbols]:
    try:
      # auto_adjust=True asks yfinance for prices adjusted for splits and such.
      price_data_df = yf.download([s], auto_adjust=True, period=f'{years}y')

      # Skip symbols with too little history to be useful.
      if len(price_data_df) <= 100:
        continue

      # Some yfinance releases return (field, ticker) MultiIndex columns even
      # for a single ticker; keep only the field level so .str.lower() works.
      if isinstance(price_data_df.columns, pd.MultiIndex):
        price_data_df.columns = price_data_df.columns.get_level_values(0)

      price_data_df.columns = price_data_df.columns.str.lower()
      price_data_df.index = price_data_df.index.rename('datetime')
      # os.path.join supplies the separator, so 'data' and 'data/' both work.
      price_data_df.to_csv(os.path.join(folder, f'{s}.csv'))
      price_data_dict[s] = price_data_df

    # Best effort: report the failing symbol and move on to the next one.
    except Exception as e:
      print(f'error with {s}')
      print(repr(e))
      continue

  return price_data_dict
# Run the download with its default settings (CSV files land in the working
# directory, five years of history) and keep the returned symbol -> DataFrame dict.
price_data_dict = download_stock_price_data_and_save_to_a_dict(folder='', years=5)

In [3]:
# takes in 'price_data_df' DataFrames as an argument and transforms it to a new DataFrame later called 'entries'.
def create_entry_df(price_data_df:pd.DataFrame) -> pd.DataFrame:
  """Build the table of long trade entries for one symbol's price history.

  A row qualifies as a long entry when its close is above the 200-period EMA,
  its low dips below the 50-period EMA, and both its open and close are above
  the 50-period EMA. The entry price is the NEXT row's open, so a signal on
  the last row has a NaN entry.

  Args:
    price_data_df: Price data with 'open', 'low' and 'close' columns (plus
      whatever columns finta's EMA/ATR read). It is not modified.

  Returns:
    A new DataFrame holding only the qualifying rows, with the columns
    'entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct',
    'target_profit' and 'position_size'.
  """
  # Work on a copy so the caller's DataFrame does not silently gain columns.
  df = price_data_df.copy()

  df['EMA200'] = TA.EMA(df, period=200)
  df['EMA50'] = TA.EMA(df, period=50)
  df['ATR'] = TA.ATR(df)
  df['entry'] = df['open'].shift(-1)
  df['target'] = 0.0
  df['stop'] = 0.0
  df['risk:reward'] = 0.0
  df['target_pct'] = 0.0
  df['stop_pct'] = 0.0
  df['target_profit'] = 150
  df['position_size'] = 0.0

  # Conditions for long entry based on the technical analysis indicators.
  c1 = df['close'] > df['EMA200']
  c2 = (df['low'] < df['EMA50']) & (df['close'] > df['EMA50']) & (df['open'] > df['EMA50'])
  long_entry_condition = c1 & c2

  # Fill the trade columns on qualifying rows only. Order matters: each
  # later column is derived from the ones assigned before it.
  df.loc[long_entry_condition, 'target'] = df['entry'] + (1.5 * df['ATR'])
  df.loc[long_entry_condition, 'stop'] = df['entry'] - df['ATR']
  df.loc[long_entry_condition, 'risk:reward'] = (df['target'] - df['entry']) / (df['entry'] - df['stop'])
  df.loc[long_entry_condition, 'target_pct'] = (df['target'] - df['entry']) / df['entry']
  df.loc[long_entry_condition, 'stop_pct'] = -(df['entry'] - df['stop']) / df['entry']
  df.loc[long_entry_condition, 'position_size'] = df['target_profit'] / df['target_pct']

  entry_columns = ['entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit', 'position_size']
  # Single .loc call (rows and columns together) instead of chained indexing.
  entries = df.loc[long_entry_condition, entry_columns].copy()

  return entries

In [4]:
# Every .csv file in the working directory is treated as one symbol's price
# history. Sorting makes the processing (and resulting dict) order reproducible.
filepaths = sorted(glob.glob('*.csv'))
trade_entries_dict = {}

for path in filepaths:
  # Strip only the final extension so symbols that contain a dot keep their
  # full name (path.split('.')[0] would turn 'BRK.A.csv' into 'BRK').
  symbol = os.path.splitext(os.path.basename(path))[0]
  # parse_dates makes the 'datetime' index a proper datetime64[ns] index.
  symbols_df = pd.read_csv(path, index_col='datetime', parse_dates=['datetime'])
  # One trade-entry DataFrame per symbol, keyed by the symbol name.
  trade_entries_dict[symbol] = create_entry_df(symbols_df)

In [5]:
# Show every row when a DataFrame is displayed (None = no limit); use a
# number such as 20 instead to cap the output.
pd.set_option('display.max_rows', None)

In [None]:
# Display the symbol -> trade-entries mapping built from the CSV files.
trade_entries_dict

In [None]:
# Display the symbol -> price-history mapping returned by the download step.
price_data_dict

In [10]:
# Merge the price data and trade entries per symbol: the two DataFrames are
# concatenated column-wise, with rows aligned on their common index.
# Only symbols present in BOTH dictionaries are merged; indexing
# trade_entries_dict with a symbol it does not contain would raise KeyError.
symbols_without_entries = [key for key in price_data_dict if key not in trade_entries_dict]
if symbols_without_entries:
    print(f'no trade entries found for: {symbols_without_entries}')

price_and_trades_dict = {
    key: pd.concat([price_data_dict[key], trade_entries_dict[key]], axis=1)
    for key in price_data_dict
    if key in trade_entries_dict
}
# Rows without a trade entry hold NaN in the entry columns. If zeros are
# preferred, apply .fillna(0) to each DataFrame in price_and_trades_dict.

price_and_trades_dict

{'A':                   open        high         low       close    volume  \
 datetime                                                               
 2018-04-11   63.573407   64.170788   63.419239   63.669758   1391400   
 2018-04-12   64.132246   65.172855   64.026257   64.748901   2180900   
 2018-04-13   65.037961   65.201758   64.440572   64.758537   2043900   
 2018-04-16   65.355922   65.876227   65.028320   65.683517   2887800   
 2018-04-17   66.097832   66.338714   65.587165   65.943672   2682600   
 2018-04-18   66.598867   67.726190   66.146009   67.013184   2678300   
 2018-04-19   66.945739   67.051720   64.903063   65.490814   2435100   
 2018-04-20   65.558261   65.770238   64.864522   65.086136   1917200   
 2018-04-23   65.240304   66.165287   65.066869   65.587173   2260600   
 2018-04-24   63.968454   65.433017   63.631222   64.209335   3701600   
 2018-04-25   64.257527   64.633302   62.542453   63.399990   4451600   
 2018-04-26   63.804669   64.324974   63.49634

In [None]:
# .name should yield 'datetime' and .dtype should yield '<M8[ns]', which is numpy's notation for the dtype pandas calls datetime64[ns].

#price_and_trades_dict[next(iter(price_and_trades_dict.keys()))].index.name
#price_and_trades_dict[next(iter(price_and_trades_dict.keys()))].index.dtype

#price_data_dict[next(iter(price_data_dict.keys()))].index.name
#price_data_dict[next(iter(price_data_dict.keys()))].index.dtype

#trade_entries_dict[next(iter(trade_entries_dict.keys()))].index.name
#trade_entries_dict[next(iter(trade_entries_dict.keys()))].index.dtype

In [None]:
# CURRENTLY EXPERIMENTING
# takes in 'price_data_df' DataFrames as an argument and transforms it to a new DataFrame later called 'entries'.
def create_entry_df(price_data_df:pd.DataFrame) -> pd.DataFrame:
  """Build the table of long trade entries for one symbol's price history.

  NOTE(review): a function with this same name is already defined in an
  earlier cell; running this cell replaces that definition.

  A row qualifies as a long entry when its close is above the 200-period EMA,
  its low dips below the 50-period EMA, and both its open and close are above
  the 50-period EMA. The entry price is the NEXT row's open, so a signal on
  the last row has a NaN entry.

  Args:
    price_data_df: Price data with 'open', 'low' and 'close' columns (plus
      whatever columns finta's EMA/ATR read). It is not modified.

  Returns:
    A new DataFrame holding only the qualifying rows, with the columns
    'entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct',
    'target_profit' and 'position_size'.
  """
  # Work on a copy so the caller's DataFrame does not silently gain columns.
  df = price_data_df.copy()

  df['EMA200'] = TA.EMA(df, period=200)
  df['EMA50'] = TA.EMA(df, period=50)
  df['ATR'] = TA.ATR(df)
  df['entry'] = df['open'].shift(-1)
  df['target'] = 0.0
  df['stop'] = 0.0
  df['risk:reward'] = 0.0
  df['target_pct'] = 0.0
  df['stop_pct'] = 0.0
  df['target_profit'] = 150
  df['position_size'] = 0.0

  # Conditions for long entry based on the technical analysis indicators.
  c1 = df['close'] > df['EMA200']
  c2 = (df['low'] < df['EMA50']) & (df['close'] > df['EMA50']) & (df['open'] > df['EMA50'])
  long_entry_condition = c1 & c2

  # Fill the trade columns on qualifying rows only. Order matters: each
  # later column is derived from the ones assigned before it.
  df.loc[long_entry_condition, 'target'] = df['entry'] + (1.5 * df['ATR'])
  df.loc[long_entry_condition, 'stop'] = df['entry'] - df['ATR']
  df.loc[long_entry_condition, 'risk:reward'] = (df['target'] - df['entry']) / (df['entry'] - df['stop'])
  df.loc[long_entry_condition, 'target_pct'] = (df['target'] - df['entry']) / df['entry']
  df.loc[long_entry_condition, 'stop_pct'] = -(df['entry'] - df['stop']) / df['entry']
  df.loc[long_entry_condition, 'position_size'] = df['target_profit'] / df['target_pct']

  entry_columns = ['entry', 'target', 'stop', 'risk:reward', 'target_pct', 'stop_pct', 'target_profit', 'position_size']
  # Single .loc call (rows and columns together) instead of chained indexing.
  entries = df.loc[long_entry_condition, entry_columns].copy()

  return entries