In [1]:
import os
import random
import csv
import datetime

***
Gets **a random ticker** from data CSVs located in the path
- path: (str) file path of the data directory
- return: (str) the ticker symbol

In [2]:
def randomTicker(path):
    """Return a random ticker symbol drawn from the CSV files in ``path``.

    A ticker is the file name with its ``.csv`` extension removed. Only the
    final extension is stripped, so symbols that contain dots are kept whole
    (``BRK.B.csv`` -> ``BRK.B``), and files that merely contain ".csv"
    somewhere in their name (``AAPL.csv.bak``) are ignored.

    - path: (str) file path of the data directory
    - return: (str) the ticker symbol
    - raises: IndexError if the directory contains no ``.csv`` files
    """
    tickers = []
    for name in os.listdir(path):
        #split on the LAST dot only so dotted tickers survive intact
        stem, ext = os.path.splitext(name)
        if ext == '.csv' and stem:
            tickers.append(stem)
    if not tickers:
        #same exception type random.choice would raise, with a useful message
        raise IndexError("No .csv data files found in {}".format(path))
    #get random file
    return random.choice(tickers)

***
Gets **a list** of tickers from data CSVs located in the path
- path: (str) file path of the data directory
- return: (list) contains all the tickers

In [3]:
def allTickers(path):
    """Return the ticker symbols of every CSV file found in ``path``.

    A ticker is the file name with its ``.csv`` extension removed. Only the
    final extension is stripped, so symbols that contain dots are kept whole
    (``BRK.B.csv`` -> ``BRK.B``), and files that merely contain ".csv"
    somewhere in their name (``AAPL.csv.bak``) are ignored.

    - path: (str) file path of the data directory
    - return: (list) contains all the tickers, in ``os.listdir`` order
      (empty if the directory holds no ``.csv`` files)
    """
    #splitext splits on the LAST dot only, so dotted tickers survive intact
    return [
        stem
        for stem, ext in map(os.path.splitext, os.listdir(path))
        if ext == '.csv' and stem
    ]

***
Gets two random dates (start and end) to run a backtest between
- path: (str) file path of the data directory
- t: (str) the ticker symbol (from randomTicker function)
- fmt: (str) the format of dates in CSV files
- minDays: (int) minimum number of days required for backtesting
- maxDays: (int) maximum number of days allowed for backtesting
- return:
    1. (datetime obj) the date to start backtesting
    2. (datetime obj) the date to end backtesting
    3. (int) the number of days between start and end dates

In [4]:
def twoDates(path, t, fmt, minDays=365, maxDays=2500):
    """Pick a random backtesting window inside the date range of a ticker's CSV.

    The first row of ``<path>/<t>.csv`` is treated as a header; column 0 of
    the first and last remaining non-blank rows are taken as the earliest and
    latest available dates. Two day offsets are then drawn uniformly at
    random from that range (both end points included) until the gap between
    them lies in ``[minDays, maxDays]``.

    - path: (str) file path of the data directory
    - t: (str) the ticker symbol (from randomTicker function)
    - fmt: (str) the format of dates in CSV files
    - minDays: (int) minimum number of days required for backtesting
    - maxDays: (int) maximum number of days allowed for backtesting
    - return:
        1. (datetime obj) the date to start backtesting
        2. (datetime obj) the date to end backtesting
        3. (int) the number of days between start and end dates
    - raises: ValueError if the file has no dated rows, or if no window of
      minDays..maxDays days fits between its first and last date (this
      situation previously made the function loop forever)
    """
    file = os.path.join(path, t+'.csv')
    #only the first and last dates are needed, so don't keep the whole column
    firstDateSt = None
    lastDateSt = None
    with open(file, 'r', newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  #row 0 is the header
        for r in reader:
            if not r:
                #csv.reader yields [] for blank lines; r[0] would crash
                continue
            if firstDateSt is None:
                firstDateSt = r[0]
            lastDateSt = r[0]
    if firstDateSt is None:
        raise ValueError("No dated rows found in {}".format(file))
    #convert to datetime objects
    stDate = datetime.datetime.strptime(firstDateSt, fmt)
    lstDate = datetime.datetime.strptime(lastDateSt, fmt)
    totalDays = (lstDate - stDate).days
    #refuse impossible requests up front instead of spinning forever
    shortest = max(minDays, 0)
    longest = min(maxDays, totalDays)
    if shortest > longest:
        raise ValueError(
            "Cannot fit a window of {}-{} days into the {} days of data in {}"
            .format(minDays, maxDays, totalDays, file))
    #rejection-sample two offsets; totalDays+1 so the last date can be chosen
    while True:
        rand1 = random.randrange(totalDays + 1)
        rand2 = random.randrange(totalDays + 1)
        if minDays <= abs(rand1-rand2) <= maxDays:
            break
    #get date range (datetime objects)
    startDate = stDate + datetime.timedelta(days = min(rand1, rand2))
    endDate = stDate + datetime.timedelta(days = max(rand1, rand2))
    intervalLen = endDate - startDate
    return startDate, endDate, intervalLen.days

***
Picks a ticker (random via randomTicker, unless a ticker is specified and its data file exists) and a random backtesting date range for it
* path: (str) file path of the data directory
* d: (dict) contains datetime formats
* iterator: (str) the datetime format key
* t (*opt*): (str) the ticker symbol
* minDays (*opt*): (int) minimum number of days required for backtesting
* maxDays (*opt*): (int) maximum number of days allowed for backtesting
* return:
    1. (str) the ticker symbol
    2. (datetime obj) the date to start backtesting
    3. (datetime obj) the date to end backtesting
    4. (str) date format
    5. (int) the number of days between start and end dates

In [5]:
def main(path, d, iterator, t=None, minDays=365, maxDays=1500):
    """Choose a ticker and a random backtesting date range for it.

    If ``t`` is omitted, a random ticker is drawn from the data directory.
    If ``t`` is given but ``<path>/<t>.csv`` does not exist, a message is
    printed and a random ticker is used instead.

    - path: (str) file path of the data directory
    - d: (dict) contains datetime formats
    - iterator: (str) the datetime format key (looked up as ``d[iterator]``)
    - t (opt): (str) the ticker symbol
    - minDays (opt): (int) minimum number of days required for backtesting
    - maxDays (opt): (int) maximum number of days allowed for backtesting
    - return:
        1. (str) the ticker symbol
        2. (datetime obj) the date to start backtesting
        3. (datetime obj) the date to end backtesting
        4. (str) date format
        5. (int) the number of days between start and end dates
    """
    if t is None:
        ticker = randomTicker(path)
    elif os.path.exists(os.path.join(path, t+'.csv')):
        #requested ticker has a data file
        ticker = t
    else:
        #best-effort fallback: report the miss and pick a random ticker
        print("No data found for {} in {}".format(t, path))
        ticker = randomTicker(path)
    fmt = d[iterator]
    start, end, days = twoDates(path, ticker, fmt, minDays, maxDays)
    return ticker, start, end, fmt, days