In [455]:
import pandas as pd, numpy as np, datetime

In [456]:
filename = 'AAPL.csv'
ticker = filename.split('.')[0]
df = pd.read_csv('./data_clean/' + filename, index_col=0, sep=',')

In [457]:
#turn index col (string) into date col

def date_parse(s): #date strings are in form '2015-01-01'
    d = s.split('-')
    return datetime.date(int(d[0]), int(d[1]), int(d[2]))

df.index = df.index.map(date_parse)

In [458]:
#get categorical columns for closing price and trading volume

def price_categorize(col): #start with price. don't replace price values with categories because they're needed to calculate reward/profit
    
    new_col = np.empty(len(col), dtype="<U4")
    for i in range(len(col)):
        if i == 0 or col[i] == col[i-1]:
            new_col[i] = 'same'
            
        elif col[i] < col[i-1]:
            new_col[i] = 'down'
            
        else:
            new_col[i] = 'up'
            
    return new_col

price_movements = price_categorize(df[price_config])

#get categorical volume column
quantiles = (df.volume.quantile(1/3), df.volume.quantile(2/3))

tri = [0] * len(df)

for i in range(len(df['volume'])): #replace volume counts with categories
    if df['volume'][i] <= quantiles[0]:
        tri[i] = 'low'
    elif df['volume'][i] <= quantiles[1]:
        tri[i] = 'med'
    else:
        tri[i] = 'high'

df = df.assign(volume=tri)

In [459]:
def buy(stock_df, portfolio, date, capital, num_contracts=1): 
    
    transaction_cost = stock_df.at[date, price_config] * (num_contracts * 100)
    
    if capital < transaction_cost:
        return False #can't buy if you lack the funds
    
    '''if ticker not in portfolio.keys(): #update portfolio
        portfolio[ticker] = num_contracts
        
    else:
        portfolio[ticker] += num_contracts'''
    
    current_holdings = portfolio[ticker]
        
    return {ticker: current_holdings+num_contracts}, -transaction_cost #approve transaction, return updated one-stock portfolio and profit/loss (-1 * transaction_cost)
    
def sell(stock_df, portfolio, date, num_contracts=1):  #capital is irrelevant for selling (you can sell with any amount of capital, as long as you have enough shares)
    
    if not portfolio[ticker] or num_contracts > portfolio[ticker]:
        return False #can't sell shares you don't own
    
    gain = stock_df.at[date, price_config] * (num_contracts * 100) 
    
    #portfolio[ticker] -= num_contracts #update portfolio
    
    current_holdings = portfolio[ticker]
    
    return {ticker: current_holdings-num_contracts}, gain #approve transaction, return updated one-stock portfolio and gain

In [460]:
import itertools

trend_features = ('up', 'down', 'same')
vol_features = ('low', 'med', 'high')

state_features = ( trend_features, vol_features ) #will expand over time
states = list(itertools.product(*state_features))

print(states) #all combinations of one element from each state feature tuple
#list(itertools.product(*[ ('up', 'down', 'same'), ('low', 'med', 'high'), ('good', 'ok', 'bad'), ('nay', 'kay', 'yay') ]))

[('up', 'low'), ('up', 'med'), ('up', 'high'), ('down', 'low'), ('down', 'med'), ('down', 'high'), ('same', 'low'), ('same', 'med'), ('same', 'high')]


In [484]:
import random
def arg_max(row): #pandas idxmax returns the first index of the max value in case of a tie; i want a random index instead
    maxes = row[row == row.max()]
    return random.choice(maxes.index)
    


'''def get_trend(price1, price2):
    if price2 > price1:
        return 'up' #price went up
    elif price2 < price1:
        return 'down' #price went down
    return 'same' #price stayed the same'''
    

actions = ('buy', 'sell', 'hold')

num_portfolios = 1

#initial_trend = get_trend(df.at[df.index[0], price_config], df.at[df.index[1], price_config])

initial_trend = price_movements[1]
initial_vol = df.at[df.index[1], 'volume']
initial_state = (initial_trend, initial_vol)

price_config = 'close' #use close to determine price
bank = 100000 #set initial investor capital
portfolio = {ticker: 0} #initialize empty portfolio
num_contracts = 1

training_set = df[df.index < datetime.date(2019, 1, 1)]
test_set = df[df.index >= datetime.date(2019, 1, 1)]

def q_learn(df, ticker, bank, risk_tolerance, alpha=0.01, e=1, y=1):
    
    #initialize
    q = pd.DataFrame(0.0, columns=actions, index=pd.MultiIndex.from_tuples(states))
    portfolio = {ticker: 0}
    i = 1
    
    #for testing
    #state_count = {}
    #for state in states:
    #    state_count[state] = 0
    
    action_list = []
    
    for pf in range(num_portfolios): #iterate through the number of portfolios you're processing
        
        s = initial_state
        
        #for testing

        while bank > 0 and i < len(df)-1: #stop on the day before the last (we use the next day's profit/loss to get the reward):
            
            #testing
            #state_count[s] += 1
            
            #y = 5 ** (-i/150) #weight decay function
            
            current_date = df.index[i]
            current_price = df.at[current_date, price_config]
            next_date = df.index[i+1]
            next_price = df.at[next_date, price_config]
            
            #list out what actions you can take (can you buy and/or sell?)
            possible_actions = ['hold']
            
            long = buy(df, portfolio, current_date, bank) #long position
            if long: #transaction succeeds!
                possible_actions.append('buy')
                
            short = sell(df, portfolio, current_date)
            if short: #transaction succeeds!
                possible_actions.append('sell')
            
            
            #a = q.loc[s][possible_actions].idxmax()
            
            
            #factor in epsilon
            if random.random() < e:
                a = random.choice(possible_actions)
            
            else:
                a = arg_max(q.loc[s][possible_actions])
            
            
            #testing
            action_list.append(a)
                
            if a == 'buy':
                portfolio[ticker] += num_contracts #update number of shares held
                bank += long[1] #update account
                reward = (next_price - current_price) * num_contracts * 100 #reward should be profit from next day. if you buy and the price went up, you get a positive reward
            
            if a == 'hold':
                reward = -risk_tolerance #user-specified penalty on holding
            
            if a == 'sell':    
                portfolio[ticker] -= num_contracts #update number of shares held
                bank += short[1] #update account
                reward = (current_price - next_price) * num_contracts * 100 #reward should be profit from next day. if you sell and the price goes down, you get a positive reward
            
            #trend = get_trend(df.at[current_date, price_config], df.at[next_date, price_config])
            trend = price_movements[i+1]
            vol = df.at[next_date, 'volume']
            
            #q_next = q.at[s_next, q.loc[s_next].idxmax()] #should we check if buying or selling is even possible?
            
            
            #list out what actions you can take on the next trading day
            
            possible_actions = ['hold']
            
            long = buy(df, portfolio, next_date, bank) #long position
            if long: #transaction succeeds!
                possible_actions.append('buy')
                
            short = sell(df, portfolio, next_date)
            if short: #transaction succeeds!
                possible_actions.append('sell')
            
            s_next = (trend, vol)
            q_next = q.at[s_next, arg_max(q.loc[s_next][possible_actions])]
            
            #print('before', q.at[s,a])
            q_inc = alpha * ( reward + y * q_next - q.at[s, a])
            #print(a, q_inc)
            
            q.at[s, a] += q_inc
            #print('after', q.at[s,a])
            
            #print(q)
            
            s = s_next
            i += 1
    
    #print(state_count)
    
    #print(action_list)
    bank += next_price * portfolio[ticker] * 100
    return bank, portfolio, q
                    

In [485]:
#function to decay weight, y: 5 ** (-i/150)
train = q_learn(training_set, ticker, bank, 25)

In [486]:
print (train[0], train[1], '\n\n', train[2]) #learned q values

130494.99664306646 {'AAPL': 8} 

                  buy       sell       hold
up   low    4.309297 -26.357830 -11.089901
     med   -3.739815   4.920890 -10.668405
     high   1.932519   8.557609 -10.497309
down low  -11.853338  -9.724246  -9.456253
     med   -3.594570   2.766364  -9.367753
     high   6.136592   6.273947 -10.411421
same low    0.000000   0.000000  -0.216822
     med    0.632900   0.000000   0.000000
     high   0.000000   0.000000   0.000000


In [487]:
test_trends = price_movements[len(training_set):]

initial_trend = test_trends[0]
initial_vol = test_set.at[test_set.index[0], 'volume']
initial_state = (initial_trend, initial_vol)

def q_test(test_data, ticker, bank, q):
    
    #initialize
    portfolio = {ticker: 0}
    i = 0
    
    action_list = []
    
    for pf in range(num_portfolios): #iterate through the number of portfolios you're processing
        
        s = initial_state
        
        #for testing

        while bank > 0 and i < len(test_data)-1: #stop on the day before the last (we use the next day's profit/loss to get the reward):
            
            current_date = test_data.index[i]
            current_price = test_data.at[current_date, price_config]
            next_date = test_data.index[i+1]
            next_price = test_data.at[next_date, price_config]
            
            #list out what actions you can take (can you buy and/or sell?)
            possible_actions = ['hold']
            
            long = buy(df, portfolio, current_date, bank) #long position
            if long: #transaction succeeds!
                possible_actions.append('buy')
                
            short = sell(df, portfolio, current_date)
            if short: #transaction succeeds!
                possible_actions.append('sell')
            
            
            a = arg_max(q.loc[s][possible_actions])
            
            #testing
            action_list.append(a)
                
            if a == 'buy':
                portfolio[ticker] += num_contracts #update number of shares held
                bank += long[1] #update account
                
            if a == 'sell':    
                portfolio[ticker] -= num_contracts #update number of shares held
                bank += short[1] #update account
            
            #trend = get_trend(df.at[current_date, price_config], df.at[next_date, price_config])
            trend = price_movements[i+1]
            vol = df.at[next_date, 'volume']
            s = (trend, vol) #get next state
            
            i += 1
    
    #print(state_count)
    #print(action_list)
    bank += next_price * portfolio[ticker] * 100
    
    return bank, portfolio
                    

In [488]:
test_run = q_test(test_set, ticker, bank, train[2])
print(test_run)

(117626.98059082031, {'AAPL': 1})


In [493]:
profits = []

for i in range(100):
    train_vals = q_learn(training_set, ticker, bank, i)
    test_run = q_test(test_set, ticker, bank, train_vals[2])
    ret_dict = {'profit': test_run[0] - bank, 'shares_held': test_run[1][ticker] * 100}
    profits.append( {i: ret_dict} )

In [494]:
print(profits)

[{'profit': 32509.994506835967, 'shares_held': 500}, {'profit': 23988.98925781253, 'shares_held': 100}, {'profit': 4148.997497558579, 'shares_held': 300}, {'profit': 51127.992248535185, 'shares_held': 500}, {'profit': 16844.98748779297, 'shares_held': 0}, {'profit': 22469.003295898467, 'shares_held': 500}, {'profit': 8123.9837646484375, 'shares_held': 100}, {'profit': 71676.98516845697, 'shares_held': 100}, {'profit': 53560.00518798828, 'shares_held': 500}, {'profit': 53859.00421142578, 'shares_held': 600}, {'profit': 76049.99847412106, 'shares_held': 100}, {'profit': 77013.98468017572, 'shares_held': 0}, {'profit': 38432.00683593753, 'shares_held': 500}, {'profit': 45305.0048828125, 'shares_held': 500}, {'profit': 20137.98980712892, 'shares_held': 400}, {'profit': 8947.001647949219, 'shares_held': 300}, {'profit': 26313.00048828125, 'shares_held': 300}, {'profit': 40213.995361328125, 'shares_held': 100}, {'profit': 49037.99285888672, 'shares_held': 600}, {'profit': 25180.999755859375,