In [1]:
!pip install -r requirements.txt

Collecting argparse (from -r requirements.txt (line 5))
  Using cached argparse-1.4.0-py2.py3-none-any.whl.metadata (2.8 kB)
Using cached argparse-1.4.0-py2.py3-none-any.whl (23 kB)
Installing collected packages: argparse
Successfully installed argparse-1.4.0


In [20]:
import argparse
import math
import os
import threading
import time as t
from dataclasses import dataclass, field
from getpass import getpass
from typing import Union

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pytz
import requests,json
import robin_stocks.robinhood as r
import torch
import torch.nn as nn
import torch.nn.functional as F
from robin_stocks import *
from scipy import stats
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from tqdm import tqdm

# Kept after the star import above so robin_stocks can never shadow these names.
from datetime import datetime, time, date
pst = pytz.timezone('US/Pacific')

# Credentials are read from the environment (with an interactive fallback) so
# they never end up in the saved notebook or in version control.
rh_username = os.environ.get('ROBINHOOD_USERNAME') or input('Robinhood username: ')
rh_password = os.environ.get('ROBINHOOD_PASSWORD') or getpass('Robinhood password: ')

login = r.login(username=rh_username, password=rh_password) #robinhood account necessary for making trades and getting new data
account = r.load_account_profile()

# Full timestamp of the moment this cell ran (not just the date).
current_date = datetime.now()#.date

# Symbols that refreshData() downloads and scanLoop() scores.
tickerList = ['AMZN','COST','GOOGL']

In [22]:

def npo2(len):
    """
    Returns the smallest power of 2 that is greater than or equal to len.

    An exact power of two is returned unchanged (npo2(8) == 8).

    Args:
        len : positive length (fractional values are rounded up first)

    Returns:
        int : 2**k with 2**k >= len

    Raises:
        ValueError : if len is not positive
    """

    n = math.ceil(len)
    if n < 1:
        raise ValueError("npo2() requires a positive length, got %r" % (len,))

    # Integer arithmetic: math.log2 rounds for large integers (e.g. 2**52 + 1
    # evaluates to exactly 52.0) which made the float version return a value
    # smaller than its argument.
    return 1 << (n - 1).bit_length()

def pad_npo2(X):
    """
    Zero-pads the length dimension (dim 1) up to npo2 of its current size.

    Args:
        X : (B, L, D, N)

    Returns:
        Y : (B, npo2(L), D, N), a new tensor whose extra trailing steps are 0
    """

    seq_len = X.size(1)
    missing = npo2(seq_len) - seq_len
    # F.pad consumes (left, right) pairs starting from the LAST dimension:
    # N untouched, D untouched, L padded on the right only.
    return F.pad(X, (0, 0, 0, 0, 0, missing), "constant", 0)

class PScan(torch.autograd.Function):
    """
    Custom autograd function computing the linear recurrence
    H[t] = A[t] * H[t-1] + X[t] along the length dimension with a parallel
    (up-sweep / down-sweep) scan instead of a sequential loop.

    Use through the module-level alias `pscan(A_in, X_in)`, with both inputs
    shaped (B, L, D, N). Lengths that are not a power of two are zero-padded
    internally and the padding is cut off again before returning.
    """

    @staticmethod
    def pscan(A, X):
        """
        In-place forward scan. Mutates BOTH A and X; returns None.
        Callers must pass tensors they own (forward() clones or pads first).
        """
        # A : (B, D, L, N)
        # X : (B, D, L, N)

        # modifies X in place by doing a parallel scan.
        # more formally, X will be populated by these values :
        # H[t] = A[t] * H[t-1] + X[t] with H[0] = 0
        # which are computed in parallel (2*log2(T) sequential steps (ideally), instead of T sequential steps)

        # only supports L that is a power of two (mainly for a clearer code)
        
        B, D, L, _ = A.size()
        num_steps = int(math.log2(L))

        # up sweep (last 2 steps unfolded)
        # each iteration pairs neighbouring positions, folds the left element
        # into the right one, then keeps only the right elements (views, so the
        # writes land in the original A and X storage)
        Aa = A
        Xa = X
        for _ in range(num_steps-2):
            T = Xa.size(2)
            Aa = Aa.view(B, D, T//2, 2, -1)
            Xa = Xa.view(B, D, T//2, 2, -1)
            
            Xa[:, :, :, 1].add_(Aa[:, :, :, 1].mul(Xa[:, :, :, 0]))
            Aa[:, :, :, 1].mul_(Aa[:, :, :, 0])

            Aa = Aa[:, :, :, 1]
            Xa = Xa[:, :, :, 1]

        # we have only 4, 2 or 1 nodes left
        if Xa.size(2) == 4:
            Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 0]))
            Aa[:, :, 1].mul_(Aa[:, :, 0])

            Xa[:, :, 3].add_(Aa[:, :, 3].mul(Xa[:, :, 2] + Aa[:, :, 2].mul(Xa[:, :, 1])))
        elif Xa.size(2) == 2:
            # L == 2: a single combine finishes the scan, no down sweep needed
            Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 0]))
            return
        else:
            # L == 1: nothing to accumulate
            return

        # down sweep (first 2 steps unfolded)
        # strided slices pick the nodes of each tree level, coarsest first
        Aa = A[:, :, 2**(num_steps-2)-1:L:2**(num_steps-2)]
        Xa = X[:, :, 2**(num_steps-2)-1:L:2**(num_steps-2)]
        Xa[:, :, 2].add_(Aa[:, :, 2].mul(Xa[:, :, 1]))
        Aa[:, :, 2].mul_(Aa[:, :, 1])

        for k in range(num_steps-3, -1, -1):
            Aa = A[:, :, 2**k-1:L:2**k]
            Xa = X[:, :, 2**k-1:L:2**k]

            T = Xa.size(2)
            Aa = Aa.view(B, D, T//2, 2, -1)
            Xa = Xa.view(B, D, T//2, 2, -1)

            # left element of every pair (except the first) receives the
            # finished prefix held by the right element of the previous pair
            Xa[:, :, 1:, 0].add_(Aa[:, :, 1:, 0].mul(Xa[:, :, :-1, 1]))
            Aa[:, :, 1:, 0].mul_(Aa[:, :, :-1, 1])

    @staticmethod
    def pscan_rev(A, X):
        """
        Mirror image of pscan(): the same in-place scan run from the end of the
        sequence towards the start, so X[t] accumulates A[t] * X[t+1].
        Mutates BOTH A and X; returns None. Used by backward().
        """
        # A : (B, D, L, N)
        # X : (B, D, L, N)
        B, D, L, _ = A.size()
        num_steps = int(math.log2(L))

        # up sweep (last 2 steps unfolded)
        # same pairing as pscan(), but the right element is folded into the left
        Aa = A
        Xa = X
        for _ in range(num_steps-2):
            T = Xa.size(2)
            Aa = Aa.view(B, D, T//2, 2, -1)
            Xa = Xa.view(B, D, T//2, 2, -1)
                    
            Xa[:, :, :, 0].add_(Aa[:, :, :, 0].mul(Xa[:, :, :, 1]))
            Aa[:, :, :, 0].mul_(Aa[:, :, :, 1])

            Aa = Aa[:, :, :, 0]
            Xa = Xa[:, :, :, 0]

        # we have only 4, 2 or 1 nodes left
        if Xa.size(2) == 4:
            Xa[:, :, 2].add_(Aa[:, :, 2].mul(Xa[:, :, 3]))
            Aa[:, :, 2].mul_(Aa[:, :, 3])

            Xa[:, :, 0].add_(Aa[:, :, 0].mul(Xa[:, :, 1].add(Aa[:, :, 1].mul(Xa[:, :, 2]))))
        elif Xa.size(2) == 2:
            Xa[:, :, 0].add_(Aa[:, :, 0].mul(Xa[:, :, 1]))
            return
        else:
            return

        # down sweep (first 2 steps unfolded)
        Aa = A[:, :, 0:L:2**(num_steps-2)]
        Xa = X[:, :, 0:L:2**(num_steps-2)]
        Xa[:, :, 1].add_(Aa[:, :, 1].mul(Xa[:, :, 2]))
        Aa[:, :, 1].mul_(Aa[:, :, 2])

        for k in range(num_steps-3, -1, -1):
            Aa = A[:, :, 0:L:2**k]
            Xa = X[:, :, 0:L:2**k]

            T = Xa.size(2)
            Aa = Aa.view(B, D, T//2, 2, -1)
            Xa = Xa.view(B, D, T//2, 2, -1)

            # right element of every pair (except the last) receives the
            # finished suffix held by the left element of the next pair
            Xa[:, :, :-1, 1].add_(Aa[:, :, :-1, 1].mul(Xa[:, :, 1:, 0]))
            Aa[:, :, :-1, 1].mul_(Aa[:, :, 1:, 0])

    @staticmethod
    def forward(ctx, A_in, X_in):
        """
        Applies the parallel scan operation, as defined above. Returns a new tensor.

        Args:
            A_in : (B, L, D, N)
            X_in : (B, L, D, N)

        Returns:
            H : (B, L, D, N)
        """
        L = X_in.size(1)

        # cloning is required because of the in-place ops
        if L == npo2(L):
            A = A_in.clone()
            X = X_in.clone()
        else:
            # pad tensors (and clone btw)
            A = pad_npo2(A_in) # (B, npo2(L), D, N)
            X = pad_npo2(X_in) # (B, npo2(L), D, N)
        
        # prepare tensors
        A = A.transpose(2, 1) # (B, D, npo2(L), N)
        X = X.transpose(2, 1) # (B, D, npo2(L), N)

        # parallel scan (modifies X in-place)
        PScan.pscan(A, X)

        # backward needs the untouched input A_in and the scanned states X
        ctx.save_for_backward(A_in, X)
        
        # slice [:, :L] (cut if there was padding)
        return X.transpose(2, 1)[:, :L]
    
    @staticmethod
    def backward(ctx, grad_output_in):
        """
        Propagates the gradient from the scan output back to both inputs.

        Args:
            ctx : holds A_in (B, L, D, N) and the scanned states X (B, D, npo2(L), N)
            grad_output_in : (B, L, D, N)

        Returns:
            gradA : (B, L, D, N), gradX : (B, L, D, N)
        """
        A_in, X = ctx.saved_tensors

        L = grad_output_in.size(1)

        # cloning is required because of the in-place ops
        if L == npo2(L):
            grad_output = grad_output_in.clone()
            # the next padding will clone A_in
        else:
            grad_output = pad_npo2(grad_output_in) # (B, npo2(L), D, N)
            A_in = pad_npo2(A_in) # (B, npo2(L), D, N)

        # prepare tensors
        grad_output = grad_output.transpose(2, 1)
        A_in = A_in.transpose(2, 1) # (B, D, npo2(L), N)
        A = torch.nn.functional.pad(A_in[:, :, 1:], (0, 0, 0, 1)) # (B, D, npo2(L), N) shift 1 to the left (see hand derivation)

        # reverse parallel scan (modifies grad_output in-place)
        PScan.pscan_rev(A, grad_output)

        # gradient w.r.t. A[t] is H[t-1] * gradH[t]; position 0 has no
        # predecessor and therefore stays 0
        Q = torch.zeros_like(X)
        Q[:, :, 1:].add_(X[:, :, :-1] * grad_output[:, :, 1:])

        return Q.transpose(2, 1)[:, :L], grad_output.transpose(2, 1)[:, :L]
    
# functional entry point: pscan(A, X) with A, X of shape (B, L, D, N)
pscan = PScan.apply

@dataclass
class MambaConfig:
    d_model: int # D
    n_layers: int
    dt_rank: Union[int, str] = 'auto'
    d_state: int = 16 # N in paper/comments
    expand_factor: int = 2 # E in paper/comments
    d_conv: int = 4

    dt_min: float = 0.001
    dt_max: float = 0.1
    dt_init: str = "random" # "random" or "constant"
    dt_scale: float = 1.0
    dt_init_floor = 1e-4

    bias: bool = False
    conv_bias: bool = True

    pscan: bool = True # use parallel scan mode or sequential mode when training

    def __post_init__(self):
        self.d_inner = self.expand_factor * self.d_model 

        if self.dt_rank == 'auto':
            self.dt_rank = math.ceil(self.d_model / 16)

class Mamba(nn.Module):
    """
    Stack of config.n_layers ResidualBlocks followed by a final RMSNorm.

    forward() processes a whole sequence at once; step() processes a single
    timestep using per-layer caches. Both apply the same final normalisation,
    so stepping through a sequence reproduces forward().
    """

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.config = config

        self.layers = nn.ModuleList([ResidualBlock(config) for _ in range(config.n_layers)])
        self.norm_f = RMSNorm(config.d_model)

    def forward(self, x):
        # x : (B, L, D)

        # y : (B, L, D)

        for layer in self.layers:
            x = layer(x)

        x = self.norm_f(x)

        return x
    
    def step(self, x, caches):
        # x : (B, D)
        # caches : one cache per layer, each in the format expected by
        #          that layer's step(); the list is updated in place

        # y : (B, D)
        # caches : the same list, holding the updated caches

        for i, layer in enumerate(self.layers):
            x, caches[i] = layer.step(x, caches[i])

        # forward() ends with norm_f; without it here the recurrent path
        # returned un-normalised activations that disagree with forward()
        x = self.norm_f(x)

        return x, caches

class ResidualBlock(nn.Module):
    """
    Pre-norm residual wrapper around one MambaBlock:
    output = mixer(norm(x)) + x.
    """

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.mixer = MambaBlock(config)
        self.norm = RMSNorm(config.d_model)

    def forward(self, x):
        # whole-sequence path: normalise, mix, then add the skip connection
        normed = self.norm(x)
        mixed = self.mixer(normed)
        return mixed + x
    
    def step(self, x, cache):
        # single-timestep path: same computation, with the mixer's cache
        # threaded through and handed back to the caller
        mixed, next_cache = self.mixer.step(self.norm(x), cache)
        return mixed + x, next_cache

class MambaBlock(nn.Module):
    """
    One Mamba mixer: input projection into two branches, a causal depthwise
    convolution + selective state-space scan on the x branch, a SiLU gate on
    the z branch, and an output projection.

    Shape letters used in the comments: B batch, L sequence length,
    D = d_model, ED = d_inner, N = d_state.

    forward() handles a full sequence; step() handles one timestep with an
    explicit cache and produces the same values as forward().
    """

    def __init__(self, config: MambaConfig):
        super().__init__()

        self.config = config

        # projects block input from D to 2*ED (two branches)
        self.in_proj = nn.Linear(config.d_model, 2 * config.d_inner, bias=config.bias)

        # depthwise conv (one filter per channel); padding of d_conv-1 plus
        # truncating the output to L steps in forward() makes it causal
        self.conv1d = nn.Conv1d(in_channels=config.d_inner, out_channels=config.d_inner, 
                              kernel_size=config.d_conv, bias=config.conv_bias, 
                              groups=config.d_inner,
                              padding=config.d_conv - 1)
        
        # projects x to input-dependent Δ, B, C
        self.x_proj = nn.Linear(config.d_inner, config.dt_rank + 2 * config.d_state, bias=False)

        # projects Δ from dt_rank to d_inner
        self.dt_proj = nn.Linear(config.dt_rank, config.d_inner, bias=True)

        # dt initialization
        # dt weights
        dt_init_std = config.dt_rank**-0.5 * config.dt_scale
        if config.dt_init == "constant":
            nn.init.constant_(self.dt_proj.weight, dt_init_std)
        elif config.dt_init == "random":
            nn.init.uniform_(self.dt_proj.weight, -dt_init_std, dt_init_std)
        else:
            raise NotImplementedError
        
        # dt bias: sample dt log-uniformly in [dt_min, dt_max] (floored at
        # dt_init_floor) and store the inverse softplus of it, so that
        # softplus(bias) == dt at initialisation
        dt = torch.exp(
            torch.rand(config.d_inner) * (math.log(config.dt_max) - math.log(config.dt_min)) + math.log(config.dt_min)
        ).clamp(min=config.dt_init_floor)
        inv_dt = dt + torch.log(-torch.expm1(-dt)) 
        with torch.no_grad():
            self.dt_proj.bias.copy_(inv_dt)

        # A is stored as log(1..N) per channel and used as -exp(A_log),
        # which keeps the effective A strictly negative
        A = torch.arange(1, config.d_state + 1, dtype=torch.float32).repeat(config.d_inner, 1)
        self.A_log = nn.Parameter(torch.log(A))
        self.D = nn.Parameter(torch.ones(config.d_inner))

        # projects block output from ED back to D
        self.out_proj = nn.Linear(config.d_inner, config.d_model, bias=config.bias)

    def forward(self, x):
        # x : (B, L, D)

        # y : (B, L, D)

        _, L, _ = x.shape

        xz = self.in_proj(x) # (B, L, 2*ED)
        x, z = xz.chunk(2, dim=-1) # (B, L, ED), (B, L, ED)

        # x branch
        x = x.transpose(1, 2) # (B, ED, L)
        x = self.conv1d(x)[:, :, :L] # depthwise convolution over time, with a short filter
        x = x.transpose(1, 2) # (B, L, ED)

        x = F.silu(x)
        y = self.ssm(x)

        # z branch
        z = F.silu(z)

        output = y * z
        output = self.out_proj(output) # (B, L, D)

        return output

    def _ssm_params(self, x):
        # Shared by ssm() and ssm_step(): returns A (ED, N), D (ED,) and the
        # input-dependent delta (..., ED), B (..., N), C (..., N).
        A = -torch.exp(self.A_log.float()) # (ED, N)
        D = self.D.float()

        deltaBC = self.x_proj(x) # (..., dt_rank+2*N)

        delta, B, C = torch.split(deltaBC, [self.config.dt_rank, self.config.d_state, self.config.d_state], dim=-1)
        delta = F.softplus(self.dt_proj(delta)) # (..., ED)

        return A, D, delta, B, C

    @staticmethod
    def _discretize(x, delta, A, B):
        # Returns deltaA = exp(delta * A) and BX = delta * B * x, both
        # (..., ED, N). unsqueeze(-2) inserts the ED axis for B in both the
        # (B, L, N) and the (B, N) layouts.
        deltaA = torch.exp(delta.unsqueeze(-1) * A)
        deltaB = delta.unsqueeze(-1) * B.unsqueeze(-2)

        return deltaA, deltaB * (x.unsqueeze(-1))

    @staticmethod
    def _readout(h, C, D, x):
        # y = h @ C + D * x : (..., ED, N) @ (..., N, 1) -> (..., ED)
        return (h @ C.unsqueeze(-1)).squeeze(-1) + D * x
    
    def ssm(self, x):
        # x : (B, L, ED)

        # y : (B, L, ED)

        A, D, delta, B, C = self._ssm_params(x)

        if self.config.pscan:
            y = self.selective_scan(x, delta, A, B, C, D)
        else:
            y = self.selective_scan_seq(x, delta, A, B, C, D)

        return y
    
    def selective_scan(self, x, delta, A, B, C, D):
        # x : (B, L, ED)
        # Δ : (B, L, ED)
        # A : (ED, N)
        # B : (B, L, N)
        # C : (B, L, N)
        # D : (ED)

        # y : (B, L, ED)

        deltaA, BX = self._discretize(x, delta, A, B) # (B, L, ED, N) each
        
        hs = pscan(deltaA, BX) # (B, L, ED, N)

        return self._readout(hs, C, D, x)
    
    def selective_scan_seq(self, x, delta, A, B, C, D):
        # same arguments and result as selective_scan(), but the recurrence
        # h[t] = deltaA[t] * h[t-1] + BX[t] is unrolled in a Python loop

        _, L, _ = x.shape

        deltaA, BX = self._discretize(x, delta, A, B) # (B, L, ED, N) each

        h = torch.zeros(x.size(0), self.config.d_inner, self.config.d_state, device=deltaA.device) # (B, ED, N)
        hs = []

        for step_idx in range(0, L):
            h = deltaA[:, step_idx] * h + BX[:, step_idx]
            hs.append(h)
            
        hs = torch.stack(hs, dim=1) # (B, L, ED, N)

        return self._readout(hs, C, D, x)

    def step(self, x, cache):
        # x : (B, D)
        # cache : (h, inputs)
                # h : (B, ED, N), or None before the first step
                # inputs : (B, ED, d_conv-1), the previous conv inputs
        
        # y : (B, D)
        # cache : (h, inputs)
        
        h, inputs = cache
        
        xz = self.in_proj(x) # (B, 2*ED)
        x, z = xz.chunk(2, dim=1) # (B, ED), (B, ED)

        # x branch
        x_cache = x.unsqueeze(2)
        # the conv sees exactly d_conv inputs; output index d_conv-1 is the
        # one whose window covers them without touching the zero padding
        x = self.conv1d(torch.cat([inputs, x_cache], dim=2))[:, :, self.config.d_conv-1] # (B, ED)

        x = F.silu(x)
        y, h = self.ssm_step(x, h)

        # z branch
        z = F.silu(z)

        output = y * z
        output = self.out_proj(output) # (B, D)

        # prepare cache for next call
        inputs = torch.cat([inputs[:, :, 1:], x_cache], dim=2) # (B, ED, d_conv-1)
        cache = (h, inputs)
        
        return output, cache

    def ssm_step(self, x, h):
        # x : (B, ED)
        # h : (B, ED, N), or None to start from a zero state

        # y : (B, ED)
        # h : (B, ED, N)

        # todo : A and D do not depend on the timestep, so they need not be
        # recomputed on every call; also remove the .float() casts
        A, D, delta, B, C = self._ssm_params(x)

        deltaA, BX = self._discretize(x, delta, A, B) # (B, ED, N) each

        if h is None:
            h = torch.zeros(x.size(0), self.config.d_inner, self.config.d_state, device=deltaA.device) # (B, ED, N)

        h = deltaA * h + BX # (B, ED, N)

        y = self._readout(h, C, D, x) # (B, ED)

        # h is returned with its full (B, ED, N) shape. The former
        # h.squeeze(1) dropped the channel axis whenever ED == 1, which made
        # the next step broadcast to the wrong shape.
        return y, h

# taken straight from https://github.com/johnma2006/mamba-minimal/blob/master/model.py
class RMSNorm(nn.Module):
    """
    Root-mean-square normalisation over the last dimension with a learned
    per-feature scale (initialised to 1).
    """

    def __init__(self, d_model: int, eps: float = 1e-5):
        super().__init__()

        self.weight = nn.Parameter(torch.ones(d_model))
        self.eps = eps

    def forward(self, x):
        # 1 / sqrt(mean(x^2) + eps), one value per feature vector
        mean_square = x.pow(2).mean(-1, keepdim=True)
        inv_rms = torch.rsqrt(mean_square + self.eps)

        return x * inv_rms * self.weight

In [24]:
# refreshData() needs the Nasdaq Data Link API: uncomment the two lines below and set a valid API key.

#import nasdaqdatalink  as ndl
#ndl.ApiConfig.api_key  = 'API KEY'

def _latest_quote(symbol):
    """Return today's open/high/low/close/volume for ``symbol`` from Robinhood.

    Values are passed through exactly as the API returns them; callers
    convert them to float. 'close' is the last trade price.
    """
    fundamentals = r.get_fundamentals(symbol)[0]
    quote = r.get_quotes(symbol)[0]
    return {
        'open': fundamentals['open'],
        'high': fundamentals['high'],
        'low': fundamentals['low'],
        'close': quote['last_trade_price'],
        'volume': fundamentals['volume'],
    }


def refreshData():
    """Rebuild SPY.csv and one <ticker>.csv per entry of tickerList.

    For every ticker the Sharadar daily history is merged with the SPY OHLC
    average, today's live Robinhood quote is appended as the newest row, and
    the columns 'change', 'pct_chg' and 'linreg' are derived before saving.

    Requires the `ndl` (nasdaqdatalink) module to be imported and configured
    and an authenticated Robinhood session (`r`). Returns None; the results
    are the CSV files written to the working directory.
    """
    date_filter = { 'gte': '1996-01-01', 'lte': f'{current_date}' }
    price_cols = ['open', 'high', 'low', 'close']
    window = 100  # look-back (rows) of the rolling linear regression

    # --- SPY benchmark: keep only the date and the OHLC average ---
    spy = ndl.get_table('SHARADAR/SFP', ticker=['SPY'],date = date_filter)
    sp = pd.DataFrame(spy)
    sp = sp.drop(columns=['closeunadj','lastupdated','ticker'])
    sp = sp.sort_values(by='date', ascending=True)
    # Convert data types spy
    sp['date'] = pd.to_datetime(sp['date'])
    sp[price_cols] = sp[price_cols].astype(float)
    sp['OHLC'] = round((sp['open']+sp['high']+sp['low']+sp['close'])/4,2)
    sp = sp[['date','OHLC']]
    sp = sp[1:]
    sp.to_csv('SPY.csv')

    # Today's SPY average is identical for every ticker, so it is fetched
    # once here instead of once per loop iteration.
    spy_today = _latest_quote('SPY')
    spy_ohlc_today = round(sum(float(spy_today[c]) for c in price_cols)/4,2)

    for i in tickerList:
        data = ndl.get_table('SHARADAR/SEP', ticker=[i],date = date_filter)
        df = pd.DataFrame(data)
        df = df.drop(columns=['closeunadj','lastupdated','ticker'])
        df = df.sort_values(by='date', ascending=True)
        # Convert data types
        df['date'] = pd.to_datetime(df['date'])
        df[price_cols] = df[price_cols].astype(float)
        df = pd.merge(df, sp, on='date', how='outer')

        # build row of today's data
        today = _latest_quote(i)
        new_row = pd.DataFrame({
        'date': [current_date.date()],
        'open': [today['open']],
        'high': [today['high']],
        'low': [today['low']],
        'close': [today['close']],
        'closeadj': [today['close']],
        'volume': [today['volume']],
        'OHLC': [spy_ohlc_today]
        })
        df = pd.concat([df, new_row], ignore_index=True)
        # The appended row carries the raw API values; cast every numeric
        # column so none of them is left as a mixed float/str object column.
        numeric_cols = price_cols + ['closeadj', 'volume']
        df[numeric_cols] = df[numeric_cols].astype(float)

        # Day-over-day move as current minus previous close, so a rising
        # price yields a positive change / pct_chg. (The earlier
        # previous-minus-current form inverted the sign of both columns.)
        prev_close = df['close'].shift(1)
        df['change'] = round(df['close'] - prev_close,2)
        df['pct_chg'] = round((df['change'] / prev_close) * 100, 2)
        df['date'] = pd.to_datetime(df['date'])

        # Rolling regression of close on the row index over the previous
        # `window` rows (current row excluded); keep the fitted value at the
        # last row of each window.
        lin_reg=[]
        for s in range(window,len(df)):
            x = df.index[s-window:s]
            y = np.round(df['close'].iloc[s-window:s].values,2)
            m,c,*_= stats.linregress(x,y)
            lin_reg.append(m * x[-1] + c)

        # .copy() so the column assignments below act on an independent frame
        df = df[window:].copy()
        print(len(df))
        df['linreg']= lin_reg
        # Drop rows where the regression value jumps by more than +4% / -7%
        # versus the previous row; rows with a NaN ratio fail both tests and
        # are dropped as well.
        df['test'] = df['linreg'] / df['linreg'].shift(1)
        print(len(df))
        df = df[(df['test'] <= 1.04) & (df['test'] >= .93)]
        print(len(df))
        df = df.drop(columns=['test'])
        df.to_csv(f'{i}.csv', index=False)

In [26]:
%matplotlib widget

# Length of the backtest window in rows. The test split is num+2 rows because
# the backtest also reads the row just before the window.
num=225
parser = argparse.ArgumentParser()
# BooleanOptionalAction gives --use-cuda / --no-use-cuda. With a plain
# value-taking option, "--use-cuda False" stored the truthy string 'False'.
parser.add_argument('--use-cuda', action=argparse.BooleanOptionalAction, default=True,
                    help='CUDA training.')
parser.add_argument('--seed', type=int, default=1, help='Random seed.')
parser.add_argument('--epochs', type=int, default=100,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.004,
                    help='Learning rate.')
parser.add_argument('--wd', type=float, default=1e-5,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=64,#16 best = 32 64
                    help='Dimension of representations')
parser.add_argument('--layer', type=int, default=16, #2 best = 8 16
                    help='Num of layers')
parser.add_argument('--n-test', type=int, default=num+2,#300
                    help='Size of test set')
parser.add_argument('--ts-code', type=str, default="COST",
                    help='Stock code')                    

# Parse an empty argv: inside a notebook sys.argv belongs to the kernel, so
# the parser only serves as a holder of the defaults above.
args = parser.parse_args([])
args.cuda = args.use_cuda and torch.cuda.is_available()

def evaluation_metric(y_test,y_hat):
    """Print MSE, RMSE, MAE and R2 of y_hat against y_test on one line.

    Side effect: the mean squared error is also stored in the module-level
    global MSE, which scanLoop() reads right after calling this function.
    Returns None.
    """
    global MSE
    MSE = mean_squared_error(y_test, y_hat)
    scores = (
        MSE,
        MSE**0.5,  # RMSE
        mean_absolute_error(y_test,y_hat),
        r2_score(y_test,y_hat),
    )
    print('%.4f %.4f %.4f %.4f' % scores)
    

def set_seed(seed,cuda):
    """Seed NumPy and PyTorch (and the current CUDA device when cuda is truthy)."""
    for seeder in (np.random.seed, torch.manual_seed):
        seeder(seed)
    if not cuda:
        return
    torch.cuda.manual_seed(seed)

def dateinf(series, n_test):
    """Placeholder for reporting the train/test date boundaries of ``series``.

    All reporting prints are currently disabled, so the function only
    measures the length of ``series`` and returns None.
    """
    n_rows = len(series)
    # Disabled diagnostics:
    #   print('Training start', series[0])
    #   print('Training end',   series[n_rows - n_test - 1])
    #   print('Testing start',  series[n_rows - n_test])
    #   print('Testing end',    series[n_rows - 1])
    return None

# Seed NumPy and PyTorch once, before any model is constructed, so that the
# random weight initialisation is repeatable between runs.
set_seed(args.seed,args.cuda)

class Net(nn.Module):
    """
    Regression head around the Mamba stack:
    Linear(in_dim -> hidden) -> Mamba -> Linear(hidden -> out_dim) -> Tanh.

    Width and depth are read from the global args (args.hidden, args.layer).
    """

    def __init__(self,in_dim,out_dim):
        super().__init__()
        width = args.hidden
        self.config = MambaConfig(d_model=width, n_layers=args.layer)
        # built in this order so parameter initialisation consumes the RNG
        # in the same sequence as the layers appear
        stages = [
            nn.Linear(in_dim,width),
            Mamba(self.config),
            nn.Linear(width,out_dim),
            nn.Tanh(),
        ]
        self.mamba = nn.Sequential(*stages)
    
    def forward(self,x):
        # collapse every dimension into one flat vector of predictions
        return self.mamba(x).flatten()

def PredictWithData(trainX, trainy, testX):
    """Train a fresh Net on (trainX, trainy) and return its predictions for testX.

    Args:
        trainX : 2-D numpy array (rows = timesteps, columns = features)
        trainy : 1-D numpy array with one target per row of trainX
        testX  : 2-D numpy array with the same columns as trainX

    Returns:
        1-D numpy array with one prediction per row of testX.

    The whole training set is fed as a single sequence (batch size 1) for
    args.epochs full-batch Adam steps on the MSE loss.
    """
    device = torch.device('cuda' if args.cuda else 'cpu')

    clf = Net(len(trainX[0]),1).to(device)
    opt = torch.optim.Adam(clf.parameters(),lr=args.lr,weight_decay=args.wd)

    # unsqueeze(0) adds the batch dimension expected by the model
    xt = torch.from_numpy(trainX).float().unsqueeze(0).to(device)
    xv = torch.from_numpy(testX).float().unsqueeze(0).to(device)
    yt = torch.from_numpy(trainy).float().to(device)

    clf.train()
    for e in range(args.epochs):
        z = clf(xt)
        loss = F.mse_loss(z,yt)
        opt.zero_grad()
        loss.backward()
        opt.step()

    # Inference needs no gradients: no_grad() avoids building (and keeping
    # in memory) the autograd graph for the test sequence.
    clf.eval()
    with torch.no_grad():
        mat = clf(xv)
    yhat = mat.cpu().numpy().flatten()
    return yhat
def scanLoop():
    """Train one model per ticker, print its fit metrics and any buy/sell signal.

    For every symbol in tickerList the CSV written by refreshData() is loaded,
    a model is trained on all rows but the last args.n_test, and the predicted
    rate of change is turned back into a predicted price.

    Results are published through module-level globals, which the backtest
    cell reads for the LAST ticker processed:
        data  : that ticker's DataFrame (without close / change / pct_chg)
        data1 : actual close prices of the test window
        finalpredicted_stock_price : predicted prices of the test window
        rdf   : summary table with one row per ticker (ticker, score, pred)

    Orders are only printed, never sent (the order calls are commented out).
    """
    global data1, finalpredicted_stock_price, data, rdf
    rdf = pd.DataFrame()
    rtic = []
    rscr = []
    predList= []
    openlist = {}  # symbol -> quantity currently held
    account = r.load_account_profile()
    holdings = r.account.build_holdings()
    bp = account['margin_balances']['unallocated_margin_cash']
    # a tenth of the available cash is budgeted per position
    amount = round(float(bp) / 10 ,2)  
    print('\n Amount per stock: '+str(amount))
    for symbol,stock in holdings.items(): 
        print(symbol, stock['quantity'])
        openlist[symbol] = stock['quantity']
    for i in tqdm(tickerList):
        data = pd.read_csv(i+'.csv')
        data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
        close = data.pop('close').values
        ratechg = data['pct_chg'].apply(lambda x:0.01*x).values
        data.drop(columns=['change','pct_chg'],inplace=True)
        # features: every remaining column from the third one onwards
        dat = data.iloc[:,2:].values
        trainX, testX = dat[:-args.n_test, :], dat[-args.n_test:, :]
        trainy = ratechg[:-args.n_test]
        predictions = PredictWithData(trainX, trainy, testX)
        data1 = close[-args.n_test:]
        # predicted price = previous actual close * (1 + predicted rate)
        finalpredicted_stock_price = [
            close[-args.n_test-1+l]*(1+predictions[l])
            for l in range(args.n_test)
        ]
        dateinf(data['date'],args.n_test)
        print('MSE RMSE MAE R2')
        evaluation_metric(data1, finalpredicted_stock_price)  # also sets the global MSE
        rtic.append(i)
        rscr.append(round(MSE/data1[-1],3)) #score = MSE / price because smaller prices = smaller mse
        predList.append(finalpredicted_stock_price[-1]-data1[-1]) #pred = model's predicted change
        print(i)
        last_linreg = data['linreg'].iloc[-1]
        if data1[-1] < last_linreg:
            print('below') #prints below if price is below linreg of last 100 days
        if i not in openlist:#buy
            # price has just crossed above the prediction while still below the trend line
            if data1[-1] > finalpredicted_stock_price[-1] and data1[-2] < finalpredicted_stock_price[-2] and data1[-1] < last_linreg:
                print(f'buy signal on {i}')
                #order = r.orders.order_buy_fractional_by_price(i, amount, timeInForce='gfd', extendedHours=False, jsonify=True)
        else:#sellsig
            # prediction has just crossed above the price while the price is above the trend line
            if data1[-1] > last_linreg and finalpredicted_stock_price[-1] > data1[-1] and data1[-2] > finalpredicted_stock_price[-2]:
                print(f'sell signal on {i}')
                #sell = r.orders.order_sell_fractional_by_quantity(i, openlist[i]) 

    
    rdf['ticker']=rtic
    rdf['score']=rscr
    rdf['pred']=predList
    # Sort first, then truncate: head() before sort_values() only ordered the
    # first five tickers instead of showing the five best scores.
    print(rdf.sort_values(by='score', ascending=True).head())

In [28]:
# refreshData()  # disabled: only works once the Nasdaq Data Link (ndl) API is configured
scanLoop()


 Amount per stock: 1502.23
MSFT 0.00721600
VOO 35.10973100


 33%|███▎      | 1/3 [00:08<00:17,  8.78s/it]

MSE RMSE MAE R2
7.1029 2.6651 2.0231 0.9854
AMZN


 67%|██████▋   | 2/3 [00:17<00:08,  8.57s/it]

MSE RMSE MAE R2
79.5136 8.9170 6.3255 0.9924
COST
below


100%|██████████| 3/3 [00:25<00:00,  8.59s/it]

MSE RMSE MAE R2
6.5341 2.5562 1.7447 0.9793
GOOGL
below
  ticker  score      pred
2  GOOGL  0.036  2.946664
0   AMZN  0.037 -0.750879
1   COST  0.093 -3.756961





In [30]:
#Backtest (uses the last ticker processed by scanLoop(), i.e. the last entry of tickerList)
# Relies on the globals data, data1, finalpredicted_stock_price and rdf that
# scanLoop() leaves behind, so scanLoop() must have been run first.
# Double quotes outside so the f-string also parses on Python < 3.12.
print(f"testing {rdf['ticker'].iloc[-1]}")
START_CAPITAL = 500
net = START_CAPITAL
openPrice = 0  # number of shares held (0 = no open position), despite the name
openShort = 0
for i in range(-num,-1):
    if openPrice == 0:#buy
        if data1[i] > finalpredicted_stock_price[i] and data1[i-1] < finalpredicted_stock_price[i-1] and data1[i] < data['linreg'].iloc[i]:#price goes above pred
            openPrice = net / data1[i]
            print('open date = ', data['date'].iloc[i], 'Buyprice =', data1[i])
    else:#sell
        if data1[i] > data['linreg'].iloc[i] and finalpredicted_stock_price[i] > data1[i] and data1[i-1] > finalpredicted_stock_price[i-1]:#pred goes below price
            net = data1[i]*openPrice
            print('\nclose date =', data['date'].iloc[i], 'close price = ', data1[i],'net = ',round(net,2))
            openPrice = 0

# NOTE(review): a position still open after the last bar is not marked to
# market; `net` then holds the cash value from before that final buy.

# Price change of the stock itself over the window, i.e. the buy-and-hold
# benchmark (previously mislabelled as "Strategy profit").
print(f'Buy and hold profit: {(data1[-1]/data1[-num])*100-100:.2f}%')
print(f' Strategy profit: {(net/START_CAPITAL)*100-100 :.2f}%')

testing GOOGL
open date =  2023-08-28 00:00:00 Buyprice = 131.01

close date = 2023-09-01 00:00:00 close price =  135.66 net =  517.75
open date =  2023-09-25 00:00:00 Buyprice = 131.11

close date = 2023-10-10 00:00:00 close price =  138.06 net =  545.19
open date =  2023-10-23 00:00:00 Buyprice = 136.5

close date = 2023-11-24 00:00:00 close price =  136.69 net =  545.95
open date =  2023-12-05 00:00:00 Buyprice = 130.99

close date = 2023-12-26 00:00:00 close price =  141.52 net =  589.84
open date =  2024-02-01 00:00:00 Buyprice = 141.16

close date = 2024-02-12 00:00:00 close price =  147.53 net =  616.46
open date =  2024-02-20 00:00:00 Buyprice = 141.12

close date = 2024-03-19 00:00:00 close price =  147.03 net =  642.27
open date =  2024-06-20 00:00:00 Buyprice = 176.3

close date = 2024-06-26 00:00:00 close price =  183.88 net =  669.89
open date =  2024-07-01 00:00:00 Buyprice = 182.99

close date = 2024-07-08 00:00:00 close price =  189.03 net =  692.0
open date =  2024-07-