In [46]:
import torch.nn as nn
import sqlalchemy
import pandas as pd
import torch as T
import pandas_ta
from torch.utils.data import Dataset
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize, StandardScaler


class SqliteDataSet(Dataset):
    def __init__(self,
                 query='',
                 label_col='',
                 transform=None):
        self.label_col = label_col
        self.transform=transform
        self.result_set = all_tickers_preprocess(query).dropna()
        self.result_set.reset_index(drop=True, inplace=True)
        self.length = self.result_set.shape[0]


    def __getitem__(self, index) -> T.Tuple[T.Tensor, int]:
        data = self.result_set.loc[index]
        rate = ( data[self.label_col] - data['Adj Close'] ) / data['Adj Close'] * 100
        # rate = data['rate']
        x_columns =['delta_p5', 'delta_p20', 'delta_p90', 'delta_v5', 'delta_v20', 'delta_v90', 'PV_DELTA_R5','PV_DELTA_R20','PV_DELTA_R90',
                    'PV_DELTA_R20_PRIOR1', 'PV_DELTA_R20_PRIOR2','PV_DELTA_R20_PRIOR3', 'PV_DELTA_R20_PRIOR4', 'PV_DELTA_R20_PRIOR5', 'MFI_14','BULLISH','BEARISH']
        x = T.tensor(data[x_columns].astype(float)).to(T.float32)
        if rate > 5:
            y = 5
        elif 2 < rate < 5:
            y = 4
        elif rate < -5:
            y = 0
        elif -5 <= rate < -2:
            y = 2
        elif -2 <= rate < 0:
            y =1
        else:
            y = 3

        if self.transform:
            x = self.transform(x)
        return x, y

    def __len__(self):
        return self.length







In [47]:
import pandas as pd
import sqlalchemy
import pandas_ta
from sklearn.preprocessing import normalize, StandardScaler


def all_tickers_preprocess(query) -> pd.DataFrame:
    sqlengine = sqlalchemy.create_engine('sqlite:///D:\\sqlite\\data\\finance.db')
    # query = r"""select date,Open ,High ,Low ,Close ,"Adj Close" ,Volume ,MACD_12_26_9 ,MACDh_12_26_9 ,MACDs_12_26_9 ,RSI_14 ,"BBL_5_2.0" ,"BBM_5_2.0" ,"BBU_5_2.0" ,"BBB_5_2.0" ,"BBP_5_2.0" ,OBV ,SMA_20 ,EMA_50 ,STOCHk_14_3_3 ,STOCHd_14_3_3 ,ADX_14 ,DMP_14 ,DMN_14 ,AD ,STDEV_30 ,VWAP_D ,PV_DERIVATIVE ,Y_CLOSE5_MIN,ticker from STOCK_DAILY WHERE ticker in ('TSLA','AAPL')"""
    result_set = pd.read_sql_query(query, sqlengine.connect())
    grouped = result_set.groupby(['ticker'])
    finalResult = pd.DataFrame()
    for name, group in grouped:
        # group.reset_index()
        # group.set_index('Date',inplace=True)
        group = ticker_preprocess(group)
        finalResult = pd.concat([group,finalResult])
    return finalResult

def ticker_preprocess(result_set) -> pd.DataFrame:
    # print('current ticker group {}'.format(resultSet))
    norm_columns = ['delta_p5', 'delta_p20', 'delta_p90', 'delta_v5', 'delta_v20', 'delta_v90', 'PV_DELTA_R5','PV_DELTA_R20','PV_DELTA_R90',
                    'PV_DELTA_R20_PRIOR1', 'PV_DELTA_R20_PRIOR2','PV_DELTA_R20_PRIOR3', 'PV_DELTA_R20_PRIOR4', 'PV_DELTA_R20_PRIOR5', 'MFI_14']
    result_set.ta.vwma(close='Adj Close', append=True, length=5)
    result_set.ta.vwma(close='Adj Close', append=True, length=20)
    result_set.ta.vwma(close='Adj Close', append=True, length=90)
    result_set.ta.mfi(close='Adj Close', append=True)
    result_set['Volume_5'] = result_set['Volume'].rolling(5).mean()
    result_set['Volume_20'] = result_set['Volume'].rolling(20).mean()
    result_set['Volume_90'] = result_set['Volume'].rolling(90).mean()

    result_set['delta_p5'] = result_set['Adj Close'] - result_set['VWMA_5']
    result_set['delta_p20'] = result_set['Adj Close'] - result_set['VWMA_20']
    result_set['delta_p90'] = result_set['Adj Close'] - result_set['VWMA_90']

    result_set['delta_v5'] = result_set['Volume'] - result_set['Volume_5']
    result_set['delta_v20'] = result_set['Volume'] - result_set['Volume_20']
    result_set['delta_v90'] = result_set['Volume'] - result_set['Volume_90']
    
    result_set['PV_DELTA_R5'] = result_set['delta_p5'] /result_set['delta_v5'] 
    result_set['PV_DELTA_R20'] = result_set['delta_p20'] /result_set['delta_v20'] 
    result_set['PV_DELTA_R90'] = result_set['delta_p90'] /result_set['delta_v90'] 

    result_set['PV_DELTA_R20_PRIOR1'] = result_set['PV_DELTA_R20'].shift(1)
    result_set['PV_DELTA_R20_PRIOR2'] = result_set['PV_DELTA_R20'].shift(2)
    result_set['PV_DELTA_R20_PRIOR3'] = result_set['PV_DELTA_R20'].shift(3)
    result_set['PV_DELTA_R20_PRIOR4'] = result_set['PV_DELTA_R20'].shift(4)
    result_set['PV_DELTA_R20_PRIOR5'] = result_set['PV_DELTA_R20'].shift(5)
    
    sd_scaler = StandardScaler()
    result_set[norm_columns] = sd_scaler.fit_transform(result_set[norm_columns])
    return result_set

query = r"""select DISTINCT S.date,Open ,High ,Low ,Close ,"Adj Close" ,Volume ,MACD_12_26_9 ,MACDh_12_26_9 ,MACDs_12_26_9 ,RSI_14 ,"BBL_5_2.0" ,"BBM_5_2.0" ,"BBU_5_2.0" ,"BBB_5_2.0" ,"BBP_5_2.0" ,OBV ,SMA_20 ,EMA_50 ,STOCHk_14_3_3 ,STOCHd_14_3_3 ,ADX_14 ,DMP_14 ,DMN_14 ,AD ,STDEV_30  ,Y_CLOSE5_MIN,ticker, BULLISH,BEARISH 
 from STOCK_DAILY S INNER JOIN AAII_SENTIMENT A ON (date(A.DATE) BETWEEN date(S.DATE, '-7 days') AND date(S.DATE)) AND S.ticker in ('TSLA','AAPL')"""
finalResult = all_tickers_preprocess(query)

x_columns =['delta_p5', 'delta_p20', 'delta_p90', 'delta_v5', 'delta_v20', 'delta_v90', 'PV_DELTA_R5','PV_DELTA_R20','PV_DELTA_R90', 'PV_DELTA_R20_PRIOR1', 'PV_DELTA_R20_PRIOR2','PV_DELTA_R20_PRIOR3', 'PV_DELTA_R20_PRIOR4', 'PV_DELTA_R20_PRIOR5', 'MFI_14','BULLISH','BEARISH']
finalResult = finalResult[x_columns].dropna().astype(float)
# print(type(finalResult[x_columns]))
# print(finalResult[x_columns].shape)
print(finalResult.dtypes)
# x = T.tensor(finalResult.astype(float)).to(T.float32)



# print(len(finalResult))
# finalResult.head()




delta_p5               float64
delta_p20              float64
delta_p90              float64
delta_v5               float64
delta_v20              float64
delta_v90              float64
PV_DELTA_R5            float64
PV_DELTA_R20           float64
PV_DELTA_R90           float64
PV_DELTA_R20_PRIOR1    float64
PV_DELTA_R20_PRIOR2    float64
PV_DELTA_R20_PRIOR3    float64
PV_DELTA_R20_PRIOR4    float64
PV_DELTA_R20_PRIOR5    float64
MFI_14                 float64
BULLISH                float64
BEARISH                float64
dtype: object


In [66]:
query = r"""select DISTINCT S.date,Open ,High ,Low ,Close ,"Adj Close" ,Volume ,MACD_12_26_9 ,MACDh_12_26_9 ,MACDs_12_26_9 ,RSI_14 ,"BBL_5_2.0" ,"BBM_5_2.0" ,"BBU_5_2.0" ,"BBB_5_2.0" ,"BBP_5_2.0" ,OBV ,SMA_20 ,EMA_50 ,STOCHk_14_3_3 ,STOCHd_14_3_3 ,ADX_14 ,DMP_14 ,DMN_14 ,AD ,STDEV_30  ,Y_CLOSE5_MAX,ticker, BULLISH,BEARISH 
from STOCK_DAILY S INNER JOIN AAII_SENTIMENT A ON (date(A.DATE) BETWEEN date(S.DATE, '-7 days') AND date(S.DATE)) AND S.ticker in ('AMGN', 'GOOG', 'MSFT', 'NVDA', 'AMZN', 'META', 'LLY','TXN')"""
all_dataset = SqliteDataSet(query=query,label_col='Y_CLOSE5_MAX')

print(all_dataset.__len__())

train_set, test_set = T.utils.data.random_split(all_dataset,[7164,300])
train_loader = T.utils.data.DataLoader(dataset=train_set,
                                           batch_size=5,
                                           shuffle=True)
verify_loader = T.utils.data.DataLoader(dataset=train_set,
                                           batch_size=1,
                                           shuffle=True)
# 
test_loader = T.utils.data.DataLoader(dataset=test_set,
                                          batch_size=1,
                                          shuffle=False)
examples = iter(test_loader)
example_data, example_targets = next(examples)
example_targets, example_data

7464


(tensor([4]),
 tensor([[ 0.4333, -0.2443, -0.0282, -0.9753, -0.1844,  0.1529, -0.0354,  0.0891,
           0.1174,  0.0849,  0.0250,  0.0667,  0.0547,  0.0535, -0.1757,  0.4026,
           0.2532]]))

In [67]:
import torch


##Fully connected neural network with one hidden layer
# class YhatCrossEntropyLoss(torch.CrossEntropyLoss):
#     def __init__(self)

input_size = 17
hidden_size = 200
num_classes = 6
num_epochs = 20
learning_rate = 0.01
device = T.device('cuda' if T.cuda.is_available() else 'cpu')

class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.nn_block1 = nn.Sequential(
            nn.Linear(input_size, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, num_classes),
            # nn.Softmax(dim=1)
        )

    def forward(self, x):
        out = self.nn_block1(x)

        # no activation and no softmax at the end
        return out


model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
# weight = torch.tensor([10.,4.,1.2,1.1,0.9])
weight = torch.tensor([2, 1.5, 1., 1., 1.,1.2])
criterion = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    for i, (databatch, labels) in enumerate(train_loader):
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        databatch = databatch.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # Forward pass and loss calculation
        outputs = model(databatch)
        loss = criterion(outputs, labels)
        # for p in model.parameters():
        #     p.register_hook(lambda grad: torch.clamp(grad, -1, 1 ))

        # Backward and optimize
        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, norm_type=2)

        optimizer.step()

        if (i + 1) % 100 == 0:
            ##print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            print("out put sample: ", outputs)
            # print(model.parameters())

out put sample:  tensor([[-1.5624e+00,  7.6920e-02, -4.5092e-01,  8.0354e-02,  2.2356e-01,
         -1.1288e-02],
        [-1.4827e+00, -1.7939e-02, -4.0541e-01,  1.2068e-01,  2.1662e-01,
          1.4160e-02],
        [-5.7635e+00,  5.6881e-01, -1.6713e+00, -1.8529e-01,  3.6454e-01,
         -5.2040e-02],
        [-1.7740e+00, -3.4406e-02, -4.7264e-01,  1.1490e-01,  2.6459e-01,
          5.6131e-02],
        [-8.6045e-01,  3.1892e-03, -3.4748e-01,  1.0186e-01,  2.3190e-01,
          2.8907e-02]], grad_fn=<AddmmBackward0>)
out put sample:  tensor([[-4.1607, -0.1930, -1.2098,  0.1422,  0.2197,  0.0496],
        [-2.0126, -0.0152, -0.7162,  0.1629,  0.2243,  0.1501],
        [-3.3265, -0.0264, -0.9663,  0.1895,  0.2197,  0.3435],
        [-3.9174, -0.1689, -1.1678,  0.1294,  0.2277,  0.0052],
        [-2.0384,  0.0540, -1.0647,  0.3090,  0.2258,  0.1371]],
       grad_fn=<AddmmBackward0>)
out put sample:  tensor([[-2.9116, -0.2857, -1.5483,  0.3220,  0.5514, -0.7687],
        [-2.3632, -

In [65]:
from pathlib import Path

torch.save(model.state_dict(), "./tamed/M2_max5_distinct_20240112.pt")

In [68]:
# Test the model: we don't need to compute gradients
import torch as T

with T.no_grad():
    n_correct = 0
    n_samples = len(test_loader.dataset)
    print(n_samples)

    for d_data, labels in test_loader:
        labels = labels.to(device)
        outputs = model(d_data)

        # max returns (output_value ,index)
        _, predicted = T.max(outputs, 1)
        print(labels, predicted)
        n_correct += (predicted == labels).sum().item()

    acc = n_correct / n_samples
    print(f'Accuracy of the network on the {n_samples} test images: {100 * acc} %')

300
tensor([4]) tensor([4])
tensor([3]) tensor([3])
tensor([5]) tensor([4])
tensor([3]) tensor([4])
tensor([4]) tensor([3])
tensor([5]) tensor([4])
tensor([4]) tensor([4])
tensor([3]) tensor([3])
tensor([5]) tensor([3])
tensor([4]) tensor([3])
tensor([5]) tensor([4])
tensor([4]) tensor([4])
tensor([1]) tensor([3])
tensor([3]) tensor([3])
tensor([1]) tensor([3])
tensor([3]) tensor([4])
tensor([2]) tensor([4])
tensor([3]) tensor([4])
tensor([4]) tensor([4])
tensor([3]) tensor([4])
tensor([5]) tensor([4])
tensor([1]) tensor([4])
tensor([5]) tensor([4])
tensor([4]) tensor([4])
tensor([5]) tensor([4])
tensor([3]) tensor([4])
tensor([3]) tensor([4])
tensor([1]) tensor([4])
tensor([4]) tensor([4])
tensor([3]) tensor([4])
tensor([3]) tensor([3])
tensor([4]) tensor([4])
tensor([4]) tensor([3])
tensor([4]) tensor([4])
tensor([3]) tensor([4])
tensor([4]) tensor([4])
tensor([5]) tensor([4])
tensor([4]) tensor([4])
tensor([1]) tensor([4])
tensor([3]) tensor([3])
tensor([3]) tensor([3])
tensor([1]) 

In [17]:
print(model.parameters())

<generator object Module.parameters at 0x0000012D74DEA580>


In [16]:
l_model = NeuralNet(input_size, hidden_size, num_classes)
l_model.load_state_dict(torch.load('./tamed/M1_min5_46v_20240106.pt'))


<All keys matched successfully>

In [17]:
l_model.eval()
# Test the model: we don't need to compute gradients
import torch as T

with T.no_grad():
    n_correct = 0
    n_samples = len(test_loader.dataset)
    print(n_samples)

    for d_data, labels in test_loader:
        labels = labels.to(device)
        outputs = l_model(d_data)

        # max returns (output_value ,index)
        _, predicted = T.max(outputs, 1)
        print(labels, predicted)
        n_correct += (predicted == labels).sum().item()

    acc = n_correct / n_samples
    print(f'Accuracy of the network on the {n_samples} test images: {100 * acc} %')


49
tensor([1]) tensor([0])
tensor([2]) tensor([0])
tensor([2]) tensor([2])
tensor([0]) tensor([0])
tensor([3]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([0])
tensor([3]) tensor([0])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([0])
tensor([4]) tensor([0])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([0])
tensor([2]) tensor([2])
tensor([2]) tensor([0])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([3]) tensor([0])
tensor([1]) tensor([0])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([0]) tensor([0])
tensor([1]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([0])
tensor([3]) tensor([0])
tensor([0]) tensor([0])
tensor([3]) tensor([2])
tensor([3]) tensor([0])
tensor([1]) tensor([2])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([0])
tensor([0]) tensor([0])
tensor([2]) tensor([2])
tensor([1]) tensor([0])
tensor([2]) tensor([0])
tensor([1]) tensor([0])
tensor([3]) t