In [None]:
# Copy every file from the /kaggle/input/nn-utils dataset into the current working
# directory. Presumably this is what makes the local helper modules imported in the
# next cell importable -- TODO confirm which files the dataset actually contains.
!cp /kaggle/input/nn-utils/* .

In [None]:
import threading
import queue
import time
start_time = time.time()
import os
from MultiTimeframeCandleManager import *
from datetime import datetime, timedelta
from collections import deque
import numpy as np
from tqdm import tqdm
import copy
import tensorflow as tf
import random
from save_and_load import *
from Candle import Candle
import matplotlib.pyplot as plt
#from tensorflow.keras import mixed_precision
#mixed_precision.set_global_policy('mixed_float16')


# In[6]:

if True:
    # Prefer a locally attached TPU; fall back to MirroredStrategy when the TPU
    # cannot be resolved or initialised.
    try:
        cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu="local")
        tf.config.experimental_connect_to_cluster(cluster_resolver)
        tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
        strategy = tf.distribute.TPUStrategy(cluster_resolver)
        print("use tpu strategy")
    except Exception as tpu_error:
        # `except Exception` instead of a bare `except` so KeyboardInterrupt and
        # SystemExit still propagate; the reason for the fallback is reported
        # instead of being silently discarded.
        print("TPU not available, falling back to MirroredStrategy:", repr(tpu_error))
        strategy = tf.distribute.MirroredStrategy()
    # A bare `strategy` expression inside an `if` block is never displayed by the
    # notebook, so print the useful part explicitly.
    print("strategy:", type(strategy).__name__, "| replicas in sync:", strategy.num_replicas_in_sync)


In [None]:


# In-memory pool of (features, label) samples; filled further below.
train_data = []

# Kaggle input datasets that are scanned for training files.
data_dirs = ["/kaggle/input/nn-v1-dataprep-nq2", "/kaggle/input/nn-v1-dataprep-es2", "/kaggle/input/nn-v1-dataprep-ym2"]

# Paths of every file whose name contains "_train_".
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the file order (and therefore the loading order) reproducible between runs.
data_files = []
for d in data_dirs:
    subfiles = sorted(os.listdir(d))
    for f in subfiles:
        if "_train_" in f:
            data_files.append(os.path.join(d, f))


# Position in `data_files` of the file returned by the most recent call.
data_index = 0


def load_new_data():
    """Advance the global file cursor by one (wrapping to 0) and load that file.

    The cursor is moved *before* the file is read, so with more than one file the
    very first call returns ``data_files[1]``; ``data_files[0]`` is reached after
    the cursor wraps around.

    Returns:
        Whatever ``obj_load`` returns for the selected path.
    """
    global data_index

    candidate = data_index + 1
    data_index = candidate if candidate < len(data_files) else 0

    return obj_load(data_files[data_index])
    

# Hand-off buffer between the loader thread and the training code; holds at most
# one loaded file at a time.
data_queue = queue.Queue(maxsize=1)


def data_loader():
    """Run forever, keeping `data_queue` topped up with freshly loaded data.

    A new file is only loaded while the queue has free capacity; otherwise the
    loop sleeps for a second before checking again. Never returns.
    """
    while True:
        if data_queue.qsize() >= data_queue.maxsize:
            # Queue is full: back off instead of spinning.
            time.sleep(1)
            continue

        fresh_batch = load_new_data()
        print("new data loaded!")
        data_queue.put(fresh_batch)

# Streaming mode (currently switched off): run `data_loader` on a daemon thread so
# it is torn down together with the main program.
if False:
    loader_thread = threading.Thread(target=data_loader, daemon=True)
    loader_thread.start()

# or load all data at once if it fits in memory
if True:
    train_data = []
    for d in data_dirs:
        # Sorted because os.listdir() order is arbitrary; batches are later drawn
        # sequentially from train_data, so the file order decides the batch order.
        subfiles = sorted(os.listdir(d))
        for f in subfiles:
            if "_train_" in f:
                data = obj_load(os.path.join(d, f))
                train_data.extend(data)

    l = len(train_data)
    if l == 0:
        # Fail with a clear message instead of a ZeroDivisionError further down.
        raise ValueError("no training samples found in: " + ", ".join(data_dirs))

    # info about class distribution
    # c0 counts label -1, c1 counts label 1, c2 counts every other label. The
    # catch-all for c2 mirrors the one-hot encoding used by get_data/get_data_2,
    # where any label that is neither 1 nor -1 ends up in the third class, so the
    # three counters always add up to len(train_data).
    c0 = 0
    c1 = 0
    c2 = 0
    print("info about y distribution:")
    for _features, y in tqdm(train_data):
        if y == -1:
            c0 += 1
        elif y == 1:
            c1 += 1
        else:
            c2 += 1
    print("raw distribution:", c0, c1, c2)

    print("relative distribution:", c0/l, c1/l, c2/l)

In [None]:
# Build the model and its optimizer inside the distribution-strategy scope so that
# their variables are created under `strategy`.
with strategy.scope():
#if True:
    
    # One LeakyReLU layer instance (argument 0.05) that is reused for every
    # activation call below.
    lrelu = tf.keras.layers.LeakyReLU(0.05)
    
    
    # Three sequence inputs, each 60 steps x 4 values.
    # NOTE(review): the names suggest 15-minute / 5-minute / 1-minute charts with
    # four price values per candle -- confirm against the data-prep notebook.
    chart_m15 = tf.keras.layers.Input(shape = (60,4))
    chart_m5 = tf.keras.layers.Input(shape = (60,4))
    chart_m1 = tf.keras.layers.Input(shape = (60,4))
    
    # Flat feature vector of length 3*3 + 3*3 + 1 + 12*5 + 5*3 = 94.
    pdas = tf.keras.layers.Input(shape = (3*3+3*3+1+12*5+5*3,))
    
    # Insert a length-1 axis at position 1 and repeat it 60 times, so the flat
    # vector can be concatenated onto each of the 60 steps of a chart input.
    pdas_repeated = tf.keras.layers.Lambda(
    lambda inputs: tf.repeat(tf.expand_dims(inputs, axis = 1), repeats=60, axis=1)
    )(pdas)
    
    # m5 branch: chart_m5 joined with the repeated vector on the last axis, three
    # Dense + LeakyReLU stages, then an LSTM(512) on top of the sequence.
    concatenated_m5_at = tf.keras.layers.Concatenate(axis=-1)([chart_m5, pdas_repeated])
    m5_at = tf.keras.layers.Dense(512)(concatenated_m5_at)
    m5_at = lrelu(m5_at)
    m5_at = tf.keras.layers.Dense(256)(m5_at)
    m5_at = lrelu(m5_at)
    m5_at = tf.keras.layers.Dense(256)(m5_at)
    m5_at = lrelu(m5_at)
    m5_at = tf.keras.layers.LSTM(512)(m5_at)
    
    # m1 branch: same layout as the m5 branch, with its own layers, on chart_m1.
    concatenated_m1_at = tf.keras.layers.Concatenate(axis=-1)([chart_m1, pdas_repeated])
    m1_at = tf.keras.layers.Dense(512)(concatenated_m1_at)
    m1_at = lrelu(m1_at)
    m1_at = tf.keras.layers.Dense(256)(m1_at)
    m1_at = lrelu(m1_at)
    m1_at = tf.keras.layers.Dense(256)(m1_at)
    m1_at = lrelu(m1_at)
    m1_at = tf.keras.layers.LSTM(512)(m1_at)
    
    # Single scalar input looked up in an embedding table with 60*24 = 1440 rows
    # of 8 values each, then flattened.
    # NOTE(review): 1440 rows suggests a minute-of-day index -- confirm.
    minutes = tf.keras.layers.Input(shape = (1,))
    minutes_embed = tf.keras.layers.Embedding(input_dim=60*24, output_dim=8)(minutes)
    minutes_embed_flat = tf.keras.layers.Flatten()(minutes_embed)
    
    # Raw charts flattened to vectors. chart_m15 only enters the model through
    # this flattened form; it has no Dense/LSTM branch of its own.
    f15 = tf.keras.layers.Flatten()(chart_m15)
    f5 = tf.keras.layers.Flatten()(chart_m5)
    f1 = tf.keras.layers.Flatten()(chart_m1)
    
    #c = tf.keras.layers.Concatenate()([f15, f5, f1, pdas, minutes_embed_flat, current_position, scaled_open_profit])
    # Everything is joined into one feature vector for the dense head.
    c = tf.keras.layers.Concatenate()([f15, f5, f1, pdas, minutes_embed_flat, m1_at, m5_at])
    
    
    # Dense head: four stages of Dense(2048) -> LeakyReLU -> Dropout(0.05).
    d = tf.keras.layers.Dense(1024*2)(c)
    d = lrelu(d)
    d = tf.keras.layers.Dropout(0.05)(d)
    
    d = tf.keras.layers.Dense(1024*2)(d)
    d = lrelu(d)
    d = tf.keras.layers.Dropout(0.05)(d)
    
    d = tf.keras.layers.Dense(1024*2)(d)
    d = lrelu(d)
    d = tf.keras.layers.Dropout(0.05)(d)
    
    d = tf.keras.layers.Dense(1024*2)(d)
    d = lrelu(d)
    d = tf.keras.layers.Dropout(0.05)(d)

    
    
    
    # Three-way softmax output; dtype is pinned to float32 on this layer.
    output = tf.keras.layers.Dense(3, activation="softmax", dtype="float32")(d)
    
    # Input order expected by the model: [chart_m15, chart_m5, chart_m1, pdas, minutes].
    model = tf.keras.Model(inputs = [chart_m15, chart_m5, chart_m1, pdas, minutes], outputs = output)
    
    # Adam with learning rate 1e-5 and gradient clipping via clipnorm=1.0.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.00001, clipnorm=1.0)
    
    
# Printed outside the strategy scope; only reports the architecture.
model.summary()
    
    
    # In[10]:
    


In [None]:
# Per-sample categorical cross-entropy (no reduction) so that class weights can be
# applied to every sample before averaging. The model already outputs softmax
# probabilities, hence from_logits=False.
# The plain string "none" is used instead of tf.keras.losses.Reduction.NONE: the
# enum value is that same string in tf.keras 2.x, and the string form keeps working
# on Keras 3, where the Reduction enum is no longer exposed.
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=False, reduction="none")


# Inverse-frequency class weights: total / (n_classes * count_per_class).
# Index order matches the one-hot targets: 0 -> label -1, 1 -> label 1, 2 -> other.
class_counts = np.array([c0, c1, c2])
# A class with zero samples would otherwise divide by zero and yield an infinite
# weight; clamping the divisor to 1 keeps the weight finite. Counts that are
# already >= 1 are unaffected.
safe_class_counts = np.maximum(class_counts, 1)
class_weights = class_counts.sum() / (len(class_counts) * safe_class_counts)
class_weights_tensor = tf.constant(class_weights, dtype=tf.float32)
print("weights:", class_weights_tensor)


# XLA compilation (@tf.function(jit_compile=True)) is deliberately not applied
# here: the original note says the author's GPU does not support it. The previous
# `tf.function(jit_compile=True)` line had no leading "@", so it only built a
# decorator and threw it away; it has been removed to avoid suggesting that
# tstep is JIT-compiled.
def tstep(data):
    """Run one optimisation step on a single batch.

    Args:
        data: tuple ``(x, y)`` where ``x`` is what the model is called with and
            ``y`` holds the one-hot targets (three classes, same order as
            ``class_weights_tensor``).

    Returns:
        ``(loss, accuracy)``: the class-weighted mean loss of the batch (after the
        large-loss damping below) and the fraction of samples whose arg-max
        prediction equals the arg-max of the target.
    """
    x, y = data

    with tf.GradientTape() as t:
        model_return = model(x, training=True)
        # loss_fn is configured without reduction, so this is one value per sample.
        loss = loss_fn(y, model_return)

        # y is one-hot, so the weighted sum picks the weight of each sample's class.
        sample_weights = tf.reduce_sum(y * class_weights_tensor, axis=-1)
        loss = loss * sample_weights

        # NOTE(review): this averages over the local batch only. When several
        # replicas are in sync, TensorFlow's custom-training-loop guidance is to
        # scale by the global batch size instead -- confirm this is intended.
        loss = tf.reduce_mean(loss)

        # Damp very large losses (the original comment attributes them to data
        # errors) so a single bad batch produces a 10x smaller gradient.
        if loss > 10:
            loss *= 0.1

    predicted_class = tf.argmax(model_return, axis=-1)
    true_class = tf.argmax(y, axis=-1)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(predicted_class, true_class), tf.float32))

    gradient = t.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradient, model.trainable_variables))

    return loss, accuracy


# Number of samples handed to each call of get_data / get_data_2.
batch_size = 1024


def _samples_to_tensors(train_sample):
    """Convert a list of ``(features, label)`` samples into model-ready tensors.

    Args:
        train_sample: non-empty list of samples. ``sample[0]`` is a sequence of
            per-input feature arrays, ``sample[1]`` is the label.

    Returns:
        ``(states_tensor, targets_tensor)``: a list with one float32 tensor per
        model input (samples stacked along the first axis) and a float32 tensor of
        one-hot targets where label 1 -> [0,1,0], label -1 -> [1,0,0] and any other
        label -> [0,0,1].
    """
    states = [sample[0] for sample in train_sample]
    # Regroup "list of samples, each a list of inputs" into "one array per input".
    states_array = [
        np.array([state[i] for state in states], dtype="float32")
        for i in range(len(states[0]))
    ]
    states_tensor = [tf.convert_to_tensor(arr) for arr in states_array]

    targets = []
    for sample in train_sample:
        v = sample[1]
        if v == 1:
            targets.append([0, 1, 0])
        elif v == -1:
            targets.append([1, 0, 0])
        else:
            targets.append([0, 0, 1])
    targets_tensor = tf.convert_to_tensor(np.array(targets, dtype="float32"))

    return states_tensor, targets_tensor


def get_data(_):
    """Return the next batch in streaming mode, consuming it from `train_data`.

    While fewer than 10000 samples are buffered, new data is pulled from
    `data_queue`; if the queue is empty the function keeps waiting (it polls once
    per second) until data arrives. The returned samples are removed from the
    front of `train_data`.

    Args:
        _: ignored (the distribution strategy passes a value context here).

    Returns:
        ``(states_tensor, targets_tensor)`` as produced by `_samples_to_tensors`.
    """
    global train_data

    while len(train_data) < 10000:
        if not data_queue.empty():
            new_data = data_queue.get(timeout=1)  # wait up to 1 second for data
            train_data.extend(new_data)
        else:
            # Nothing queued yet: wait and check again (the loop does not give up).
            print("waiting for data....")
            time.sleep(1)

    # Pop a batch of data from the front.
    train_sample = train_data[:batch_size]
    train_data = train_data[batch_size:]

    return _samples_to_tensors(train_sample)


# Start offset inside `train_data` of the next batch returned by get_data_2.
train_index = 0


def get_data_2(_):
    """Return the next full batch from the in-memory `train_data`, cycling forever.

    Batches are consecutive, non-overlapping slices of `batch_size` samples. When
    fewer than `batch_size` samples remain, the cursor wraps to the start; a
    trailing partial batch is never returned. The last *full* batch is returned
    even when the data length is an exact multiple of `batch_size` (the previous
    ``>=`` wrap test skipped it).

    Args:
        _: ignored (the distribution strategy passes a value context here).

    Returns:
        ``(states_tensor, targets_tensor)`` as produced by `_samples_to_tensors`.

    Raises:
        IndexError: if `train_data` holds fewer than `batch_size` samples.
    """
    global train_index

    if len(train_data) < batch_size:
        raise IndexError(
            f"train_data has {len(train_data)} samples, need at least batch_size={batch_size}"
        )

    # Wrap before slicing so every returned batch is complete.
    if train_index + batch_size > len(train_data):
        train_index = 0

    train_sample = train_data[train_index:train_index + batch_size]
    train_index += batch_size

    return _samples_to_tensors(train_sample)


In [None]:
@tf.function(reduce_retracing=True)
def run(data):
    """Execute `tstep` on every replica and average the results across replicas.

    Args:
        data: per-replica batch, as produced by
            ``strategy.experimental_distribute_values_from_function``.

    Returns:
        ``(loss, accuracy)``: each the mean over all replicas.
    """
    per_replica_loss, per_replica_accuracy = strategy.run(tstep, args=(data,))
    # strategy.reduce is documented for a single distributed value, not a nested
    # structure, so the two outputs of tstep are reduced one at a time.
    mean_loss = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_loss, axis=None)
    mean_accuracy = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_accuracy, axis=None)
    return mean_loss, mean_accuracy

import IPython

# Long-term history: one averaged value per outer iteration.
loss_lt = []
acc_lt = []

# 100 outer iterations of 1000 training steps each; after every outer iteration
# the averaged loss/accuracy are recorded and the curves are redrawn.
for _ in range(100):
    # Short-term buffers: one entry per training step of this outer iteration.
    losses_st = []
    acc_st = []
    for _ in range(1000):
        # Each replica pulls its own batch through get_data_2 (in-memory data).
        distributed_data = strategy.experimental_distribute_values_from_function(get_data_2)
        loss, acc = run(distributed_data)
        losses_st.append(loss)
        acc_st.append(acc)

    loss_lt.append(np.mean(losses_st))
    acc_lt.append(np.mean(acc_st))

    # Replace the previous plot with an updated one.
    IPython.display.clear_output()
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    panels = (
        (ax1, loss_lt, "Loss", "red"),
        (ax2, acc_lt, "Accuracy", "blue"),
    )
    for ax, history, label, color in panels:
        ax.plot(history, label=label, color=color)
        ax.set_title(label + " over Epochs")
        ax.set_xlabel("Epochs")
        ax.set_ylabel(label)
        ax.legend()

    plt.show()
    print(loss_lt, acc_lt)


In [None]:
from ftplib import FTP

# Persist the trained model locally: the full model and, separately, its weights.
model_save_name = "model.keras"
model.save(model_save_name)
model.save_weights("model.weights.h5")


# FTP server details.
# The password is read from the environment instead of being hard-coded in the
# notebook. SECURITY: the password that used to be committed here must be treated
# as leaked and rotated. Host and user keep their previous values as defaults and
# can be overridden through FTP_SERVER / FTP_USER.
# NOTE(review): plain FTP sends credentials unencrypted; consider ftplib.FTP_TLS
# if the server supports it.
ftp_server = os.environ.get("FTP_SERVER", "benundmarvpromotions.lima-ftp.de")
ftp_user = os.environ.get("FTP_USER", "benundmarvpromotions")
ftp_password = os.environ.get("FTP_PASSWORD")

file_to_upload = model_save_name
remote_path = '/test/' + model_save_name

if not ftp_password:
    # The model is already saved locally; do not fail the run over a missing secret.
    print("FTP_PASSWORD is not set; skipping upload of", file_to_upload)
else:
    # The context manager closes the connection even if login or upload raises.
    with FTP(ftp_server) as ftp:
        ftp.login(user=ftp_user, passwd=ftp_password)

        ftp.cwd("/")

        # Open the file in binary mode and upload it
        with open(file_to_upload, 'rb') as file:
            ftp.storbinary(f'STOR {remote_path}', file, blocksize=1024*1024)

In [None]:
#model.load_weights("model.weights.h5")