In [None]:
import tensorflow as tf
import tensorflow.keras
import tensorflow.keras.datasets
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import tensorflow.keras.optimizers
from tensorflow.keras.callbacks import EarlyStopping
from matplotlib import pyplot as plt
import numpy as np

In [None]:
# Data reading: load the embeddings table and turn it into a numeric matrix
import ast
import pandas as pd

embeddings = pd.read_csv("better_cleaned_embeddings.csv")

# The 'Embedding' column is read from the CSV as text; ast.literal_eval turns
# each cell back into the Python object it spells (presumably a list of floats).
parsed_vectors = embeddings['Embedding'].apply(ast.literal_eval)
embeddings['Embedding'] = parsed_vectors

# Stack the per-row vectors into a single 2-D numpy array (one row per CSV row)
embedding_matrix = np.vstack(parsed_vectors.tolist())


In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle

# Regression data: features are the embedding vectors, target is the 'TLT' column.
X = np.asarray(embedding_matrix).astype('float32')
y = embeddings['TLT'].to_numpy().astype('float32').reshape(-1, 1)  # Ensure y is (n_samples, 1)

# Split boundaries: first 80% train, next 10% validation, last 10% test
n_samples = len(X)
train_split = int(0.8 * n_samples)
val_split = int(0.9 * n_samples)

# Shuffle features and targets together before slicing.
# random_state is fixed so the split (and every metric computed from it) is
# the same on each Restart & Run All; without it the split changed on every run.
X_shuffled, y_shuffled = shuffle(X, y, random_state=42)
X_train, X_val, X_test = X_shuffled[:train_split], X_shuffled[train_split:val_split], X_shuffled[val_split:]
y_train, y_val, y_test = y_shuffled[:train_split], y_shuffled[train_split:val_split], y_shuffled[val_split:]

# Standardize the inputs; the scaler is fitted on the training rows only and
# then applied unchanged to the validation and test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Standardize the target the same way (fit on train only). y is already
# (n_samples, 1), so no further reshape is needed. scaler2 is kept so that
# predictions can be mapped back to the original units with inverse_transform.
scaler2 = StandardScaler()
y_train_scaled = scaler2.fit_transform(y_train)
y_val_scaled = scaler2.transform(y_val)

In [None]:
# results = [-2.5]

In [None]:
# Regression network: 768 inputs -> Dense 64 -> 128 -> 256 -> 256 (all ReLU)
# -> one linear output. No Dropout layers are active.
model = Sequential([
    Dense(64, activation='relu', input_shape=(768,)),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='linear'),
])

# MSE loss on the standardized target; MAE is reported alongside it
model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
    metrics=['mae'],
)

# Stop once val_loss has not improved for 15 epochs and roll back to the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

model.fit(
    X_train_scaled,
    y_train_scaled,
    validation_data=(X_val_scaled, y_val_scaled),
    epochs=150,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=1,
)

# Loss / MAE on the validation split (in standardized target units)
model.evaluate(X_val_scaled, y_val_scaled)

results = pd.DataFrame(X_test_scaled)

Epoch 1/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - loss: 1.2600 - mae: 0.8097 - val_loss: 1.7526 - val_mae: 1.0602
Epoch 2/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0719 - mae: 0.7555 - val_loss: 1.7134 - val_mae: 1.0564
Epoch 3/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0671 - mae: 0.7404 - val_loss: 1.7050 - val_mae: 1.0559
Epoch 4/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.1029 - mae: 0.7611 - val_loss: 1.7004 - val_mae: 1.0554
Epoch 5/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0236 - mae: 0.7495 - val_loss: 1.7000 - val_mae: 1.0548
Epoch 6/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.0679 - mae: 0.7392 - val_loss: 1.6987 - val_mae: 1.0547
Epoch 7/150
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 

In [None]:
# Predict on the validation inputs, then undo the target standardization so the
# predictions are in the same units as y_val.
preds = model.predict(X_val_scaled)
preds_rescaled = scaler2.inverse_transform(preds)

# Table with the actual validation targets in column 0 and the model output
# next to them in 'predictions'.
results = pd.DataFrame(y_val)
results['predictions'] = preds_rescaled
results


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


Unnamed: 0,0,predictions
0,1.067135,0.036067
1,0.006683,0.022320
2,-0.554627,-0.151344
3,0.470367,-0.235644
4,0.769128,0.153382
...,...,...
257,1.574468,0.100523
258,-0.033515,-0.072311
259,-2.380354,-0.148335
260,1.614150,0.079369


In [None]:
# Smallest and largest predicted value
print(results['predictions'].min())
print(results['predictions'].max())

# Direction flags: True when the value is non-negative, False otherwise
results['direction'] = results['predictions'].ge(0)
results['true_direction'] = results[0].ge(0)

# Share of rows where the predicted direction matches the actual one
matches = results[results['direction'] == results['true_direction']]
print(len(matches) / len(results))

-0.3981746435165405
0.35747504234313965
0.48854961832061067


In [None]:
# Toy bankroll simulation over the rows of `results`, starting from 10000.
# For every row the stake is prediction * current bankroll * 2 (so a negative
# prediction gives a negative stake), and the bankroll changes by the stake
# times the actual value in column 0 read as a percentage.
money = 10000
for index, row in results.iterrows():
    prediction = row['predictions']
    bet = prediction * money * 2

    # Column 0 is treated as a percent figure (e.g. 10% => 0.10)
    pct_return = row[0] / 100.0
    result = bet * pct_return

    # Only the profit/loss is added; the stake itself never leaves the bankroll
    money = money + result
    print(f"prediction: {prediction:.2f}, real: {row[0]:.2f}%, result: {result:.3f}, money: {money:.3f}")

prediction: 0.14, real: 0.08%, result: 2.258, money: 10002.258
prediction: 0.03, real: 1.47%, result: 10.189, money: 10012.447
prediction: 0.06, real: -0.05%, result: -0.690, money: 10011.756
prediction: 0.19, real: -0.11%, result: -4.062, money: 10007.695
prediction: -0.00, real: -0.29%, result: 0.109, money: 10007.804
prediction: 0.14, real: 6.72%, result: 190.216, money: 10198.019
prediction: 0.12, real: 0.04%, result: 1.132, money: 10199.151
prediction: 0.43, real: 0.13%, result: 11.826, money: 10210.977
prediction: -0.02, real: 0.13%, result: -0.477, money: 10210.500
prediction: 0.14, real: 0.45%, result: 12.655, money: 10223.155
prediction: 0.26, real: 0.60%, result: 32.349, money: 10255.504
prediction: 0.16, real: 1.80%, result: 59.834, money: 10315.339
prediction: -0.12, real: -0.34%, result: 8.323, money: 10323.662
prediction: 0.10, real: -0.81%, result: -17.167, money: 10306.495
prediction: 0.44, real: 0.51%, result: 46.056, money: 10352.551
prediction: 0.29, real: 0.53%, res

In [1]:
# Classifier version: predict only the direction (sign) of the TLT change


# Convert to binary classification: 1 = positive change, 0 = non-positive
labels = (embeddings['TLT'].values > 0).astype('int32')  # Binary target

# Shuffle and split.
# The features are rebuilt from `embedding_matrix` instead of reusing `X`:
# this cell overwrites `X`, so shuffling `X` itself would permute the features
# a second time when the cell is re-run, while the labels are recomputed in
# file order -- silently pairing rows with the wrong labels.
X, y = shuffle(np.asarray(embedding_matrix).astype('float32'), labels, random_state=42)
n_samples = len(X)
train_split = int(0.75 * n_samples)  # first 75% -> train
val_split = int(0.85 * n_samples)    # next 10% -> validation, last 15% -> test

X_train, X_val, X_test = X[:train_split], X[train_split:val_split], X[val_split:]
y_train, y_val, y_test = y[:train_split], y[train_split:val_split], y[val_split:]

# Scale inputs (don't scale outputs for classification); fit on train only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Build model: same hidden stack as the regression network, sigmoid output
model = Sequential([
    Dense(64, activation='relu', input_shape=(768,)),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])

# Compile model
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.00001),
    metrics=['accuracy']
)

# Early stopping: stop after 15 epochs without val_loss improvement, keep best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True, verbose=1)

# Train
model.fit(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    epochs=150,
    batch_size=64,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate on the validation split. The label says "Validation" because the
# test split (X_test_scaled / y_test) is not touched in this cell.
loss, accuracy = model.evaluate(X_val_scaled, y_val, verbose=0)
print(f"Validation Accuracy: {accuracy:.4f}")


NameError: name 'embeddings' is not defined

In [None]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile a binary classifier from a set of tunable hyperparameters.

    Args:
        hp: hyperparameter container exposing Int / Choice / Boolean / Float
            (presumably the keras_tuner HyperParameters object the tuner passes in).

    Returns:
        A compiled Sequential model with a single sigmoid output, trained with
        binary cross-entropy and reporting accuracy.
    """
    net = Sequential()

    # First layer: 64-256 units (step 64), relu or tanh, fed by 768-dim inputs
    first_units = hp.Int('units_input', min_value=64, max_value=256, step=64)
    first_activation = hp.Choice('activation_input', ['relu', 'tanh'])
    net.add(Dense(first_units, activation=first_activation, input_shape=(768,)))

    # 1 to 3 further hidden layers, each with its own width, activation and
    # optional dropout
    depth = hp.Int('num_layers', 1, 3)
    for i in range(depth):
        width = hp.Int(f'units_{i}', min_value=64, max_value=512, step=64)
        activation = hp.Choice(f'activation_{i}', ['relu', 'tanh'])
        net.add(Dense(units=width, activation=activation))

        # The dropout rate is only sampled when dropout is switched on for this layer
        if hp.Boolean(f'dropout_{i}'):
            rate = hp.Float(f'dropout_rate_{i}', min_value=0.1, max_value=0.5, step=0.1)
            net.add(Dropout(rate=rate))

    # Output layer: one sigmoid unit
    net.add(Dense(1, activation='sigmoid'))

    # Adam with a learning rate sampled on a log scale between 1e-5 and 1e-3
    learning_rate = hp.Float('lr', 1e-5, 1e-3, sampling='log')
    net.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )

    return net


In [None]:
# Hyperband search over the hyperparameters declared in build_model, ranking
# trials by validation accuracy. `seed` is fixed so that the sequence of
# sampled configurations is repeatable across runs.
tuner = kt.Hyperband(
    build_model,
    objective='val_accuracy',
    max_epochs=30,
    factor=3,
    seed=42,
    directory='hyperband_dir',
    project_name='tlt_direction_classifier'
)

# Cut a trial short after 5 epochs without val_loss improvement
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# No `epochs` argument here: Hyperband assigns the epoch budget of every trial
# itself (at most max_epochs), so a value passed to search() is overridden.
tuner.search(
    X_train_scaled, y_train,
    validation_data=(X_val_scaled, y_val),
    callbacks=[early_stopping],
    batch_size=64
)


Trial 90 Complete [00h 00m 09s]
val_accuracy: 0.5152671933174133

Best val_accuracy So Far: 0.572519063949585
Total elapsed time: 00h 09m 24s


In [None]:
# Get the top 15 models found by the tuner
best_models = tuner.get_best_models(num_models=15)

# Evaluate each one on the held-out test split.
# The loop variable is deliberately not named `model`, so the notebook-level
# `model` built in an earlier cell is not replaced by the last tuned candidate.
for rank, candidate in enumerate(best_models, start=1):
    test_loss, test_accuracy = candidate.evaluate(X_test_scaled, y_test, verbose=1)
    print(f"Model {rank}: Test Accuracy = {test_accuracy:.4f}, Loss = {test_loss:.4f}")


Exception ignored in: <function _xla_gc_callback at 0x7954bf5b6f20>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 
Exception ignored in: <function _xla_gc_callback at 0x7954bf5b6f20>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/jax/_src/lib/__init__.py", line 96, in _xla_gc_callback
    def _xla_gc_callback(*args):
    
KeyboardInterrupt: 


Model 1: Test Accuracy = 0.4924, Loss = 0.7615
Model 2: Test Accuracy = 0.4847, Loss = 0.8964
Model 3: Test Accuracy = 0.4885, Loss = 1.3622
Model 4: Test Accuracy = 0.5000, Loss = 0.7453
Model 5: Test Accuracy = 0.4580, Loss = 0.7455
Model 6: Test Accuracy = 0.5458, Loss = 0.7717
Model 7: Test Accuracy = 0.4618, Loss = 2.1464
Model 8: Test Accuracy = 0.5191, Loss = 0.7235
Model 9: Test Accuracy = 0.4809, Loss = 0.7577
Model 10: Test Accuracy = 0.4809, Loss = 0.7279
Model 11: Test Accuracy = 0.5115, Loss = 0.7064
Model 12: Test Accuracy = 0.4580, Loss = 0.7060
Model 13: Test Accuracy = 0.5038, Loss = 0.7033
Model 14: Test Accuracy = 0.5191, Loss = 0.7001
Model 15: Test Accuracy = 0.4733, Loss = 0.6981
