<a href="https://colab.research.google.com/github/antisaint669/scaling-octo-rffnnet/blob/main/WorkingCryptoPredictor9_1_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Example of the input CSV expected at /home/file.csv
# (note: the sample rows below show 7 values under an 8-column header)

# Date,End,Open,High,Low,Close,Volume,MarketCap
# 2014-09-17,465.864014,468.174011,452.421997,457.334015,457.334015,21056800
# 2014-09-18,456.859985,456.859985,413.104004,424.440002,424.440002,34483200
# 2014-09-19,424.102997,427.834991,384.532013,394.795990,394.795990,37919700
# 2014-09-20,394.673004,423.295990,389.882996,408.903992,408.903992,36863600
# 2014-09-21,408.084991,412.425995,393.181000,398.821014,398.821014,26580100
# 2014-09-22,399.100006,406.915985,397.130005,402.152008,402.152008,24127600
# 2014-09-23,402.092010,441.557007,396.196991,435.790985,435.790985,45099500
# 2014-09-24,435.751007,436.112000,421.131989,423.204987,423.204987,30627700
# 2014-09-25,423.156006,423.519989,409.467987,411.574005,411.574005,26814400
# 2014-09-26,411.428986,414.937988,400.009003,404.424988,404.424988,21460800
# 2014-09-27,403.556000,406.622986,397.372009,399.519989,399.519989,15029300
# 2014-09-28,399.471008,401.016998,374.332001,377.181000,377.181000,23613300

!pip install numpy pandas tensorflow scikit-learn yfinance requests
import os
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from sklearn.preprocessing import MinMaxScaler
from datetime import timedelta
import hashlib

# Location of the historical price CSV; it must contain a 'Date' column.
FILE_PATH = '/home/file.csv'

# Read the file and convert the 'Date' column to datetime values in one step.
data = pd.read_csv(FILE_PATH).assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Show which columns were found in the file.
print("Columns in the data:", data.columns, sep="\n")

# Price series we would like to forecast.
target_columns = ['High', 'Low', 'Close']

# Keep only the requested series that the file actually provides.
available_columns = list(
    filter(lambda name: name in data.columns, target_columns)
)

if not available_columns:
    raise ValueError(f"None of the target columns {target_columns} are in the data. Available columns are: {data.columns}")

print(f"Columns being used for prediction: {available_columns}")

models = dict()

def get_model_hash():
    """Return the MD5 hex digest of a fixed textual description of the model.

    The description is a hard-coded string, not something derived from a
    live model object, so it has to be edited by hand whenever the network
    architecture changes. Whitespace is part of the hashed text.
    """
    description_lines = (
        "",
        "    Sequential([",
        "        Input(shape=(60, 1)),",
        "        LSTM(units=100, return_sequences=True),",
        "        LSTM(units=100, return_sequences=True),",
        "        LSTM(units=100, return_sequences=True),",
        "        LSTM(units=100, return_sequences=False),",
        "        Dense(1)",
        "    ])",
        "    ",
    )
    architecture_text = "\n".join(description_lines)

    digest = hashlib.md5()
    digest.update(architecture_text.encode())
    return digest.hexdigest()

# Architecture fingerprint, computed once when this cell runs. It is compared
# with each column's saved ".hash" file to decide whether cached weights can
# be reused or the model must be retrained.
MODEL_HASH = get_model_hash()

def create_lstm_model(input_shape):
    """Build and compile the stacked-LSTM regression network.

    Args:
        input_shape: Shape handed to the Keras ``Input`` layer.

    Returns:
        A compiled ``Sequential`` model: four LSTM layers of 100 units each
        (the first three return full sequences, the last returns only its
        final output) followed by a single-unit ``Dense`` layer. It is
        compiled with mean-squared-error loss and the Adam optimizer.
    """
    sequence_layers = [
        LSTM(units=100, return_sequences=True) for _ in range(3)
    ]
    layer_stack = [
        Input(shape=input_shape),
        *sequence_layers,
        LSTM(units=100, return_sequences=False),
        Dense(1),
    ]

    network = Sequential(layer_stack)
    network.compile(loss='mean_squared_error', optimizer='adam')
    return network

def prepare_data(data, column, scaler, window=60):
    """Scale one column and cut it into sliding-window training samples.

    Args:
        data: DataFrame holding the series; ``data[[column]]`` is used.
        column: Name of the column to turn into samples.
        scaler: Object exposing ``fit_transform``; it is fitted on the whole
            column here, so the caller can reuse it afterwards.
        window: Number of consecutive rows in each input sample. Defaults to
            60, the look-back that was previously hard-coded.

    Returns:
        A tuple ``(X, y)``. ``X[k]`` holds scaled rows ``k .. k+window-1``
        and ``y[k]`` holds scaled row ``k+window``. When the series has no
        more than ``window`` rows, both arrays are empty but keep their
        trailing dimensions (``(0, window, n)`` and ``(0, n)``).

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be a positive integer, got {window}")

    scaled_data = np.asarray(scaler.fit_transform(data[[column]].values))
    total_rows = len(scaled_data)

    if total_rows <= window:
        # Not enough rows for a single sample: return well-shaped empties
        # instead of shape-less (0,) arrays.
        n_features = scaled_data.shape[1] if scaled_data.ndim > 1 else 1
        return np.empty((0, window, n_features)), np.empty((0, n_features))

    X = np.stack(
        [scaled_data[end - window:end] for end in range(window, total_rows)]
    )
    # The target of each window is the row immediately after it.
    y = np.array(scaled_data[window:])
    return X, y

def _load_or_train_model(model, column, X, y):
    """Restore cached weights for *column*, or train the model and cache them.

    Cached weights are reused only when both ``<column>.weights.h5`` and
    ``<column>.hash`` exist and the stored hash equals ``MODEL_HASH``.
    Otherwise the model is fitted on ``(X, y)`` and the weights file and
    hash file are (re)written.

    Raises:
        ValueError: If training is required but ``X`` holds no samples.
    """
    weights_file = f"{column}.weights.h5"
    hash_file = f"{column}.hash"

    if os.path.exists(weights_file) and os.path.exists(hash_file):
        with open(hash_file, 'r', encoding='utf-8') as f:
            saved_hash = f.read().strip()
        if saved_hash == MODEL_HASH:
            model.load_weights(weights_file)
            return
        print(f"Model changed for {column}. Retraining...")
    else:
        print(f"No saved weights for {column}. Training...")

    if len(X) == 0:
        raise ValueError(
            f"Not enough rows to train a model for {column}: "
            "more than 60 rows are required."
        )

    model.fit(X, y, epochs=50, batch_size=32, verbose=0)
    model.save_weights(weights_file)
    # Write the hash only after the weights are saved, so a stored hash
    # always corresponds to an existing weights file.
    with open(hash_file, 'w', encoding='utf-8') as f:
        f.write(MODEL_HASH)


def predict_next_30_days(column):
    """Forecast the next 30 values of *column* from the module-level ``data``.

    The column is min-max scaled to [0, 1], a model is loaded from cache or
    trained, and 30 values are then predicted one step at a time. Each
    prediction is appended to the 60-step input window before the next step.

    Args:
        column: Name of the column in ``data`` to forecast.

    Returns:
        A 1-D array of 30 predictions, converted back to the column's
        original scale.

    Raises:
        ValueError: If ``data`` has fewer than 60 rows, or if training is
            needed and there are not more than 60 rows.
    """
    print(f"Processing {column}")

    if len(data) < 60:
        # Fail early with a clear message instead of a reshape error later.
        raise ValueError(
            f"At least 60 rows are required to forecast {column}, "
            f"got {len(data)}."
        )

    scaler = MinMaxScaler(feature_range=(0, 1))
    X, y = prepare_data(data, column, scaler)

    model = create_lstm_model((60, 1))
    _load_or_train_model(model, column, X, y)

    # Seed the rolling window with the most recent 60 scaled observations.
    last_60_days = scaler.transform(data[[column]].tail(60).values)
    current_batch = last_60_days.reshape((1, 60, 1))

    predictions = []
    for _ in range(30):
        current_pred = model.predict(current_batch, verbose=0)
        predictions.append(current_pred[0, 0])
        # Shift the window left by one step and put the new prediction last.
        current_batch = np.roll(current_batch, -1, axis=1)
        current_batch[0, -1, 0] = current_pred[0, 0]

    return scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

def predict_next_30_days_adjusted():
    """Forecast every available column and reconcile the values per day.

    For each of the 30 forecast days, the high is the largest predicted
    value across the available columns and the low is the smallest. The
    close is the 'Close' prediction clipped into ``[low, high]`` when a
    'Close' forecast exists, and the mean of that day's values otherwise.

    Returns:
        An array with three rows, in the order high, low, close, and one
        column per forecast day.
    """
    forecasts = {}
    for name in available_columns:
        forecasts[name] = predict_next_30_days(name)

    has_close = 'Close' in forecasts

    daily_rows = []
    for day in range(30):
        day_values = [forecasts[name][day] for name in available_columns]
        day_high, day_low = max(day_values), min(day_values)
        if has_close:
            day_close = np.clip(forecasts['Close'][day], day_low, day_high)
        else:
            day_close = np.mean(day_values)
        daily_rows.append((day_high, day_low, day_close))

    # Transpose so callers can unpack the result as (high, low, close).
    return np.array(daily_rows).T

print("\nStarting predictions:")
prediction_results = predict_next_30_days_adjusted()

# The result holds one row per series, in the order high, low, close.
next_30_days_high, next_30_days_low, next_30_days_close = prediction_results

print(f"Predictions for Close: {next_30_days_close}")
print(f"Predictions for High: {next_30_days_high}")
print(f"Predictions for Low: {next_30_days_low}")

# Forecast dates start the day after the latest date in the input data.
first_forecast_day = data['Date'].max() + timedelta(days=1)
next_30_days_dates = pd.date_range(start=first_forecast_day, periods=30)

next_30_days_df = pd.DataFrame({'Date': next_30_days_dates}).assign(
    High=next_30_days_high,
    Low=next_30_days_low,
    Close=next_30_days_close,
)

# Columns that are not forecast are added as NaN placeholders.
for col in ('Open', 'Volume'):
    if col not in next_30_days_df.columns:
        next_30_days_df[col] = np.nan

print(next_30_days_df)
next_30_days_df.to_csv('/home/predictions.csv', index=False)

# Also write the dates and the price columns as separate files.
next_30_days_df[['Date']].to_csv('/home/predictions_dates.csv', index=False)
price_columns = [
    name for name in ['Close', 'High', 'Low']
    if name in next_30_days_df.columns
]
next_30_days_df[price_columns].to_csv('/home/predictions_prices.csv', index=False)

Columns in the data:
Index(['Date', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'MarketCap'], dtype='object')
Columns being used for prediction: ['High', 'Low', 'Close']

Starting predictions:
Processing High
No saved weights for High. Training...
