In [21]:
import os
from dotenv import load_dotenv
import asyncpg
import asyncio
import nest_asyncio

nest_asyncio.apply()
load_dotenv()

async def get_solana_data():
    conn = await asyncpg.connect(user=os.getenv('DB_USER'), password=os.getenv('DB_PASSWORD'), database=os.getenv('DB_NAME'), host=os.getenv('DB_HOST'))
    solana_data = await conn.fetch('SELECT * FROM solana')
    await conn.close()
    return solana_data

In [60]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

In [68]:
class CryptoPredictor:
    def __init__(self):
        self.model = LinearRegression()
        self.scaler = MinMaxScaler()

    def preprocess_data(self, data, duration):
        # Convert to DataFrame
        df = pd.DataFrame(data, columns=["timestamp", "open", "high", "low", "close", "volume", "number_of_trades"])
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df.set_index("timestamp", inplace=True)

        # Resample data
        if duration in [1, 3]:
            df = df.resample('1h').mean()
        else:
            df = df.resample('1D').mean()

        # Drop missing values
        df = df.dropna()

        # Scale features
        features = df[["open", "high", "low", "close", "volume", "number_of_trades"]]
        self.scaler.fit(features)
        features = pd.DataFrame(self.scaler.transform(features), columns=features.columns, index=features.index)

        # Prepare target and timestamps
        df["target"] = df["close"].shift(-1)
        target = df["target"]
        features = features[:-1]
        target = target[:-1]
        timestamps = df.index[:-1]

        return features, target, timestamps

    def train(self, data, duration):
        features, target, _ = self.preprocess_data(data, duration)
        self.model.fit(features, target)

    def predict(self, data, duration):
        # Preprocess the data for training (using historical data)
        features, _, timestamps = self.preprocess_data(data, duration)

        # Determine forecast range based on duration
        if duration in [1, 3]:  # Hourly predictions for 1 or 3 days
            steps = 24 * duration
            freq = '1H'
        else:  # Daily predictions for durations > 3 days
            steps = duration
            freq = '1D'

        # Get the most recent data point to forecast from
        last_features = features.iloc[-1:].values  # Get the last row as the base input

        # Generate future timestamps
        future_timestamps = pd.date_range(
            start=timestamps[-1] + pd.Timedelta(freq),
            periods=steps,
            freq=freq
        )

        # Predict iteratively for the forecast period
        predictions = []
        for _ in range(steps):
            # Predict the next value
            next_prediction = self.model.predict(last_features)[0]
            predictions.append(next_prediction)

            # Update `last_features` for the next prediction
            next_features = last_features[0]  # Copy the latest features
            next_features[3] = next_prediction  # Update the 'close' column
            last_features = [next_features]  # Update for the next iteration

        # Denormalize predictions
        close_min = self.scaler.data_min_[3]  # Index 3 corresponds to 'close'
        close_max = self.scaler.data_max_[3]
        denorm_predictions = [p * (close_max - close_min) + close_min for p in predictions]

        # Structure the results
        results = [{"timestamp": ts.strftime("%Y-%m-%d %H:%M:%S"), "value": denorm_predictions[i]}
                for i, ts in enumerate(future_timestamps)]
        return results

In [69]:
predictor = CryptoPredictor()

data =  asyncio.run(get_solana_data())

# Remove the ID column
data = [(x[1], x[2], x[3], x[4], x[5], x[6], x[7]) for x in data]

# Train the model
predictor.train(data, duration=1)  # Train with hourly granularity

# Predict for the next 24 hours (based on duration = 1)
predictions = predictor.predict(data, duration=1)
print("Predictions:", len(predictions), predictions)

# Predict for the next 72 hours (based on duration = 3)
predictions = predictor.predict(data, duration=3)
print("Predictions:", len(predictions), predictions)

# Predict for the next 7 days (based on duration = 7)
predictions = predictor.predict(data, duration=7)
print("Predictions:", len(predictions), predictions)

Predictions: 1598 [{'timestamp': '2020-08-11 00:00:00', 'value': 900.2591241386518}, {'timestamp': '2020-08-12 00:00:00', 'value': 1017.3279635884857}, {'timestamp': '2020-08-13 00:00:00', 'value': 1013.1139316344645}, {'timestamp': '2020-08-14 00:00:00', 'value': 927.2380897015868}, {'timestamp': '2020-08-15 00:00:00', 'value': 869.9255217912528}, {'timestamp': '2020-08-16 00:00:00', 'value': 916.5107570126746}, {'timestamp': '2020-08-17 00:00:00', 'value': 845.8968668495655}, {'timestamp': '2020-08-18 00:00:00', 'value': 918.0088170000766}, {'timestamp': '2020-08-19 00:00:00', 'value': 797.7566106134362}, {'timestamp': '2020-08-20 00:00:00', 'value': 875.2184403405653}, {'timestamp': '2020-08-21 00:00:00', 'value': 802.8874311199139}, {'timestamp': '2020-08-22 00:00:00', 'value': 822.852486622539}, {'timestamp': '2020-08-23 00:00:00', 'value': 883.8448659702359}, {'timestamp': '2020-08-24 00:00:00', 'value': 909.7173760645388}, {'timestamp': '2020-08-25 00:00:00', 'value': 881.190905