<a href="https://colab.research.google.com/github/jeshwanth-A/defi_aiml/blob/main/Defi_Aiml.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os, warnings
os.environ['JUPYTER_WIDGETS_ENABLED'] = 'false'
warnings.filterwarnings('ignore')

from IPython import get_ipython
if get_ipython():
    get_ipython().run_line_magic('config', "InlineBackend.figure_formats = ['png']")
    get_ipython().run_line_magic('matplotlib', 'inline')

!pip install -q numpy pandas torch torchvision torchaudio
!pip install -q tensorflow
!pip install -q scikit-learn matplotlib
!pip install -q transformers peft datasets accelerate
!pip install -q huggingface-hub

import http.client
import json
import logging
import urllib.error
import urllib.request

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import Dataset
from google.colab import drive
# Mount Google Drive; every artifact path below lives under /content/drive.
drive.mount('/content/drive')

# Project folder on Drive; created on first run, reused afterwards.
save_folder = '/content/drive/MyDrive/crypto_project'
os.makedirs(save_folder, exist_ok=True)

# Locations of the cached PyTorch weights, Keras model, LoRA adapter directory
# and pre-processed dataset, in that order.
pt_model_path, tf_model_path, llm_path, data_path = (
    os.path.join(save_folder, artifact_name)
    for artifact_name in (
        'model_pt.pth',
        'model_tf.h5',
        'finetuned_llm',
        'processed_data.csv',
    )
)

def fetch_and_parse(token_id='uniswap', days=30):
    """Download daily price/volume history for a token from the CoinGecko API.

    Args:
        token_id: CoinGecko coin identifier interpolated into the request URL.
        days: Value of the ``days`` query parameter (length of the history).

    Returns:
        A DataFrame with ``timestamp`` (datetime parsed from epoch
        milliseconds), ``price`` and ``volume`` columns, or ``None`` when the
        request fails or the payload is missing, empty or malformed. Failures
        are reported through ``logging`` instead of being dropped silently.
    """
    logger = logging.getLogger(__name__)
    url = f"https://api.coingecko.com/api/v3/coins/{token_id}/market_chart?vs_currency=usd&days={days}&interval=daily"

    try:
        # The timeout keeps a stalled connection from blocking the notebook forever.
        with urllib.request.urlopen(url, timeout=30) as response:
            json_data = json.loads(response.read().decode("utf-8"))
    except (urllib.error.URLError, http.client.HTTPException, OSError, ValueError) as e:
        # ValueError also covers invalid JSON and undecodable bytes.
        logger.warning("Could not fetch market data for %r: %s", token_id, e)
        return None

    if not isinstance(json_data, dict):
        logger.warning("Unexpected payload type for %r: %s", token_id, type(json_data).__name__)
        return None

    prices = json_data.get('prices')
    volumes = json_data.get('total_volumes')
    if not prices or not volumes:
        logger.warning("Market data for %r has no prices or volumes", token_id)
        return None

    try:
        # Volumes are attached positionally, so both series must line up.
        if len(prices) != len(volumes):
            logger.warning(
                "Market data for %r is inconsistent: %d prices vs %d volumes",
                token_id, len(prices), len(volumes),
            )
            return None

        df = pd.DataFrame(prices, columns=['timestamp', 'price'])
        df['volume'] = [v[1] for v in volumes]
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    except (ValueError, TypeError, IndexError, KeyError, OverflowError) as e:
        logger.warning("Malformed market data for %r: %s", token_id, e)
        return None

    return df

def add_sentiment(df):
    """Attach a reproducible placeholder ``sentiment`` column to ``df``.

    The values are synthetic: uniform draws from [-1, 1) produced by a private
    generator seeded with 42. They equal what ``np.random.seed(42)`` followed
    by ``np.random.uniform`` would give, but the global NumPy random state is
    left untouched.

    Args:
        df: DataFrame to annotate; it is modified in place.

    Returns:
        The same ``df`` object, now carrying a ``sentiment`` column.
    """
    rng = np.random.RandomState(42)
    df['sentiment'] = rng.uniform(-1, 1, size=len(df))
    return df

def create_sequences(data, seq_len=10):
    """Slice a feature frame into sliding windows with next-step price targets.

    Window ``i`` holds rows ``i .. i + seq_len - 1`` (all columns); its target
    is the ``price`` value of row ``i + seq_len``.

    Args:
        data: DataFrame containing a ``price`` column.
        seq_len: Requested window length. It is clamped to ``len(data) - 1`` so
            that at least one target row remains.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n, seq_len, n_columns)`` and ``y``
        has shape ``(n,)``. When no window of at least one row can be formed
        (fewer than two rows, or ``seq_len < 1``), both arrays are empty.
    """
    n_rows = len(data)
    seq_len = min(seq_len, n_rows - 1)
    if seq_len < 1:
        # Too little data for even one (window, target) pair.
        return np.empty((0, 0, data.shape[1])), np.empty((0,))

    # Convert once instead of going through .iloc for every window and target.
    values = data.to_numpy()
    price_col = data.columns.get_loc('price')

    X = np.stack([values[start:start + seq_len] for start in range(n_rows - seq_len)])
    y = values[seq_len:, price_col].copy()
    return X, y

class PricePredictor(nn.Module):
    """LSTM regressor that maps a feature sequence to a single value.

    The attribute names ``lstm`` and ``fc`` are part of the saved state-dict
    keys and must not be renamed.
    """

    def __init__(self, input_size=5, hidden_size=50, num_layers=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, sequence, features).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return one prediction per sequence, shape ``(batch, 1)``."""
        sequence_output, _state = self.lstm(x)
        # Only the hidden output of the final time step feeds the linear head.
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)

if os.path.exists(data_path):
    # Reuse the cached, already-engineered dataset; timestamps are read back
    # from the CSV and re-parsed into datetimes.
    df = pd.read_csv(data_path)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
else:
    df = fetch_and_parse(token_id='uniswap', days=30)

    if df is not None:
        df = df.dropna()
        if df.empty:
            # The rolling window and MinMaxScaler below both fail on zero
            # rows, so treat an empty result like a failed fetch.
            df = None

    if df is not None:
        df = add_sentiment(df)

        prices_np = df['price'].values
        # Shrink the window for very short histories so one window always fits.
        window_size = min(5, len(prices_np))
        volatility = np.std(
            np.lib.stride_tricks.sliding_window_view(prices_np, window_size),
            axis=1
        )
        # The first window_size-1 rows have no complete trailing window; they
        # reuse the first computed value (edge padding on the left).
        df['volatility'] = np.pad(volatility, (window_size - 1, 0), mode='edge')

        # NOTE(review): the scaler is fit on the whole series, i.e. before the
        # train/test split done later in this file, so test-period min/max
        # leak into the training features. Left as is to stay compatible with
        # already-cached data and models.
        scaler = MinMaxScaler()
        df[['price', 'volume', 'sentiment', 'volatility']] = scaler.fit_transform(
            df[['price', 'volume', 'sentiment', 'volatility']]
        )

        # Previous step's (scaled) price; the first row has none and is dropped.
        df['price_lag1'] = df['price'].shift(1)
        df = df.dropna()

        if not df.empty:
            # Never cache an empty dataset: it would block every later fetch.
            df.to_csv(data_path, index=False)

if df is not None and len(df) > 0:
    features = df[['price', 'volume', 'sentiment', 'volatility', 'price_lag1']]
    # Cap the window at half the available rows so short histories still
    # produce several sequences.
    seq_length = min(10, len(features) // 2)
    X, y = create_sequences(features, seq_len=seq_length)

    # Two sequences are the minimum for a non-empty train AND test part; with
    # fewer, the evaluation code below would run on an empty test set.
    if len(X) >= 2:
        # Chronological 80/20 split (no shuffling).
        split = max(1, int(0.8 * len(X)))
        X_train, X_test = X[:split], X[split:]
        y_train, y_test = y[:split], y[split:]

        # Baseline: a random forest sees each window flattened to one vector.
        # rf_preds / rf_mae are used by the comparison plots further down.
        rf = RandomForestRegressor(n_estimators=50, random_state=42, max_depth=10)
        rf.fit(X_train.reshape(X_train.shape[0], -1), y_train)
        rf_preds = rf.predict(X_test.reshape(X_test.shape[0], -1))
        rf_mae = mean_absolute_error(y_test, rf_preds)

        model_pt = PricePredictor(input_size=X_train.shape[2])

        # Prefer cached weights, but fall back to training when they cannot be
        # loaded (e.g. saved for a different feature count) -- the same policy
        # the TensorFlow section applies to its cached model.
        pt_weights_loaded = False
        if os.path.exists(pt_model_path):
            try:
                # NOTE(security): torch.load unpickles the file; only load
                # checkpoints this notebook wrote itself. Consider passing
                # weights_only=True where the installed torch supports it.
                # The model stays on the CPU here, hence map_location='cpu'.
                model_pt.load_state_dict(torch.load(pt_model_path, map_location='cpu'))
                pt_weights_loaded = True
            except Exception as e:
                logging.getLogger(__name__).warning(
                    "Ignoring cached PyTorch weights at %s (%s); retraining.",
                    pt_model_path, e,
                )
                # A failed load may have partially updated the parameters, so
                # restart from a fresh initialisation.
                model_pt = PricePredictor(input_size=X_train.shape[2])

        if not pt_weights_loaded:
            optimizer = torch.optim.Adam(model_pt.parameters(), lr=0.001)
            criterion = nn.MSELoss()
            X_train_pt = torch.tensor(X_train, dtype=torch.float32)
            # Targets become a column vector to match the model's (batch, 1) output.
            y_train_pt = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

            # Full-batch training; the mode only needs to be set once.
            model_pt.train()
            for epoch in range(50):
                outputs = model_pt(X_train_pt)
                loss = criterion(outputs, y_train_pt)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            torch.save(model_pt.state_dict(), pt_model_path)

        model_pt.eval()
        with torch.no_grad():
            X_test_pt = torch.tensor(X_test, dtype=torch.float32)
            preds_pt = model_pt(X_test_pt).numpy()

        pt_mae = mean_absolute_error(y_test, preds_pt.flatten())

        # Try the cached Keras model first; model_tf stays None when there is
        # no usable cache, which triggers training below.
        model_tf = None
        if os.path.exists(tf_model_path):
            try:
                model_tf = tf.keras.models.load_model(
                    tf_model_path,
                    custom_objects={'mse': tf.keras.losses.MeanSquaredError()}
                )
            except Exception as e:
                # Say why the cache is being thrown away instead of deleting
                # the file silently, then retrain from scratch.
                logging.getLogger(__name__).warning(
                    "Discarding unreadable Keras model at %s (%s); retraining.",
                    tf_model_path, e,
                )
                os.remove(tf_model_path)
                model_tf = None

        if model_tf is None:
            # Same architecture as the PyTorch model: LSTM(50) -> Dense(1).
            model_tf = Sequential([
                LSTM(50, input_shape=(X_train.shape[1], X_train.shape[2])),
                Dense(1)
            ])
            model_tf.compile(
                optimizer='adam',
                loss=tf.keras.losses.MeanSquaredError()
            )

            history = model_tf.fit(
                X_train, y_train,
                epochs=50,
                # Roughly ten mini-batches per epoch, never smaller than one sample.
                batch_size=max(1, len(X_train)//10),
                verbose=0
            )

            model_tf.save(tf_model_path)

        preds_tf = model_tf.predict(X_test, verbose=0)
        tf_mae = mean_absolute_error(y_test, preds_tf.flatten())

        # Four hand-written example sentences with binary labels, used as the
        # fine-tuning set for the sequence classifier.
        explanation_data = {
            'text': [
                "The prediction was close to actual value, showing good model performance.",
                "Large prediction error indicates model needs improvement.",
                "Price increased but prediction was lower, missing upward trend.",
                "Sentiment was positive and price rose as expected."
            ],
            'label': [1, 0, 0, 1]
        }

        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

        def preprocess(examples):
            """Tokenize a batch of texts, truncated/padded to 128 tokens."""
            return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

        dataset = Dataset.from_dict(explanation_data).map(preprocess, batched=True)
        dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

        # Both paths start from the same two-label DistilBERT classifier.
        base_model = AutoModelForSequenceClassification.from_pretrained(
            "distilbert-base-uncased", num_labels=2
        )

        if not os.path.exists(os.path.join(llm_path, 'adapter_config.json')):
            # No saved adapter yet: wrap the query/value projections with LoRA,
            # train, then persist the adapter.
            lora_config = LoraConfig(
                r=8,
                lora_alpha=32,
                target_modules=["q_lin", "v_lin"],
                lora_dropout=0.1,
                bias="none",
                task_type="SEQ_CLS"
            )
            model_llm = get_peft_model(base_model, lora_config)

            training_args = TrainingArguments(
                output_dir="./results",
                num_train_epochs=3,
                per_device_train_batch_size=2,
                save_strategy="epoch",
                logging_steps=10,
                report_to="none"
            )
            trainer = Trainer(
                model=model_llm,
                args=training_args,
                train_dataset=dataset
            )
            trainer.train()
            model_llm.save_pretrained(llm_path)
        else:
            # A saved adapter exists: attach it to the base model.
            model_llm = PeftModel.from_pretrained(base_model, llm_path)

        # 2x2 dashboard drawn on explicit Axes objects.
        fig = plt.figure(figsize=(15, 10))
        test_indices = range(len(y_test))

        # Top-left: actual vs. predicted (normalized) price on the test split.
        ax_forecast = fig.add_subplot(2, 2, 1)
        ax_forecast.plot(test_indices, y_test, 'g-', label='Actual', linewidth=2.5, marker='o', markersize=4)
        ax_forecast.plot(test_indices, preds_pt.flatten(), 'b--', label='PyTorch LSTM', alpha=0.8, linewidth=2)
        ax_forecast.plot(test_indices, preds_tf.flatten(), 'r--', label='TensorFlow LSTM', alpha=0.8, linewidth=2)
        ax_forecast.set_xlabel('Test Sample Index', fontsize=11)
        ax_forecast.set_ylabel('Normalized Price', fontsize=11)
        ax_forecast.set_title('Model Predictions vs Actual Price', fontsize=13, fontweight='bold')
        ax_forecast.legend(fontsize=10)
        ax_forecast.grid(True, alpha=0.3)

        # Top-right: per-sample absolute error of each model.
        pt_errors = np.abs(y_test - preds_pt.flatten())
        tf_errors = np.abs(y_test - preds_tf.flatten())
        rf_errors = np.abs(y_test - rf_preds)
        ax_errors = fig.add_subplot(2, 2, 2)
        ax_errors.plot(test_indices, pt_errors, 'b-', label='PyTorch Error', alpha=0.7, linewidth=2)
        ax_errors.plot(test_indices, tf_errors, 'r-', label='TensorFlow Error', alpha=0.7, linewidth=2)
        ax_errors.plot(test_indices, rf_errors, 'g-', label='Random Forest Error', alpha=0.5, linewidth=1.5)
        ax_errors.set_xlabel('Test Sample Index', fontsize=11)
        ax_errors.set_ylabel('Absolute Error', fontsize=11)
        ax_errors.set_title('Prediction Errors Comparison', fontsize=13, fontweight='bold')
        ax_errors.legend(fontsize=10)
        ax_errors.grid(True, alpha=0.3)

        # Bottom-left: sentiment and volatility over the last len(y_test) rows.
        recent_data = df.tail(len(y_test))
        ax_signals = fig.add_subplot(2, 2, 3)
        ax_signals.plot(recent_data['sentiment'].values, label='Sentiment', alpha=0.8, linewidth=2, color='purple')
        ax_signals.plot(recent_data['volatility'].values, label='Volatility', alpha=0.8, linewidth=2, color='orange')
        ax_signals.set_xlabel('Time Index', fontsize=11)
        ax_signals.set_ylabel('Normalized Value', fontsize=11)
        ax_signals.set_title('Sentiment & Volatility Trends', fontsize=13, fontweight='bold')
        ax_signals.legend(fontsize=10)
        ax_signals.grid(True, alpha=0.3)

        # Bottom-right: MAE of the three models as annotated bars.
        models = ['Random\nForest', 'PyTorch\nLSTM', 'TensorFlow\nLSTM']
        maes = [rf_mae, pt_mae, tf_mae]
        colors = ['#2ecc71', '#3498db', '#e74c3c']
        ax_scores = fig.add_subplot(2, 2, 4)
        bars = ax_scores.bar(models, maes, color=colors, alpha=0.7, edgecolor='black', linewidth=1.5)

        for bar, mae in zip(bars, maes):
            # Print each MAE value centred just above its bar.
            height = bar.get_height()
            ax_scores.text(bar.get_x() + bar.get_width()/2., height,
                           f'{mae:.4f}',
                           ha='center', va='bottom', fontsize=10, fontweight='bold')

        ax_scores.set_ylabel('Mean Absolute Error', fontsize=11)
        ax_scores.set_title('Model Performance Comparison', fontsize=13, fontweight='bold')
        ax_scores.grid(True, alpha=0.3, axis='y')

        # Save the figure next to the other artifacts, show it, then free it.
        fig.tight_layout()
        viz_path = os.path.join(save_folder, 'forecast_viz.png')
        fig.savefig(viz_path, dpi=150, bbox_inches='tight')
        plt.show()
        plt.close('all')