In [1]:
!pip install -q google-cloud==0.34.0 google-cloud-bigquery matplotlib pandas_ta scikit-learn emp-orderly-types emp-orderly setuptools ccxt pandas-gbq


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
!pip install -q numpy==1.26.4


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
#Importing libraries
import time
import warnings
import os
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import pandas_ta as ta
import joblib
import ccxt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
from google.cloud import bigquery
from google.cloud import storage
from google.oauth2 import service_account

# Suppress any warnings for cleaner output
warnings.filterwarnings('ignore')

In [4]:
# Load environment variables from .env file
load_dotenv()

# Retrieve GCP project ID and credentials path from environment variables
project_id = os.getenv("GCP_PROJECT_ID")
credentials_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")

# Ensure that both environment variables are available
if not project_id or not credentials_path:
    raise ValueError("GCP_PROJECT_ID and GOOGLE_APPLICATION_CREDENTIALS must be set in the .env file.")

# Initialize Google Cloud credentials and clients
credentials = service_account.Credentials.from_service_account_file(credentials_path)
bigquery_client = bigquery.Client(credentials=credentials, project=project_id)
storage_client = storage.Client(credentials=credentials, project=project_id)

print(f"Successfully initialized GCP clients for project: {project_id}")

Successfully initialized GCP clients for project: streamlit-apps-431010


In [5]:
def fetch_ohlcv_data(exchange_name, symbol, timeframe, since):
    """
    Fetch OHLCV data from a specified exchange using the ccxt library.

    Parameters:
    - exchange_name (str): The name of the exchange (e.g., 'binance').
    - symbol (str): The trading pair symbol (e.g., 'BTC/USDT').
    - timeframe (str): The timeframe for the OHLCV data (e.g., '1h', '1d').
    - since (int): Timestamp (in milliseconds) from which to start fetching data.

    Returns:
    - pd.DataFrame: DataFrame containing OHLCV data.
    """
    try:
        exchange = getattr(ccxt, exchange_name)()
        exchange.load_markets()

        all_data = []
        while since < time.time() * 1000:
            ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=1000)
            if len(ohlcv) == 0:
                break
            all_data.extend(ohlcv)
            since = ohlcv[-1][0] + 1
            time.sleep(exchange.rateLimit / 1000)

        df = pd.DataFrame(all_data, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        return df
    except Exception as e:
        print(f"Error fetching data: {e}")
        return None

In [6]:
def save_to_bigquery(dataframe, table_id):
    """
    Save a pandas DataFrame to a Google BigQuery table.
    
    Parameters:
    - dataframe (pd.DataFrame): The DataFrame to be saved.
    - table_id (str): The BigQuery table identifier in the format 'dataset.table'.
    """
    try:
        dataframe.to_gbq(destination_table=table_id, project_id=project_id, if_exists='replace', credentials=credentials)
        print(f"Data successfully saved to BigQuery table: {table_id}")
    except Exception as e:
        print(f"Error saving data to BigQuery: {e}")

In [7]:
def save_model_to_gcs(model, scaler, bucket_name, model_filename, scaler_filename):
    """
    Save a machine learning model and its corresponding scaler to a Google Cloud Storage bucket.
    
    Parameters:
    - model: The trained machine learning model.
    - scaler: The scaler object used for data preprocessing.
    - bucket_name (str): GCS bucket name.
    - model_filename (str): Filename for the model.
    - scaler_filename (str): Filename for the scaler.
    """
    try:
        bucket = storage_client.bucket(bucket_name)
        if not bucket.exists():
            bucket = storage_client.create_bucket(bucket_name)
            print(f"Bucket '{bucket_name}' created.")

        joblib.dump(model, model_filename)
        joblib.dump(scaler, scaler_filename)

        model_blob = bucket.blob(model_filename)
        scaler_blob = bucket.blob(scaler_filename)

        model_blob.upload_from_filename(model_filename)
        scaler_blob.upload_from_filename(scaler_filename)

        print(f"Model and scaler saved to GCS bucket: {bucket_name}")
    except Exception as e:
        print(f"Error saving model to GCS: {e}")

In [8]:
def add_technical_indicators(df):
    """
    Add common technical indicators to a given price DataFrame.
    
    Parameters:
    - df (pd.DataFrame): DataFrame containing OHLCV data.
    
    Returns:
    - pd.DataFrame: DataFrame with added technical indicators.
    """
    df['SMA_10'] = ta.sma(df['close'], length=10)
    df['SMA_20'] = ta.sma(df['close'], length=20)
    df['RSI'] = ta.rsi(df['close'], length=14)

    macd = ta.macd(df['close'], fast=12, slow=26, signal=9)
    df['MACD'] = macd['MACD_12_26_9']
    df['MACD_signal'] = macd['MACDs_12_26_9']

    bbands = ta.bbands(df['close'], length=20)
    df['BB_upper'] = bbands['BBU_20_2.0']
    df['BB_middle'] = bbands['BBM_20_2.0']
    df['BB_lower'] = bbands['BBL_20_2.0']

    df['ATR'] = ta.atr(df['high'], df['low'], df['close'], length=14)
    df['MOM'] = ta.mom(df['close'], length=10)
    df['ROC'] = ta.roc(df['close'], length=10)

    df.dropna(inplace=True)
    return df

In [9]:
def prepare_training_data(df):
    """
    Prepare feature matrix and target vector for training a machine learning model.
    
    Parameters:
    - df (pd.DataFrame): DataFrame with technical indicators and price data.
    
    Returns:
    - Scaled training and testing data along with the target vectors.
    """
    df['target'] = np.where(df['close'].shift(-1) > df['close'] * 1.0035, 1, 0)
    
    features = ['SMA_10', 'SMA_20', 'RSI', 'MACD', 'MACD_signal', 'BB_upper', 'BB_middle', 'BB_lower', 'ATR', 'MOM', 'ROC']
    X = df[features]
    y = df['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test, scaler

In [10]:
if __name__ == "__main__":
    exchange_name = "kucoin"
    symbol = "ETH/USDT"
    timeframe = "5m"
    since = int(time.mktime(time.strptime('2019-01-01', '%Y-%m-%d'))) * 1000

    data = fetch_ohlcv_data(exchange_name, symbol, timeframe, since)

    if data is not None:
        save_to_bigquery(data, "crypto_dataset.raw_prices")
        data = add_technical_indicators(data)
        save_to_bigquery(data, "crypto_dataset.processed_prices")

        X_train, X_test, y_train, y_test, scaler = prepare_training_data(data)

        model = RandomForestClassifier(n_estimators=300, random_state=42)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        print(f"Model Accuracy: {accuracy_score(y_test, y_pred)}")
        print(classification_report(y_test, y_pred))

        save_model_to_gcs(
            model, 
            scaler, 
            bucket_name=f'{project_id}-crypto_trading_bucket', 
            model_filename='trading_model.pkl', 
            scaler_filename='scaler.pkl'
        )
    else:
        print("Data fetching failed. Please check your parameters and try again.")

100%|██████████| 1/1 [00:00<00:00, 9709.04it/s]


Data successfully saved to BigQuery table: crypto_dataset.raw_prices


100%|██████████| 1/1 [00:00<00:00, 14979.66it/s]


Data successfully saved to BigQuery table: crypto_dataset.processed_prices
Model Accuracy: 0.9480563389573334
              precision    recall  f1-score   support

           0       0.95      1.00      0.97    115144
           1       0.31      0.02      0.04      6122

    accuracy                           0.95    121266
   macro avg       0.63      0.51      0.51    121266
weighted avg       0.92      0.95      0.93    121266

Model and scaler successfully saved to GCS bucket: 'streamlit-apps-431010-crypto_trading_bucket'
