<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/KRAKEN_FETCH.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install ccxt, which the data-fetching cell below imports; -q keeps pip's output quiet.
!pip install ccxt -q
# NOTE(review): the notebook only imports `userdata` from `google.colab`;
# confirm this extra package is actually required before keeping this line.
!pip install google-colab-secrets -q

In [5]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; the SQLite database paths used by the
# later cells live under /content/gdrive/MyDrive, so this cell must run first.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [10]:
import ccxt
import pandas as pd
import sqlite3
from datetime import datetime, timedelta
from google.colab import userdata
import os

def fetch_and_store_data(symbol, timeframe, db_path, table_name, mode='replace', lookback_days=30):
    """
    Fetches recent historical OHLCV data from Kraken and stores it in an SQLite database.

    The number of candles requested is derived from ``timeframe`` so that the
    request always covers ``lookback_days`` days (720 candles for '1h' over the
    default 30 days). Errors are reported with ``print`` rather than raised.

    Args:
        symbol (str): The trading pair (e.g., 'ETH/USD').
        timeframe (str): The interval of each candle (e.g., '1h').
        db_path (str): The path to the SQLite database file.
        table_name (str): The name of the table to store the data in.
        mode (str): How to handle existing tables ('replace' or 'append');
            passed straight through to ``DataFrame.to_sql(if_exists=...)``.
        lookback_days (int): How many days of history to request (default 30).

    Returns:
        None. On a fetch error, an empty response, or a storage error a message
        is printed and the function returns early.
    """
    try:
        # Securely retrieve API keys from Colab's Secrets
        KRAKEN_API_KEY = userdata.get('KRAKEN')
        KRAKEN_API_SECRET = userdata.get('KRAKEN_SECRET')

        exchange_config = {
            'apiKey': KRAKEN_API_KEY,
            'secret': KRAKEN_API_SECRET,
            'enableRateLimit': True,
        }
        exchange = ccxt.kraken(exchange_config)

        # Calculate start time for the look-back window (epoch milliseconds)
        lookback = timedelta(days=lookback_days)
        start_time = datetime.now() - lookback
        since_timestamp = int(start_time.timestamp() * 1000)

        # Derive the candle count from the timeframe instead of assuming
        # hourly candles: ceil(window_seconds / candle_seconds), at least 1.
        timeframe_seconds = int(exchange.parse_timeframe(timeframe))
        lookback_seconds = int(lookback.total_seconds())
        limit = max(1, -(-lookback_seconds // timeframe_seconds))
        # NOTE(review): exchanges cap how many candles one request returns;
        # confirm Kraken's cap before relying on short timeframes covering
        # the full window.

        print(f"Fetching {limit} candles for {symbol} from {datetime.fromtimestamp(since_timestamp / 1000)}...")
        ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since_timestamp, limit=limit)

        if not ohlcv:
            print(f"Warning: No data fetched for {symbol}.")
            return

        df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
        df = df.dropna()
        # Exchange timestamps are epoch milliseconds; label them explicitly as UTC.
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms').dt.tz_localize('UTC')
        df.set_index('timestamp', inplace=True)

        print(f"Successfully fetched {len(df)} candles.")

    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")
        return

    try:
        # Use pandas to_sql to write the DataFrame to the database
        conn = sqlite3.connect(db_path)
        try:
            df.to_sql(table_name, conn, if_exists=mode, index=True)
        finally:
            # Always release the connection, even when the write fails.
            conn.close()

        print(f"Data for {symbol} successfully stored in {db_path}/{table_name} using '{mode}' mode.")

    except Exception as e:
        print(f"Error storing data to database: {e}")
        return

# Demo run: pull ETH/USD hourly candles and overwrite the 'recent' table
# in the Drive-hosted SQLite file.
db_file = '/content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_ETH.db'
table_name = 'ethusd_1h_data_recent'

print("--- Starting Test Case for ETH/USD ---")
fetch_and_store_data('ETH/USD', '1h', db_file, table_name, mode='replace')
print("--- Test Case for ETH/USD Complete ---")

--- Starting Test Case for ETH/USD ---
Fetching 720 candles for ETH/USD from 2025-08-18 14:15:58.902000...
Successfully fetched 720 candles.
Data for ETH/USD successfully stored in /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_ETH.db/ethusd_1h_data_recent using 'replace' mode.
--- Test Case for ETH/USD Complete ---


In [8]:
import pandas as pd
import sqlite3
import os

def validate_database_data(db_path, table_name):
    """
    Connects to the database and prints the first and last 5 rows of a table.

    Rows are ordered by the ``timestamp`` column: ascending for the first five,
    descending for the last five. Any failure is reported with ``print`` rather
    than raised.

    Args:
        db_path (str): Path to an existing SQLite database file.
        table_name (str): Name of the table to inspect.

    Returns:
        None.
    """
    try:
        # sqlite3.connect() would silently create an empty database for a
        # mistyped path, so refuse to proceed when the file is missing.
        if not os.path.isfile(db_path):
            raise FileNotFoundError(f"database file not found: {db_path}")

        # Quote the table as an SQL identifier (double quotes, with embedded
        # quotes doubled) rather than as a single-quoted string literal.
        quoted_table = '"' + str(table_name).replace('"', '""') + '"'

        conn = sqlite3.connect(db_path)
        try:
            # Get the first 5 rows (ordered by timestamp)
            first_rows_query = f"SELECT * FROM {quoted_table} ORDER BY timestamp ASC LIMIT 5"
            df_first = pd.read_sql_query(first_rows_query, conn)

            # Get the last 5 rows (ordered by timestamp)
            last_rows_query = f"SELECT * FROM {quoted_table} ORDER BY timestamp DESC LIMIT 5"
            df_last = pd.read_sql_query(last_rows_query, conn)
        finally:
            # Always release the connection, even when a query fails.
            conn.close()

        print(f"--- First 5 rows of '{table_name}' ---")
        print(df_first)
        print("\n")
        print(f"--- Last 5 rows of '{table_name}' ---")
        print(df_last)

    except Exception as e:
        print(f"Error validating data: {e}")

# Demo run: inspect the head and tail of the table written by the fetch step.
db_file = '/content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_ETH.db'
table_name = 'ethusd_1h_data_recent'

print("--- Starting Data Validation for ETH/USD ---")
validate_database_data(db_path=db_file, table_name=table_name)
print("\n--- Data Validation Complete ---")

--- Starting Data Validation for ETH/USD ---
--- First 5 rows of 'ethusd_1h_data_recent' ---
                   timestamp     open     high      low    close       volume
0  2025-08-18 14:00:00+00:00  4286.50  4335.19  4276.47  4334.00  1339.699320
1  2025-08-18 15:00:00+00:00  4334.00  4346.55  4300.27  4345.28  1256.947394
2  2025-08-18 16:00:00+00:00  4345.28  4361.01  4303.88  4324.36  2024.329272
3  2025-08-18 17:00:00+00:00  4323.22  4381.00  4315.20  4346.55  2102.715708
4  2025-08-18 18:00:00+00:00  4346.55  4374.00  4330.01  4369.07   871.008900


--- Last 5 rows of 'ethusd_1h_data_recent' ---
                   timestamp     open     high      low    close      volume
0  2025-09-17 13:00:00+00:00  4504.51  4512.02  4504.50  4506.96   56.458357
1  2025-09-17 12:00:00+00:00  4493.17  4512.11  4486.37  4504.51  318.562827
2  2025-09-17 11:00:00+00:00  4485.00  4500.14  4483.17  4493.16  184.780229
3  2025-09-17 10:00:00+00:00  4496.92  4501.67  4481.31  4485.00  275.863277
4  20