<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/BTC_13Y_DB_CRYPTO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import os

# Mount Google Drive under /content/gdrive; the paths used by the later cells
# all live below this mount point.
print("Mounting Google Drive...")
drive.mount('/content/gdrive')

# Create a directory in your Drive for your bot logs (optional but good practice)
# NOTE: the database and model paths in later cells spell out this same folder
# literally, so changing it here does not redirect them.
log_dir = '/content/gdrive/MyDrive/TradingBotLogs' # You can change 'TradingBotLogs'
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(log_dir, exist_ok=True)
print(f"Ensured log directory exists: {log_dir}")

In [2]:
# Show which files are present in the Drive `kraken` folder (the CSV input
# read by the conversion cell below comes from here).
!ls /content/gdrive/MyDrive/kraken/

ETHUSD.csv  LDOUSD.csv	SOLUSD.csv  TAOUSD.csv	XBTUSD.csv


In [3]:
import pandas as pd
import sqlite3
import os

# Function to resample raw tick data to OHLCV format
def _resample_to_ohlcv(raw_df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
    print(f"INFO - Resampling raw data to {timeframe} OHLCV candles.")
    raw_df['timestamp'] = pd.to_datetime(raw_df['timestamp'], unit='s', utc=True)
    raw_df.set_index('timestamp', inplace=True)
    ohlcv_data = raw_df['price'].resample(timeframe).ohlc()
    ohlcv_data['volume'] = raw_df['volume'].resample(timeframe).sum()
    ohlcv_data.dropna(inplace=True)
    ohlcv_data = ohlcv_data.reset_index()
    return ohlcv_data

# Function to process and clean data from CSV
def process_data_from_csv(file_path: str, timeframe: str = '1h'):
    """Load market data from a CSV file and return a cleaned OHLCV DataFrame.

    Two layouts are supported:

    * A file whose header row contains ``open`` and ``close`` columns
      (case-insensitive) is used as-is, with column names lower-cased.
    * Any other file is read as header-less tick data with the columns
      ``timestamp, price, volume`` and resampled to ``timeframe`` candles.

    The ``open``/``high``/``low``/``close``/``volume`` columns that are present
    are coerced to numeric, and rows containing any missing value are dropped.

    Args:
        file_path: Path to the CSV file.
        timeframe: Candle width used when resampling tick data.

    Returns:
        The processed DataFrame, or ``None`` if the file does not exist or any
        error occurs while reading/processing it (the reason is printed).
    """
    if not os.path.exists(file_path):
        print(f"CRITICAL - Local CSV file not found at: {file_path}")
        return None
    print(f"INFO - Loading historical data from local CSV: {file_path}")
    try:
        # Read only the header row to detect the layout; parsing the whole file
        # just to look at its column names is wasteful for large tick exports.
        header_cols = [
            col.lower()
            for col in pd.read_csv(file_path, header=0, nrows=0).columns
        ]
        if 'open' in header_cols and 'close' in header_cols:
            ohlcv_df = pd.read_csv(file_path, header=0)
            ohlcv_df.columns = header_cols
        else:
            print("INFO - CSV does not contain OHLCV headers. Attempting to load as raw tick data.")
            # header=None so the first line is treated as data, not as column names.
            raw_ticks = pd.read_csv(file_path, header=None, names=['timestamp', 'price', 'volume'])
            ohlcv_df = _resample_to_ohlcv(raw_ticks, timeframe)
        for col in ['open', 'high', 'low', 'close', 'volume']:
            if col in ohlcv_df.columns:
                # Unparseable values become NaN and are removed by dropna below.
                ohlcv_df[col] = pd.to_numeric(ohlcv_df[col], errors='coerce')
        ohlcv_df = ohlcv_df.dropna()
        print("INFO - CSV data loaded and processed successfully.")
        return ohlcv_df
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"CRITICAL - Error processing CSV file: {e}")
        return None

# --- Main script to convert CSV to SQLite ---
# Source CSV, destination database file, and destination table.
csv_file_path = '/content/gdrive/MyDrive/kraken/XBTUSD.csv'
sqlite_db_path = '/content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC.db'
table_name = 'btcusd_1h_data_13y'

# Process the data from CSV (returns None on failure; the helper prints the reason)
ohlcv_data_df = process_data_from_csv(csv_file_path)

if ohlcv_data_df is not None:
    print("\n--- Saving data to SQLite database ---")
    conn = sqlite3.connect(sqlite_db_path)
    try:
        # Write the DataFrame to an SQLite table; if_exists='replace' drops any
        # existing table of the same name first, so re-running the cell is safe.
        ohlcv_data_df.to_sql(table_name, conn, if_exists='replace', index=False)
    finally:
        # Always release the database handle, even if the write fails part-way.
        conn.close()
    print(f"Data successfully saved to {sqlite_db_path} in table '{table_name}'.")

INFO - Loading historical data from local CSV: /content/gdrive/MyDrive/kraken/XBTUSD.csv
INFO - CSV does not contain OHLCV headers. Attempting to load as raw tick data.
INFO - Resampling raw data to 1h OHLCV candles.
INFO - CSV data loaded and processed successfully.

--- Saving data to SQLite database ---
Data successfully saved to /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC.db in table 'btcusd_1h_data_13y'.


In [4]:
# Long listing of the .db files in the log folder, sorted newest first (-t),
# with human-readable sizes (-h) and allocated size (-s).
!ls -lths /content/gdrive/MyDrive/TradingBotLogs/*.db

9.2M -rw------- 1 root root 9.2M Sep 26 01:49 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC.db
3.2M -rw------- 1 root root 3.2M Sep 25 20:52 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_LDO.db
3.2M -rw------- 1 root root 3.2M Sep 24 16:46 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_SOL.db
 96K -rw------- 1 root root  96K Sep 24 14:08 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_ETH.db
 92K -rw------- 1 root root  92K Sep 23 23:26 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC_monthly.db
 92K -rw------- 1 root root  92K Sep 23 12:39 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC_new.db
3.1M -rw------- 1 root root 3.1M Sep 17 21:35 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data.db
1.2M -rw------- 1 root root 1.2M Sep 16 22:03 /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_TAO.db


In [6]:
# List the .keras files stored in the log folder.
!ls /content/gdrive/MyDrive/TradingBotLogs/*.keras

/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_BTC.keras
/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_ETH.keras
/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_LDO.keras
/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_SOL.keras
/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_TAO.keras


In [5]:
# Database file written by the CSV -> SQLite conversion cell.
BTC_SQLITE_DB_PATH = '/content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC.db'
# NOTE(review): depends on `table_name` still being bound by the conversion cell
# ('btcusd_1h_data_13y' there); on a fresh kernel that cell must run first.
BTC_SQLITE_TABLE_NAME = table_name
# Not referenced by the cells visible here -- presumably used further on; verify.
BTC_MODEL_PATH ='/content/gdrive/MyDrive/TradingBotLogs/crypto_model_retrained_500epochs_v3_BTC.keras'

import sqlite3
import pandas as pd

# Databases/tables to preview; one dict per symbol.
db_configs = [
    {'db_path': BTC_SQLITE_DB_PATH, 'table_name': BTC_SQLITE_TABLE_NAME, 'symbol': 'BTC/USD'},
]

# For each configured table: print the row count, then show the earliest and
# latest five rows ordered by timestamp.
for config in db_configs:
    db_path = config['db_path']
    table_name = config['table_name']
    symbol = config['symbol']

    print(f"\n--- Data for {symbol} (Table: {table_name}, DB: {db_path}) ---")

    # Bound before the try so the finally clause can tell whether connect() succeeded.
    conn = None
    try:
        conn = sqlite3.connect(db_path)

        # Get the total number of rows so empty/small tables are handled below
        count_query = f"SELECT COUNT(*) FROM {table_name}"
        total_rows = pd.read_sql_query(count_query, conn).iloc[0, 0]
        print(f"Total rows: {total_rows}")

        if total_rows == 0:
            print("Table is empty.")
            # The finally clause below still closes the connection.
            continue

        # Fetch first 5 rows
        head_query = f"SELECT * FROM {table_name} ORDER BY timestamp ASC LIMIT 5"
        df_head = pd.read_sql_query(head_query, conn)
        print("\nFirst 5 rows:")
        display(df_head)

        # Fetch last 5 rows: skip everything except the final `tail_limit` rows.
        # With fewer than 5 rows in total the offset clamps to 0 and the whole
        # table is returned.
        tail_limit = min(5, total_rows)
        offset = max(0, total_rows - tail_limit)
        tail_query = f"SELECT * FROM {table_name} ORDER BY timestamp ASC LIMIT {tail_limit} OFFSET {offset}"
        df_tail = pd.read_sql_query(tail_query, conn)
        print("\nLast 5 rows:")
        display(df_tail)

    except sqlite3.Error as e:
        print(f"Error accessing database {db_path} or table {table_name}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred for {symbol}: {e}")
    finally:
        # Close on every path (success, empty table, or a failed query) so a
        # raised error no longer leaves the database handle open.
        if conn is not None:
            conn.close()


--- Data for BTC/USD (Table: btcusd_1h_data_13y, DB: /content/gdrive/MyDrive/TradingBotLogs/ohlcv_data_BTC.db) ---
Total rows: 89788

First 5 rows:


Unnamed: 0,timestamp,open,high,low,close,volume
0,2013-10-06 21:00:00+00:00,122.0,122.0,122.0,122.0,0.1
1,2013-10-07 20:00:00+00:00,123.61,123.61,123.61,123.61,0.1
2,2013-10-08 02:00:00+00:00,123.91,123.91,123.9,123.9,1.9916
3,2013-10-08 05:00:00+00:00,124.19,124.19,124.18,124.18,2.0
4,2013-10-09 09:00:00+00:00,124.01687,124.01687,123.84,123.84,2.823



Last 5 rows:


Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-03-31 19:00:00+00:00,83286.1,83422.1,82368.9,82394.4,241.135396
1,2025-03-31 20:00:00+00:00,82394.5,82798.7,82358.5,82427.7,93.071084
2,2025-03-31 21:00:00+00:00,82427.7,82698.9,82427.7,82543.4,30.865337
3,2025-03-31 22:00:00+00:00,82543.7,82553.4,82259.3,82381.1,41.888085
4,2025-03-31 23:00:00+00:00,82386.0,82809.1,82305.5,82523.0,27.902205
