# 1. Data Exploration and Preparation

This notebook covers:
- Fetching cryptocurrency data from Hugging Face
- Data exploration and validation
- Identifying potential data quality issues


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Resolve the parent of the current working directory and put it at the front
# of sys.path. This must run before the `data.*` / `src.*` imports below.
# NOTE(review): presumably the notebook is launched from a sub-directory of the
# project (so the parent is the repository root) -- confirm; started from
# anywhere else, the imports below will not resolve.
project_root = Path('.').resolve().parent
sys.path.insert(0, str(project_root))

from data.fetch_data import CryptoDataFetcher
from src.config import config
from src.utils import print_data_info

## Step 1: Fetch BTC_15m Data

In [None]:
# Ask the project's fetcher for the 'BTCUSDT' symbol at the '15m' timeframe.
fetcher = CryptoDataFetcher()
symbol, timeframe = 'BTCUSDT', '15m'
btc_15m = fetcher.fetch_symbol_timeframe(symbol, timeframe)

# The result is checked against None before use; the overview is printed only
# when the fetch actually returned something.
if btc_15m is not None:
    print_data_info(btc_15m, 'BTC_15m')

## Step 2: Data Visualization

In [None]:
if btc_15m is not None:
    # Plot price (top) and volume (bottom).
    #
    # Every artist is drawn against the same positional x values (one unit per
    # row). Previously the close line was plotted without an explicit x, so it
    # was placed at the DataFrame's index values, while the high/low band and
    # the volume bars used row positions. With any non-default index (e.g.
    # timestamps) the line and the band ended up at different x coordinates on
    # the same axes.
    row_positions = range(len(btc_15m))

    fig, axes = plt.subplots(2, 1, figsize=(14, 8))
    price_ax, volume_ax = axes

    # Close price as a line, with the low-high range as a translucent band.
    price_ax.plot(row_positions, btc_15m['close'].to_numpy(), label='Close', linewidth=1)
    price_ax.fill_between(row_positions, btc_15m['low'], btc_15m['high'], alpha=0.2)
    price_ax.set_title('BTC Price (15m)')
    price_ax.set_ylabel('Price')
    price_ax.legend()
    price_ax.grid(True)

    # Volume per row; the x axis is the row position, in the frame's row order.
    volume_ax.bar(row_positions, btc_15m['volume'], alpha=0.7)
    volume_ax.set_title('BTC Volume (15m)')
    volume_ax.set_ylabel('Volume')
    volume_ax.set_xlabel('Time')
    volume_ax.grid(True)

    plt.tight_layout()
    plt.show()

## Step 3: Data Statistics

In [None]:
if btc_15m is not None:
    # Print pandas' `describe()` summary for the four OHLC price columns.
    print('Price Statistics:')
    price_columns = ['open', 'high', 'low', 'close']
    price_summary = btc_15m[price_columns].describe()
    print(price_summary)

## Next Step: Feature Engineering
Proceed to the next notebook, `02_feature_engineering.ipynb`.