01 - Exploratory Data Analysis

In [None]:
import sys
# Put the parent directory first on the module search path so the `src.*`
# imports below can resolve.
# NOTE(review): assumes the kernel's working directory sits one level below
# the project root — confirm before running from elsewhere.
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Project-local helpers: raw BTC loader and the cleaning step.
from src.data.load_data import load_raw_btc
from src.data.clean_data import clean_btc_data

# Use seaborn's 'whitegrid' style for the figures drawn in this notebook.
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

Load Data

In [None]:
# Fetch the raw BTC frame through the project loader.
# NOTE(review): `start` presumably bounds the history at 2014-01-01 and
# `save=True` presumably persists a copy — confirm in src/data/load_data.py.
df_raw = load_raw_btc(save=True, start='2014-01-01')

# Report how many rows came back, then preview the first few.
n_rows = len(df_raw)
print(f'Loaded {n_rows} rows')
df_raw.head()

Data Overview

In [None]:
# Print the raw frame's structural summary (pandas `DataFrame.info`) to check
# columns, dtypes and missing values before plotting.
df_raw.info()

In [None]:
# Descriptive statistics of the raw frame (pandas `DataFrame.describe` with
# its default column selection), shown as the cell output.
df_raw.describe()

Price Visualization

In [None]:
# Two stacked panels of the same close-price series: the top one on a linear
# y-axis, the bottom one on a logarithmic y-axis.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

title_font = {'fontsize': 14, 'fontweight': 'bold'}
panel_specs = (
    (ax1, 'linear', 'BTC Price (Linear Scale)'),
    (ax2, 'log', 'BTC Price (Log Scale)'),
)

for panel, y_scale, panel_title in panel_specs:
    panel.plot(df_raw['date'], df_raw['close'])
    # Linear is already the axis default, so only the log panel is switched.
    if y_scale != 'linear':
        panel.set_yscale(y_scale)
    panel.set_title(panel_title, **title_font)
    panel.set_ylabel('Price (USD)')
    panel.grid(True, alpha=0.3)

# Only the bottom panel carries the x-axis label.
ax2.set_xlabel('Date')

plt.tight_layout()
plt.show()

Clean Data and Create Basic Features

In [None]:
# Run the project cleaning step on the raw frame; the result is kept under a
# new name so `df_raw` stays available for comparison.
df_clean = clean_btc_data(df_raw)

# Fail fast with a readable message if the cleaning step stops producing the
# columns that the returns-analysis plots further down index by name.
required_cols = {'date', 'return', 'volatility_5d', 'volume'}
missing_cols = required_cols - set(df_clean.columns)
assert not missing_cols, (
    f'clean_btc_data output is missing columns: {sorted(missing_cols)}'
)

df_clean.head()

Returns Analysis

In [None]:
# 2x2 overview of the cleaned data: returns over time, returns histogram,
# rolling volatility, and trading volume. Every panel gets a title and both
# axis labels so the figure can be read without the surrounding code.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Returns over time, with a dashed red zero line as a visual reference.
axes[0, 0].plot(df_clean['date'], df_clean['return'])
axes[0, 0].set_title('Daily Returns')
axes[0, 0].axhline(y=0, color='r', linestyle='--', alpha=0.5)
axes[0, 0].set_xlabel('Date')
axes[0, 0].set_ylabel('Return')

# Returns distribution; missing values are dropped before binning.
axes[0, 1].hist(df_clean['return'].dropna(), bins=100)
axes[0, 1].set_title('Returns Distribution')
axes[0, 1].set_xlabel('Return')
axes[0, 1].set_ylabel('Count')

# Volatility over time (the `volatility_5d` column from the cleaning step).
axes[1, 0].plot(df_clean['date'], df_clean['volatility_5d'])
axes[1, 0].set_title('5-Day Rolling Volatility')
axes[1, 0].set_xlabel('Date')
axes[1, 0].set_ylabel('Volatility')

# Volume over time.
axes[1, 1].plot(df_clean['date'], df_clean['volume'])
axes[1, 1].set_title('Trading Volume')
axes[1, 1].set_xlabel('Date')
axes[1, 1].set_ylabel('Volume')

plt.tight_layout()
plt.show()