# Cryptocurrency Volatility Prediction

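End-to-end ML notebook: loads price data from `dataset.csv`, engineers log-return, 7-day rolling volatility and liquidity-ratio features, and fits a random-forest regressor to predict the 7-day volatility.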

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:

# Read the raw CSV into a DataFrame and preview the first rows
DATA_PATH = 'dataset.csv'
df = pd.read_csv(DATA_PATH)
df.head()


In [None]:

# Basic info: print the DataFrame summary (columns, dtypes, non-null counts)
# to spot missing values before cleaning.
df.info()


In [None]:

# Handle missing values: carry the last valid observation forward.
# DataFrame.ffill replaces fillna(method='ffill'), whose `method` argument is
# deprecated in pandas 2.1+ and removed in pandas 3.0.
# Leading rows with no earlier value stay NaN.
df.ffill(inplace=True)


In [None]:

# Feature Engineering
# Log return between consecutive rows: ln(close_t / close_{t-1}).
# NOTE(review): assumes rows are in chronological order for a single asset —
# confirm against the dataset; otherwise shift(1) pairs unrelated rows.
df['returns'] = np.log(df['close'] / df['close'].shift(1))
# Standard deviation of log returns over a 7-row rolling window; rows without
# a full window of valid returns are NaN.
df['volatility_7d'] = df['returns'].rolling(7).std()
df['liquidity_ratio'] = df['volume'] / df['market_cap']
# A zero close or zero market_cap yields +/-inf, which dropna() would keep and
# which downstream scaling/model fitting cannot handle; turn them into NaN so
# the affected rows are dropped together with the warm-up rows.
engineered_cols = ['returns', 'volatility_7d', 'liquidity_ratio']
df[engineered_cols] = df[engineered_cols].replace([np.inf, -np.inf], np.nan)
df.dropna(inplace=True)
df.head()


In [None]:

# EDA: histogram of the 7-day rolling volatility (30 bins)
fig, ax = plt.subplots()
sns.histplot(df['volatility_7d'], bins=30, ax=ax)
ax.set_title("Volatility Distribution")
plt.show()


In [None]:

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [None]:

# Model inputs (raw price/volume columns plus the engineered liquidity ratio)
# and the regression target (7-day rolling volatility).
features = ['open','high','low','close','volume','market_cap','liquidity_ratio']
X = df[features]
y = df['volatility_7d']

# Split BEFORE scaling so the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full data leaks test statistics).
# Same test_size/random_state as before, so the same rows land in each split.
# NOTE(review): rows are shuffled at random; because the target is a rolling
# window, neighbouring rows overlap — if this is a time series, consider a
# chronological split (shuffle=False) instead. Confirm with the data owner.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that still reads X_scaled continues to work; it is
# the full feature matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)


In [None]:

# Fit a 200-tree random forest with a fixed seed for reproducibility
rf_params = {"n_estimators": 200, "random_state": 42}
model = RandomForestRegressor(**rf_params)
model.fit(X_train, y_train)


In [None]:

# Score the fitted model on the held-out split
y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # root-mean-squared error, in the target's own units
r2 = r2_score(y_test, y_pred)

rmse, r2
