In [1]:
import pandas as pd
import numpy as np

# Load cleaned data from previous step
df = pd.read_csv('../data/processed/cleaned_crypto.csv', parse_dates=['date'])
df = df.sort_values('date').reset_index(drop=True)
df.head()


Unnamed: 0,open,high,low,close,volume,marketCap,timestamp,crypto_name,date
0,112.900002,118.800003,107.142998,115.910004,0.0,1288693000.0,2013-05-05T23:59:59.999Z,Bitcoin,2013-05-05
1,3.49313,3.69246,3.34606,3.59089,0.0,62298190.0,2013-05-05T23:59:59.999Z,Litecoin,2013-05-05
2,115.980003,124.663002,106.639999,112.300003,0.0,1249023000.0,2013-05-06T23:59:59.999Z,Bitcoin,2013-05-06
3,3.59422,3.78102,3.11602,3.37125,0.0,58594360.0,2013-05-06T23:59:59.999Z,Litecoin,2013-05-06
4,112.25,113.444,97.699997,111.5,0.0,1240594000.0,2013-05-07T23:59:59.999Z,Bitcoin,2013-05-07


In [2]:
# 1. Daily log return
df['log_return'] = np.log(df['close'] / df['close'].shift(1))


In [3]:
# 2. Rolling volatility (past variation)
df['volatility_7d'] = df['log_return'].rolling(window=7).std()
df['volatility_21d'] = df['log_return'].rolling(window=21).std()

In [4]:
# 3. Moving averages
df['ma_7'] = df['close'].rolling(window=7).mean()
df['ma_21'] = df['close'].rolling(window=21).mean()


In [5]:
# 4. Volume ratio (relative activity)
df['vol_ratio'] = df['volume'] / df['volume'].rolling(window=7).mean()

In [6]:
# 5. Target variable (next week’s volatility)
df['target_vol_7d'] = df['volatility_7d'].shift(-7)


In [7]:
# 6. Drop missing rows caused by rolling & shifting
df.dropna(inplace=True)
df.head()

Unnamed: 0,open,high,low,close,volume,marketCap,timestamp,crypto_name,date,log_return,volatility_7d,volatility_21d,ma_7,ma_21,vol_ratio,target_vol_7d
629,763.280029,777.51001,713.599976,735.070007,46862700.0,8955395000.0,2013-12-27T23:59:59.999Z,Bitcoin,2013-12-27,-0.035955,8.901689,8.581779,314.866248,201.611326,7.0,7.884601
630,24.806601,25.2691,22.263599,23.272301,31112200.0,566088000.0,2013-12-27T23:59:59.999Z,Litecoin,2013-12-27,-3.452702,8.9019,8.627728,220.732288,202.719516,2.793019,8.783791
631,0.000603,0.000628,0.000497,0.000522,477422.0,8016604.0,2013-12-27T23:59:59.999Z,Dogecoin,2013-12-27,-10.705298,8.970098,8.618737,220.729214,201.916912,0.042599,7.954442
632,0.024426,0.027123,0.023527,0.027076,148422.0,211674100.0,2013-12-27T23:59:59.999Z,XRP,2013-12-27,3.948912,9.131993,8.550137,220.729598,201.917168,0.013218,8.223179
633,0.027554,0.028106,0.026629,0.027303,143404.0,213453400.0,2013-12-28T23:59:59.999Z,XRP,2013-12-28,0.008371,8.619843,8.222379,217.196827,172.528944,0.012748,8.258219


In [8]:
df[['date', 'close', 'log_return', 'volatility_7d', 'ma_7', 'target_vol_7d']].head(10)


Unnamed: 0,date,close,log_return,volatility_7d,ma_7,target_vol_7d
629,2013-12-27,735.070007,-0.035955,8.901689,314.866248,7.884601
630,2013-12-27,23.272301,-3.452702,8.9019,220.732288,8.783791
631,2013-12-27,0.000522,-10.705298,8.970098,220.729214,7.954442
632,2013-12-27,0.027076,3.948912,9.131993,220.729598,8.223179
633,2013-12-28,0.027303,0.008371,8.619843,217.196827,8.258219
634,2013-12-28,0.000459,-4.086727,7.705493,217.196807,9.212422
635,2013-12-28,727.830017,14.277546,7.77463,212.318241,7.661536
636,2013-12-28,22.5627,-3.473769,7.884601,110.531482,7.657348
637,2013-12-29,0.000454,-10.812655,8.783791,107.206933,8.606216
638,2013-12-29,0.027218,4.092465,7.954442,107.210747,9.500548


In [9]:
df.to_csv('../data/processed/features_crypto.csv', index=False)
print("✅ Feature-engineered data saved successfully!")


✅ Feature-engineered data saved successfully!
