In [2]:
import pandas as pd
import numpy as np

In [3]:
# load data
apple_df = pd.read_csv('data/AAPL.csv')

apple_df.head()

Unnamed: 0,Date,Close,Volume,Open,High,Low
0,2023-07-21,191.94,71904040,194.1,194.97,191.23
1,2023-07-20,193.13,59581200,195.09,196.47,192.495
2,2023-07-19,195.1,80507320,193.1,198.23,192.65
3,2023-07-18,193.73,48353770,193.35,194.33,192.415
4,2023-07-17,193.99,50520160,191.9,194.32,191.81


In [4]:
# daily returns feature
apple_df['Daily_Return'] = apple_df['Close'].pct_change( periods = 1 )
apple_df.head()

# 5 day rolling average for close price
apple_df['5_day_mean_close_price'] = apple_df['Close'].rolling(5).mean()
apple_df.head(10)

# 5 day rolling average of volume
apple_df['5_day_mean_volume'] = apple_df['Volume'].rolling(5).mean()
apple_df.head(10)


# Daily_Range = High - Low
apple_df['Daily_Range'] = apple_df['High'] - apple_df['Low']


# Volaity = 5 day rolling standard deviation of daily returns
apple_df['Volatility'] = apple_df['Daily_Return'].rolling(5).std()
apple_df.head(10)


Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility
0,2023-07-21,191.94,71904040,194.1,194.97,191.23,,,,3.74,
1,2023-07-20,193.13,59581200,195.09,196.47,192.495,0.0062,,,3.975,
2,2023-07-19,195.1,80507320,193.1,198.23,192.65,0.0102,,,5.58,
3,2023-07-18,193.73,48353770,193.35,194.33,192.415,-0.007022,,,1.915,
4,2023-07-17,193.99,50520160,191.9,194.32,191.81,0.001342,193.578,62173298.0,2.51,
5,2023-07-14,190.69,41616240,190.23,191.1799,189.63,-0.017011,193.328,56115738.0,1.5499,0.010898
6,2023-07-13,190.54,41342340,190.5,191.19,189.78,-0.000787,192.81,52467966.0,1.41,0.010123
7,2023-07-12,189.77,60750250,189.68,191.7,188.47,-0.004041,191.744,48516552.0,3.23,0.007176
8,2023-07-11,188.08,46638120,189.16,189.3,186.6,-0.008906,190.614,48173422.0,2.7,0.007324
9,2023-07-10,188.61,59922160,189.26,189.99,187.035,0.002818,189.538,50053822.0,2.955,0.007707


In [5]:
# create new column called Quarter
apple_df['Quarter'] = pd.PeriodIndex(apple_df['Date'], freq='Q')

In [6]:
apple_df.head(35)

# impuute missing values
apple_df['5_day_mean_close_price'] = apple_df['5_day_mean_close_price'].fillna(0)
apple_df['5_day_mean_volume'] = apple_df['5_day_mean_volume'].fillna(0)
apple_df['Volatility'] = apple_df['Volatility'].fillna(0)
apple_df['Daily_Return'] = apple_df['Daily_Return'].fillna(0)

apple_df.head(10)

Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility,Quarter
0,2023-07-21,191.94,71904040,194.1,194.97,191.23,0.0,0.0,0.0,3.74,0.0,2023Q3
1,2023-07-20,193.13,59581200,195.09,196.47,192.495,0.0062,0.0,0.0,3.975,0.0,2023Q3
2,2023-07-19,195.1,80507320,193.1,198.23,192.65,0.0102,0.0,0.0,5.58,0.0,2023Q3
3,2023-07-18,193.73,48353770,193.35,194.33,192.415,-0.007022,0.0,0.0,1.915,0.0,2023Q3
4,2023-07-17,193.99,50520160,191.9,194.32,191.81,0.001342,193.578,62173298.0,2.51,0.0,2023Q3
5,2023-07-14,190.69,41616240,190.23,191.1799,189.63,-0.017011,193.328,56115738.0,1.5499,0.010898,2023Q3
6,2023-07-13,190.54,41342340,190.5,191.19,189.78,-0.000787,192.81,52467966.0,1.41,0.010123,2023Q3
7,2023-07-12,189.77,60750250,189.68,191.7,188.47,-0.004041,191.744,48516552.0,3.23,0.007176,2023Q3
8,2023-07-11,188.08,46638120,189.16,189.3,186.6,-0.008906,190.614,48173422.0,2.7,0.007324,2023Q3
9,2023-07-10,188.61,59922160,189.26,189.99,187.035,0.002818,189.538,50053822.0,2.955,0.007707,2023Q3


In [7]:
# 5-day and 20-day exponential moving averages for closing price
apple_df['EMA_Close_5'] = apple_df['Close'].ewm(span=5, adjust=False).mean()
apple_df['EMA_Close_20'] = apple_df['Close'].ewm(span=20, adjust=False).mean()


In [9]:
# features needed to scale
output = apple_df["Close"]
features = list(apple_df.drop(["Open","Close","Date", "Quarter", "High", "Low"], axis=1).columns)
print(list(features))

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
apple_df[features] = scaler.fit_transform(apple_df[features])
apple_df.head()

['Volume', 'Daily_Return', '5_day_mean_close_price', '5_day_mean_volume', 'Daily_Range', 'Volatility', 'EMA_Close_5', 'EMA_Close_20']


Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility,Quarter,EMA_Close_5,EMA_Close_20
0,2023-07-21,191.94,0.039229,194.1,194.97,191.23,0.420177,0.0,0.0,0.285545,0.0,2023Q3,0.990638,0.996143
1,2023-07-20,193.13,0.02731,195.09,196.47,192.495,0.444526,0.0,0.0,0.304057,0.0,2023Q3,0.992866,0.996786
2,2023-07-19,195.1,0.047551,193.1,198.23,192.65,0.460237,0.0,0.0,0.430484,0.0,2023Q3,0.99804,0.998432
3,2023-07-18,193.73,0.01645,193.35,194.33,192.415,0.3926,0.0,0.0,0.141788,0.0,2023Q3,0.998924,0.999182
4,2023-07-17,193.99,0.018546,191.9,194.32,191.81,0.425448,1.0,0.09551,0.188657,0.0,2023Q3,1.0,1.0


In [10]:
# save to csv
apple_df.to_csv('data/clean/AAPL_feature_engineered.csv', index=False)

In [11]:
apple_df.head(10)

Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility,Quarter,EMA_Close_5,EMA_Close_20
0,2023-07-21,191.94,0.039229,194.1,194.97,191.23,0.420177,0.0,0.0,0.285545,0.0,2023Q3,0.990638,0.996143
1,2023-07-20,193.13,0.02731,195.09,196.47,192.495,0.444526,0.0,0.0,0.304057,0.0,2023Q3,0.992866,0.996786
2,2023-07-19,195.1,0.047551,193.1,198.23,192.65,0.460237,0.0,0.0,0.430484,0.0,2023Q3,0.99804,0.998432
3,2023-07-18,193.73,0.01645,193.35,194.33,192.415,0.3926,0.0,0.0,0.141788,0.0,2023Q3,0.998924,0.999182
4,2023-07-17,193.99,0.018546,191.9,194.32,191.81,0.425448,1.0,0.09551,0.188657,0.0,2023Q3,1.0,1.0
5,2023-07-14,190.69,0.009933,190.23,191.1799,189.63,0.35337,0.998709,0.086204,0.113029,0.099251,2023Q3,0.994539,0.998957
6,2023-07-13,190.54,0.009668,190.5,191.19,189.78,0.417088,0.996033,0.080601,0.102009,0.092193,2023Q3,0.990618,0.997933
7,2023-07-12,189.77,0.028441,189.68,191.7,188.47,0.404307,0.990526,0.074531,0.245372,0.065354,2023Q3,0.986562,0.99659
8,2023-07-11,188.08,0.014791,189.16,189.3,186.6,0.385203,0.984688,0.074003,0.203623,0.066698,2023Q3,0.980694,0.994461
9,2023-07-10,188.61,0.02764,189.26,189.99,187.035,0.431244,0.97913,0.076892,0.22371,0.070191,2023Q3,0.977774,0.992822
