In [None]:
import pandas as pd
import numpy as np

In [None]:
# load data
apple_df = pd.read_csv('data/AAPL.csv')

apple_df.head()

In [None]:
# daily returns feature
apple_df['Daily_Return'] = apple_df['Close'].pct_change( periods = 1 )
apple_df.head()

# 5 day rolling average for close price
apple_df['5_day_mean_close_price'] = apple_df['Close'].rolling(5).mean()
apple_df.head(10)

# 5 day rolling average of volume
apple_df['5_day_mean_volume'] = apple_df['Volume'].rolling(5).mean()
apple_df.head(10)


# Daily_Range = High - Low
apple_df['Daily_Range'] = apple_df['High'] - apple_df['Low']


# Volaity = 5 day rolling standard deviation of daily returns
apple_df['Volatility'] = apple_df['Daily_Return'].rolling(5).std()
apple_df.head(10)


In [None]:
# create new column called Quarter
apple_df['Quarter'] = pd.PeriodIndex(apple_df['Date'], freq='Q')

In [23]:
apple_df.head(35)

# impuute missing values
apple_df['5_day_mean_close_price'] = apple_df['5_day_mean_close_price'].fillna(0)
apple_df['5_day_mean_volume'] = apple_df['5_day_mean_volume'].fillna(0)
apple_df['Volatility'] = apple_df['Volatility'].fillna(0)
apple_df['Daily_Return'] = apple_df['Daily_Return'].fillna(0)

apple_df.head(10)

Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility,Quarter
0,2023-07-21,191.94,71904040,194.1,194.97,191.23,0.0,0.0,0.0,3.74,0.0,2023Q3
1,2023-07-20,193.13,59581200,195.09,196.47,192.495,0.0062,0.0,0.0,3.975,0.0,2023Q3
2,2023-07-19,195.1,80507320,193.1,198.23,192.65,0.0102,0.0,0.0,5.58,0.0,2023Q3
3,2023-07-18,193.73,48353770,193.35,194.33,192.415,-0.007022,0.0,0.0,1.915,0.0,2023Q3
4,2023-07-17,193.99,50520160,191.9,194.32,191.81,0.001342,193.578,62173298.0,2.51,0.0,2023Q3
5,2023-07-14,190.69,41616240,190.23,191.1799,189.63,-0.017011,193.328,56115738.0,1.5499,0.010898,2023Q3
6,2023-07-13,190.54,41342340,190.5,191.19,189.78,-0.000787,192.81,52467966.0,1.41,0.010123,2023Q3
7,2023-07-12,189.77,60750250,189.68,191.7,188.47,-0.004041,191.744,48516552.0,3.23,0.007176,2023Q3
8,2023-07-11,188.08,46638120,189.16,189.3,186.6,-0.008906,190.614,48173422.0,2.7,0.007324,2023Q3
9,2023-07-10,188.61,59922160,189.26,189.99,187.035,0.002818,189.538,50053822.0,2.955,0.007707,2023Q3


In [24]:
# 5-day and 20-day exponential moving averages for closing price
apple_df['EMA_Close_5'] = apple_df['Close'].ewm(span=5, adjust=False).mean()
apple_df['EMA_Close_20'] = apple_df['Close'].ewm(span=20, adjust=False).mean()


In [25]:
# save to csv
apple_df.to_csv('data/clean/AAPL_feature_engineered.csv', index=False)

In [26]:
apple_df.head(10)

Unnamed: 0,Date,Close,Volume,Open,High,Low,Daily_Return,5_day_mean_close_price,5_day_mean_volume,Daily_Range,Volatility,Quarter,EMA_Close_5,EMA_Close_20
0,2023-07-21,191.94,71904040,194.1,194.97,191.23,0.0,0.0,0.0,3.74,0.0,2023Q3,191.94,191.94
1,2023-07-20,193.13,59581200,195.09,196.47,192.495,0.0062,0.0,0.0,3.975,0.0,2023Q3,192.336667,192.053333
2,2023-07-19,195.1,80507320,193.1,198.23,192.65,0.0102,0.0,0.0,5.58,0.0,2023Q3,193.257778,192.343492
3,2023-07-18,193.73,48353770,193.35,194.33,192.415,-0.007022,0.0,0.0,1.915,0.0,2023Q3,193.415185,192.47554
4,2023-07-17,193.99,50520160,191.9,194.32,191.81,0.001342,193.578,62173298.0,2.51,0.0,2023Q3,193.60679,192.619775
5,2023-07-14,190.69,41616240,190.23,191.1799,189.63,-0.017011,193.328,56115738.0,1.5499,0.010898,2023Q3,192.634527,192.435987
6,2023-07-13,190.54,41342340,190.5,191.19,189.78,-0.000787,192.81,52467966.0,1.41,0.010123,2023Q3,191.936351,192.255416
7,2023-07-12,189.77,60750250,189.68,191.7,188.47,-0.004041,191.744,48516552.0,3.23,0.007176,2023Q3,191.214234,192.01871
8,2023-07-11,188.08,46638120,189.16,189.3,186.6,-0.008906,190.614,48173422.0,2.7,0.007324,2023Q3,190.169489,191.643595
9,2023-07-10,188.61,59922160,189.26,189.99,187.035,0.002818,189.538,50053822.0,2.955,0.007707,2023Q3,189.64966,191.354681


[[[7.19040400e+07 1.94100000e+02 1.94970000e+02 ... 0.00000000e+00
   1.91940000e+02 1.91940000e+02]]

 [[5.95812000e+07 1.95090000e+02 1.96470000e+02 ... 0.00000000e+00
   1.92336667e+02 1.92053333e+02]]

 [[8.05073200e+07 1.93100000e+02 1.98230000e+02 ... 0.00000000e+00
   1.93257778e+02 1.92343492e+02]]

 ...

 [[1.77486320e+08 3.07725000e+01 3.09025000e+01 ... 1.07014759e-02
   3.04886514e+01 2.94220286e+01]]

 [[1.68361280e+08 3.13300000e+01 3.14350000e+01 ... 7.05687033e-03
   3.07007676e+01 2.95842163e+01]]

 [[2.03331800e+08 3.15500000e+01 3.17725000e+01 ... 6.78062744e-03
   3.08971784e+01 2.97466719e+01]]]
