## Script for data acquisition

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader.data as web
from ta import *

In [31]:
# Collect daily price data for 3M (ticker MMM) using pandas-datareader.
# The requested window is 2014-04-22 to 2019-05-22, but IEX only serves about the
# last 5 years of history for an S&P 500 symbol, so the returned frame may start
# later than `start`.

start = '2014-04-22'
end = '2019-05-22'
# DataReader already returns a DataFrame, so no extra pd.DataFrame(...) wrapper is needed.
df = web.DataReader(name='MMM', data_source='iex', start=start, end=end)

df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-05-28,124.7061,124.7853,124.0466,124.3719,1566124
2014-05-29,124.4335,125.2513,124.1609,125.2162,1624138
2014-05-30,124.9699,125.4624,124.6797,125.3569,3908258
2014-06-02,125.1106,125.6734,124.6006,125.1546,1682363
2014-06-03,124.9436,125.8669,124.5302,125.6559,2061733


In [32]:
# Quick data-quality report: per-column missing-value flags, then summary statistics.
print(df.isna().any(), df.describe(), sep="\n")

open      False
high      False
low       False
close     False
volume    False
dtype: bool
              open        high          low        close        volume
count  1256.000000  1256.00000  1256.000000  1256.000000  1.256000e+03
mean    170.627891   171.75055   169.446076   170.669804  2.295208e+06
std      31.534336    31.78322    31.163893    31.463441  1.083282e+06
min     117.266800   119.10680   115.532900   117.567600  6.510070e+05
25%     143.187925   144.29055   142.305150   143.243075  1.667414e+06
50%     166.068600   166.85485   165.218600   166.048050  2.032874e+06
75%     197.216400   198.68565   195.616775   197.263825  2.596403e+06
max     247.978700   249.18730   245.542100   248.093800  1.464616e+07


In [29]:
# Add all technical indicator features based on the data using the TA library (https://github.com/bukosabino/ta)
# NOTE(review): every statement below is commented out, so running this cell does nothing;
# any output displayed under it presumably comes from an earlier run made before the
# statements were disabled -- confirm, and clear the stale output if so.

#df_all_features = add_all_ta_features(df, "open", "high", "low", "close", "volume", fillna=False)
#print(sum(df_all_features.isnull().any() == True)) # Checking if any NA
#print(df_all_features.columns)
#df_all_features.to_csv("stock.csv")

20
Index(['open', 'high', 'low', 'close', 'volume', 'volume_adi', 'volume_obv',
       'volume_cmf', 'volume_fi', 'volume_em', 'volume_vpt', 'volume_nvi',
       'volatility_atr', 'volatility_bbh', 'volatility_bbl', 'volatility_bbm',
       'volatility_bbhi', 'volatility_bbli', 'volatility_kcc',
       'volatility_kch', 'volatility_kcl', 'volatility_kchi',
       'volatility_kcli', 'volatility_dch', 'volatility_dcl',
       'volatility_dchi', 'volatility_dcli', 'trend_macd', 'trend_macd_signal',
       'trend_macd_diff', 'trend_ema_fast', 'trend_ema_slow', 'trend_adx',
       'trend_adx_pos', 'trend_adx_neg', 'trend_vortex_ind_pos',
       'trend_vortex_ind_neg', 'trend_vortex_diff', 'trend_trix',
       'trend_mass_index', 'trend_cci', 'trend_dpo', 'trend_kst',
       'trend_kst_sig', 'trend_kst_diff', 'trend_ichimoku_a',
       'trend_ichimoku_b', 'trend_visual_ichimoku_a',
       'trend_visual_ichimoku_b', 'trend_aroon_up', 'trend_aroon_down',
       'trend_aroon_ind', 'momentum_rsi

In [40]:
# Add one specific indicator for each category (volume, volatility, trend, momentum, daily return).
# `assign` returns a new frame, so the raw `df` is left untouched (a plain
# `df_some_features = df` would only alias it and add these columns to `df` as well).

df_some_features = df.assign(
    volume_fi=force_index(df['close'], df['volume'], n=2, fillna=True),
    volatility_atr=average_true_range(df['high'], df['low'], df['close'], n=14, fillna=True),
    trend_macd=macd(df['close'], n_fast=12, n_slow=26, fillna=True),
    momentum_rsi=rsi(df['close'], n=14, fillna=True),
    others_dr=daily_return(df['close'], fillna=True),
)

In [44]:
# Sanity check: per-column flag for missing values, followed by the column index.
print(df_some_features.isna().any(), df_some_features.columns, sep="\n")

# Persist the feature table to disk.
df_some_features.to_csv("stock.csv")

open              False
high              False
low               False
close             False
volume            False
volume_fi         False
volatility_atr    False
trend_macd        False
momentum_rsi      False
others_dr         False
dtype: bool
Index(['open', 'high', 'low', 'close', 'volume', 'volume_fi', 'volatility_atr',
       'trend_macd', 'momentum_rsi', 'others_dr'],
      dtype='object')
