In [1]:
# Import required libraries
import numpy as np
import yfinance as yf
import pandas as pd
import os

In [2]:
# Tickers to download: five individual stocks, two ETFs, and one cryptocurrency
assets = [
  'AAPL',     # Apple Inc.
  'MSFT',     # Microsoft Corporation
  'GOOGL',    # Alphabet Inc. (Google)
  'AMZN',     # Amazon.com Inc.
  'TSLA',     # Tesla Inc.
  'SPY',      # SPDR S&P 500 ETF Trust (an ETF that tracks the S&P 500)
  'GLD',      # SPDR Gold Shares (an ETF that tracks the price of gold)
  'BTC-USD',  # Bitcoin in USD (cryptocurrency)
]

# Date range requested for every asset
start_date, end_date = '2010-01-01', '2023-01-01'

In [3]:
# Make sure the output folder is present; exist_ok=True makes re-running this cell a no-op
os.makedirs(name='../data', exist_ok=True)

1. **Daily Returns**:
   $R_t = \frac{P_t - P_{t-1}}{P_{t-1}}$

   where $R_t$ is the daily return, $P_t$ is the adjusted closing price at time $t$, and $P_{t-1}$ is the adjusted closing price at time $t-1$.

2. **Moving Averages**:
   - 20-day Moving Average (MA20):
     $MA_{20} = \frac{1}{20} \sum_{i=0}^{19} P_{t-i}$

   - 50-day Moving Average (MA50):
     $MA_{50} = \frac{1}{50} \sum_{i=0}^{49} P_{t-i}$

3. **Volatility**:
   - 20-day Volatility (sample standard deviation of daily returns):
     $\sigma_{20} = \sqrt{\frac{1}{19} \sum_{i=0}^{19} (R_{t-i} - \mu)^2}$

     where $\sigma_{20}$ is the 20-day volatility, $R_{t-i}$ is the daily return at time $t-i$, and $\mu$ is the mean daily return over the past 20 days. The divisor is $19 = 20 - 1$ because pandas' rolling `.std()` computes the sample standard deviation (`ddof=1`) by default.


In [4]:
def add_features(df):
  """
  Adds return, moving-average, and volatility features to a stock price DataFrame.

  The features are computed on a copy, so the DataFrame passed in is left
  unchanged (no columns added, no rows dropped); use the returned frame.

  Parameters --> df (DataFrame) containing stock data; must have an 'Adj Close' column.
  Returns --> new DataFrame with the original columns plus 'Daily Return', 'MA20',
              'MA50' and 'Volatility'. Every row that contains a NaN in any column
              is removed, which includes the warm-up rows of the rolling windows.
  Raises --> KeyError if the 'Adj Close' column is missing.
  """
  # Work on a copy so the caller's DataFrame is neither extended nor truncated
  enriched = df.copy()
  adj_close = enriched['Adj Close']

  # Daily return: percentage change of the adjusted close versus the previous row
  enriched['Daily Return'] = adj_close.pct_change()

  # 20-row and 50-row simple moving averages of the adjusted close
  enriched['MA20'] = adj_close.rolling(window=20).mean()
  enriched['MA50'] = adj_close.rolling(window=50).mean()

  # Volatility: rolling 20-row standard deviation of the daily returns
  # (pandas uses the sample standard deviation, ddof=1, by default)
  enriched['Volatility'] = enriched['Daily Return'].rolling(window=20).std()

  # dropna() without a subset removes rows with a NaN in ANY column, not only
  # the feature columns computed above
  return enriched.dropna()

In [5]:
# Download, enrich, and save the historical data for every asset in the list
for asset in assets:
  # auto_adjust=False keeps the separate 'Adj Close' column that add_features
  # reads; newer yfinance releases default to auto_adjust=True and omit it.
  stock_data = yf.download(asset, start=start_date, end=end_date, auto_adjust=False)

  # Newer yfinance releases can return MultiIndex columns even for a single
  # ticker; flatten to the field names so 'Adj Close' selects a plain Series.
  if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

  # A failed download yields an empty frame; skip it instead of writing empty CSVs
  if stock_data.empty:
    print(f"No data downloaded for {asset}; skipping.")
    continue

  # Save the raw data to a CSV file
  stock_data.to_csv(f'../data/{asset}_historical_data.csv')

  # Add features (daily return, moving averages, volatility)
  stock_data = add_features(stock_data)

  # Save the enhanced data to a new CSV file
  stock_data.to_csv(f'../data/{asset}_enhanced_data.csv')

  # Display the first few rows of the enhanced data
  print(f"Enhanced data for {asset}:")
  print(stock_data.head())
  print("\n")


[*********************100%%**********************]  1 of 1 completed


Enhanced data for AAPL:
                Open      High       Low     Close  Adj Close     Volume  \
Date                                                                       
2010-03-16  8.006429  8.035000  7.946786  8.016071   6.777210  446908000   
2010-03-17  8.032143  8.087500  7.973929  8.004286   6.767246  450956800   
2010-03-18  8.003571  8.035714  7.950357  8.023214   6.783249  342109600   
2010-03-19  8.028214  8.044286  7.901071  7.937500   6.710782  559445600   
2010-03-22  7.873929  8.071429  7.862500  8.026786   6.786267  456419600   

            Daily Return      MA20      MA50  Volatility  
Date                                                      
2010-03-16      0.002725  6.395352  6.249025    0.012642  
2010-03-17     -0.001470  6.427917  6.255130    0.012552  
2010-03-18      0.002365  6.460708  6.261332    0.012546  
2010-03-19     -0.010683  6.491779  6.268144    0.012797  
2010-03-22      0.011248  6.528511  6.276701    0.012590  




[*********************100%%**********************]  1 of 1 completed


Enhanced data for MSFT:
                 Open       High        Low      Close  Adj Close    Volume  \
Date                                                                          
2010-03-16  29.420000  29.490000  29.200001  29.370001  22.299158  36723500   
2010-03-17  29.500000  29.870001  29.400000  29.629999  22.496557  50385700   
2010-03-18  29.629999  29.719999  29.500000  29.610001  22.481384  43845200   
2010-03-19  29.760000  29.900000  29.350000  29.590000  22.466194  81332100   
2010-03-22  29.500000  29.700001  29.389999  29.600000  22.473787  37718200   

            Daily Return       MA20       MA50  Volatility  
Date                                                        
2010-03-16      0.002731  21.864869  22.082858    0.008102  
2010-03-17      0.008852  21.904350  22.065001    0.008119  
2010-03-18     -0.000674  21.928646  22.046689    0.007669  
2010-03-19     -0.000676  21.959775  22.030946    0.007449  
2010-03-22      0.000338  21.992803  22.020190    0.0074

[*********************100%%**********************]  1 of 1 completed


Enhanced data for GOOGL:
                 Open       High        Low      Close  Adj Close     Volume  \
Date                                                                           
2010-03-16  14.059810  14.224725  14.033033  14.144144  14.127930  137122740   
2010-03-17  14.221722  14.300551  14.120370  14.153153  14.136929  132731136   
2010-03-18  14.132132  14.225225  14.088088  14.174174  14.157926   71016912   
2010-03-19  14.014014  14.214214  13.945946  14.014014  13.997949  191576232   
2010-03-22  13.916667  14.185435  13.870871  13.951451  13.935457  160031808   

            Daily Return       MA20       MA50  Volatility  
Date                                                        
2010-03-16      0.003587  13.761496  13.939177    0.012912  
2010-03-17      0.000637  13.795678  13.908587    0.012784  
2010-03-18      0.001485  13.824649  13.879796    0.012686  
2010-03-19     -0.011299  13.848696  13.855670    0.012961  
2010-03-22     -0.004464  13.867068  13.837372  

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Enhanced data for AMZN:
              Open    High    Low   Close  Adj Close     Volume  Daily Return  \
Date                                                                            
2010-03-16  6.5620  6.6145  6.525  6.5895     6.5895   82650000      0.005033   
2010-03-17  6.6205  6.6345  6.561  6.5670     6.5670   87176000     -0.003415   
2010-03-18  6.5510  6.6425  6.522  6.6380     6.6380  100426000      0.010812   
2010-03-19  6.6855  6.6855  6.483  6.5175     6.5175  178100000     -0.018153   
2010-03-22  6.5100  6.5480  6.432  6.5235     6.5235  107668000      0.000921   

                MA20     MA50  Volatility  
Date                                       
2010-03-16  6.236600  6.21460    0.015716  
2010-03-17  6.274175  6.21204    0.015412  
2010-03-18  6.310875  6.21011    0.015307  
2010-03-19  6.342950  6.20821    0.016076  
2010-03-22  6.374100  6.20868    0.016105  


Enhanced data for TSLA:
                Open      High       Low     Close  Adj Close   Volume  \


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

Enhanced data for SPY:
                  Open        High         Low       Close  Adj Close  \
Date                                                                    
2010-03-16  115.809998  116.519997  115.489998  116.410004  88.933800   
2010-03-17  116.760002  117.480003  116.419998  117.099998  89.460876   
2010-03-18  117.110001  117.269997  116.570000  117.040001  89.415085   
2010-03-19  115.970001  117.290001  115.519997  115.970001  88.962479   
2010-03-22  115.309998  116.800003  115.239998  116.589996  89.438080   

               Volume  Daily Return       MA20       MA50  Volatility  
Date                                                                   
2010-03-16  168673000      0.007967  86.149104  85.250521    0.005395  
2010-03-17  177468100      0.005927  86.410379  85.308123    0.005422  
2010-03-18  196509100     -0.000512  86.644537  85.360226    0.005433  
2010-03-19  226641100     -0.005062  86.847279  85.402055    0.005704  
2010-03-22  184477800      0.0053




Enhanced data for GLD:
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2010-03-16  110.019997  110.570000  109.699997  110.400002  110.400002   
2010-03-17  110.180000  110.459999  109.550003  109.589996  109.589996   
2010-03-18  110.099998  110.610001  109.500000  110.339996  110.339996   
2010-03-19  108.279999  110.290001  107.849998  108.279999  108.279999   
2010-03-22  107.309998  108.000000  106.949997  107.750000  107.750000   

              Volume  Daily Return        MA20      MA50  Volatility  
Date                                                                  
2010-03-16  17749300      0.018826  109.393000  108.8286    0.008321  
2010-03-17  13784300     -0.007337  109.409999  108.8244    0.008461  
2010-03-18  13848400      0.006844  109.427999  108.8372    0.008468  
2010-03-19  24329100     -0.018670  109.368499  108.7726    0.009417  
2010-03-22  11579400     -0.0048

[*********************100%%**********************]  1 of 1 completed

Enhanced data for BTC-USD:
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2014-11-05  330.683014  343.368988  330.683014  339.485992  339.485992   
2014-11-06  339.458008  352.966003  338.424011  349.290009  349.290009   
2014-11-07  349.817993  352.731995  341.776001  342.415009  342.415009   
2014-11-08  342.153992  347.032013  342.153992  345.488007  345.488007   
2014-11-09  345.376007  363.626007  344.255005  363.264008  363.264008   

              Volume  Daily Return        MA20        MA50  Volatility  
Date                                                                    
2014-11-05  19817200      0.027214  355.747899  372.766840    0.026468  
2014-11-06  18797000      0.028879  354.024500  370.605960    0.027520  
2014-11-07  16834200     -0.019683  351.573151  368.965460    0.027100  
2014-11-08   8535470      0.008974  349.370251  367.979301    0.027315  
2014-11-09  2420


