In [1]:
import pandas as pd
import pandas_datareader as pdr  # You may need to install this library

import yfinance as yf
from statsmodels.regression.rolling import RollingOLS
import matplotlib.pyplot as plt
import pandas_datareader.data as web
import pandas as pd
import numpy as np
import datetime as dt
import warnings
import pandas_ta
import statsmodels.api as sm
warnings.filterwarnings('ignore')
pd.set_option("display.max_columns", 25)
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

### Different Relevant time periods:
- SPIDERS -> '1998-12-22 -'2023-10-25
- Indeces -> '1992-12-22' - '2023-10-25'
- RSI/Quant -> '2015-11-30' - '2023-10-31'
- Overall Market Data -> 2013-10-28 - 2023-10-25

In [9]:
# Download historical GDP data from World Bank
gdp_data = pdr.get_data_fred("GDP", start="1992-01-01", end="2024-01-01")

# Download historical interest rates from the Federal Reserve
interest_rate_data = pdr.get_data_fred("DTB3", start="1992-01-02", end="2024-01-01")

print(gdp_data)
print(interest_rate_data)

                  GDP
DATE                 
1992-01-01   6363.102
1992-04-01   6470.763
1992-07-01   6566.641
1992-10-01   6680.803
1993-01-01   6729.459
...               ...
2022-07-01  25994.639
2022-10-01  26408.405
2023-01-01  26813.601
2023-04-01  27063.012
2023-07-01  27623.543

[127 rows x 1 columns]
            DTB3
DATE            
1992-01-02  3.86
1992-01-03  3.85
1992-01-06  3.81
1992-01-07  3.75
1992-01-08  3.72
...          ...
2023-10-24  5.31
2023-10-25  5.32
2023-10-26  5.32
2023-10-27  5.32
2023-10-30  5.33

[8303 rows x 1 columns]


### GDP Data

In [11]:
gdp_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 127 entries, 1992-01-01 to 2023-07-01
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   GDP     127 non-null    float64
dtypes: float64(1)
memory usage: 2.0 KB


### Units are in Billions of Dollars

In [58]:
gdp_data.head(2)

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
1992-01-01,6363.102
1992-04-01,6470.763


### INTEREST RATES

In [12]:
interest_rate_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8303 entries, 1992-01-02 to 2023-10-30
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   DTB3    7964 non-null   float64
dtypes: float64(1)
memory usage: 129.7 KB


In [13]:
interest_rate_data.head()

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
1992-01-02,3.86
1992-01-03,3.85
1992-01-06,3.81
1992-01-07,3.75
1992-01-08,3.72


In [28]:
total_stocks = pd.read_csv('/Users/samalainabayeva/Desktop/Capstone Project/S&P_data_w_technical_indicators.csv')
total_stocks.shape

(1234259, 18)

In [29]:
total_stocks.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
0,2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771
1,2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369


In [30]:
total_stocks["Date"].agg(["min", "max"])

min    2013-10-28
max    2023-10-25
Name: Date, dtype: object

In [31]:
total_stocks.set_index("Date", inplace=True)

In [32]:
interest_rate_data.reset_index(inplace=True)

In [33]:
interest_rate_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8303 entries, 0 to 8302
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   DATE    8303 non-null   datetime64[ns]
 1   DTB3    7964 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 129.9 KB


In [34]:
total_stocks.reset_index(inplace=True)

In [35]:
total_stocks["Date"] = pd.to_datetime(total_stocks["Date"])

In [36]:
total_stocks.set_index("Date", inplace=True)

In [37]:
interest_rate_data.set_index("DATE", inplace=True)

In [38]:
interest_rate_data.head(2)

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
1992-01-02,3.86
1992-01-03,3.85


# Joining Interest Rate Data to the Stock Data
Interest rate data was much easier to join because of its continuously updated nature, whereas GDP is only updated quarterly.
- Still need to divide by 100

In [41]:
total_stocks = total_stocks.join(interest_rate_data)

In [42]:
total_stocks.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,0.04
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,0.04


In [49]:
total_stocks.loc[total_stocks["Ticker"] == "AAPL"].loc[["2013-11-01","2013-12-20", "2014-01-02"], :]

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3
2013-11-01,AAPL,16.209167,18.5725,18.742857,18.422857,18.715,274890000.0,-0.007834,,,,,,,,,4455.738,0.04
2013-12-20,AAPL,17.212688,19.607857,19.700357,19.457857,19.479643,436413600.0,-0.005836,53.901466,16.489863,16.909347,17.328832,17.748316,18.1678,0.320701,0.229336,7511.8513,0.07
2014-01-02,AAPL,17.341543,19.754642,19.893929,19.715,19.845715,234684800.0,-0.006987,52.526818,17.113757,17.326244,17.538731,17.751218,17.963705,0.315433,0.183371,4069.7966,0.07


In [51]:
interest_rate_data.loc["2013-11-01":]

Unnamed: 0_level_0,DTB3
DATE,Unnamed: 1_level_1
2013-11-01,0.04
2013-11-04,0.05
2013-11-05,0.05
2013-11-06,0.05
2013-11-07,0.05
...,...
2023-10-24,5.31
2023-10-25,5.32
2023-10-26,5.32
2023-10-27,5.32


In [55]:
trial = total_stocks.join(gdp_data)

In [56]:
trial.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3,GDP
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,0.04,
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,0.04,


In [61]:
# GDP values are only issued quarterly

trial[trial["GDP"].notnull()].index.get_level_values(0).unique()

DatetimeIndex(['2014-04-01', '2014-07-01', '2014-10-01', '2015-04-01',
               '2015-07-01', '2015-10-01', '2016-04-01', '2016-07-01',
               '2018-10-01', '2019-04-01', '2019-07-01', '2019-10-01',
               '2020-04-01', '2020-07-01', '2020-10-01', '2021-04-01',
               '2021-07-01', '2021-10-01', '2022-04-01', '2022-07-01'],
              dtype='datetime64[ns]', freq=None)

In [63]:
# Still the same size, so this looks like it should work perfectly

trial.shape

(1234259, 19)

# TO-DOs
I think that the prioritized list of to-dos is:
- Join Interest Rates and GDP Figures to all of my data frame objects and re-export them
- Figure out how to fill in the null values correctly, primarily in the case of GDP, as interest rates seem complete
- Add Fama-French Indicators?
- Add Binary Columns 

In [65]:
pdr.DataReader("F-F_Research_Data_5_Factors_2x3", 'famafrench', start='1992')[0]

  pdr.DataReader("F-F_Research_Data_5_Factors_2x3", 'famafrench', start='1992')[0]
  pdr.DataReader("F-F_Research_Data_5_Factors_2x3", 'famafrench', start='1992')[0]


Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1992-01,-0.59,9.18,4.71,-1.32,3.20,0.34
1992-02,1.09,1.33,6.47,0.08,2.19,0.28
1992-03,-2.66,-0.93,3.56,-0.01,1.95,0.34
1992-04,1.07,-5.70,4.34,1.69,2.23,0.32
1992-05,0.30,0.21,1.19,-0.95,0.48,0.28
...,...,...,...,...,...,...
2023-03,2.51,-6.94,-8.85,2.24,-2.37,0.36
2023-04,0.61,-2.56,-0.04,2.42,2.86,0.35
2023-05,0.35,-0.38,-7.72,-1.81,-7.22,0.36
2023-06,6.46,1.34,-0.26,2.18,-1.62,0.40


# Daily Fama-French Factors
- Market Risk (RMRF): The excess return of the market over the risk-free rate, often represented by the return of a broad market index like the S&P 500.
- SMB (Small Minus Big): The return spread between small-cap stocks and large-cap stocks. It represents the historical outperformance of small-cap stocks over large-cap stocks.
- HML (High Minus Low): The return spread between high book-to-market (value) stocks and low book-to-market (growth) stocks. It captures the historical outperformance of value stocks over growth stocks.
- RMW (Robust Minus Weak): The return spread between stocks of companies with high profitability and stocks of companies with low profitability. It reflects the historical tendency of high-profitability stocks to outperform low-profitability stocks.
- CMA (Conservative Minus Aggressive): The return spread between stocks of companies that are conservative in their investments and financing and stocks of companies that are aggressive in their investments and financing. It captures the historical outperformance of conservative companies.

In [69]:
ff = pd.read_csv('/Users/samalainabayeva/Desktop/Capstone Project/F-F_Research_Data_5_Factors_2x3_daily.CSV', skiprows=2)

In [70]:
ff.head(2)

Unnamed: 0.1,Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,RF
0,19630701,-0.67,0.02,-0.35,0.03,0.13,0.012
1,19630702,0.79,-0.28,0.28,-0.08,-0.21,0.012


In [72]:
ff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15123 entries, 0 to 15122
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  15123 non-null  int64  
 1   Mkt-RF      15123 non-null  float64
 2   SMB         15123 non-null  float64
 3   HML         15123 non-null  float64
 4   RMW         15123 non-null  float64
 5   CMA         15123 non-null  float64
 6   RF          15123 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 827.2 KB


In [75]:
ff["Unnamed: 0"] = ff["Unnamed: 0"].astype(str)

In [79]:
ff["Unnamed: 0"] = ff["Unnamed: 0"].apply(lambda x: f"{x[:4]}-{x[4:6]}-{x[6:]}")

In [80]:
ff["Date"] = pd.to_datetime(ff["Unnamed: 0"])

In [81]:
ff.head()

Unnamed: 0.1,Unnamed: 0,Mkt-RF,SMB,HML,RMW,CMA,RF,Date
0,1963-07-01,-0.67,0.02,-0.35,0.03,0.13,0.012,1963-07-01
1,1963-07-02,0.79,-0.28,0.28,-0.08,-0.21,0.012,1963-07-02
2,1963-07-03,0.63,-0.18,-0.1,0.13,-0.25,0.012,1963-07-03
3,1963-07-05,0.4,0.09,-0.28,0.07,-0.3,0.012,1963-07-05
4,1963-07-08,-0.63,0.07,-0.2,-0.27,0.06,0.012,1963-07-08


In [82]:
ff.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15123 entries, 0 to 15122
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Unnamed: 0  15123 non-null  object        
 1   Mkt-RF      15123 non-null  float64       
 2   SMB         15123 non-null  float64       
 3   HML         15123 non-null  float64       
 4   RMW         15123 non-null  float64       
 5   CMA         15123 non-null  float64       
 6   RF          15123 non-null  float64       
 7   Date        15123 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(6), object(1)
memory usage: 945.3+ KB


In [83]:
# dropping unnamed: 0 and RF

ff.drop(columns=["Unnamed: 0", "RF"], inplace=True)

In [88]:
# Set Date as index

ff.set_index("Date", inplace=True)

In [92]:
# Convert integer %'s into decimals

ff = ff.div(100)

In [93]:
ff.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1963-07-01,-0.0067,0.0002,-0.0035,0.0003,0.0013
1963-07-02,0.0079,-0.0028,0.0028,-0.0008,-0.0021
1963-07-03,0.0063,-0.0018,-0.001,0.0013,-0.0025
1963-07-05,0.004,0.0009,-0.0028,0.0007,-0.003
1963-07-08,-0.0063,0.0007,-0.002,-0.0027,0.0006


In [99]:
# Final day on record is July 31st of this year

ff.index[-1]

Timestamp('2023-07-31 00:00:00')

### Including the FF with my other data

In [96]:
# Joining trial DF with all stock market stocks to the FF factors

trial = trial.join(ff)

### Including the GDP data

In [135]:
gdp_data

Unnamed: 0_level_0,GDP
DATE,Unnamed: 1_level_1
1992-01-01,6363.102
1992-04-01,6470.763
1992-07-01,6566.641
1992-10-01,6680.803
1993-01-01,6729.459
...,...
2022-07-01,25994.639
2022-10-01,26408.405
2023-01-01,26813.601
2023-04-01,27063.012


# Solution to Forward Filling the data
### Had to create a date range that corresponded to the GDP dates, 
### FILL THOSE VALUES, and MERGE THAT DATE-RANGE FORWARD FILLED DATA!!!!

In [136]:

# Define the specific month (e.g., October)
your_month_here = 10  # October in this example

# Filter the GDP data for the specific month
gdp_month_data = gdp_data[gdp_data.index.month == your_month_here]

# Create a date range spanning multiple years for October 1 to March 31
start_date = pd.Timestamp(year=gdp_data.index.min().year, month=your_month_here, day=1)
end_date = pd.Timestamp(year=2023, month=7, day=1)
date_range = pd.date_range(start=start_date, end=end_date, freq='D')

# Merge the date range with the filtered GDP data, filling in missing dates
merged_data = date_range.to_frame().merge(gdp_month_data, how='left', left_on=date_range, right_index=True)


In [138]:
end_date

Timestamp('2023-07-01 00:00:00')

In [167]:
merged_data[merged_data["GDP"].notnull()][:5]

Unnamed: 0,0,GDP
1992-10-01,1992-10-01,6680.803
1993-10-01,1993-10-01,7013.738
1994-10-01,1994-10-01,7455.288
1995-10-01,1995-10-01,7772.586
1996-10-01,1996-10-01,8259.771


### Creating new df's with all dates filled in, interesting process

In [142]:
# Only The GDP data

GDP_fill = merged_data['GDP'].ffill()

In [147]:
# Converting to data frame for join purposes

GDP_fill = GDP_fill.to_frame()

In [150]:
# Inspection of the Data

GDP_fill.head()

Unnamed: 0,GDP
1992-10-01,6680.803
1992-10-02,6680.803
1992-10-03,6680.803
1992-10-04,6680.803
1992-10-05,6680.803


### Sanity Checking along the way to make certain there were no mishaps

In [151]:
trial.shape

(1234259, 24)

In [154]:
# Obligatory Drop because I had added it earlier in the notebook

# trial.drop(columns="GDP", inplace=True)

In [155]:
# 23 Columns prior to the next join

trial.head(1)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,...,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3,Mkt-RF,SMB,HML,RMW,CMA
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,...,,,,84.0771,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015


# Adding GDP Data in Billions

In [161]:
trial = trial.join(GDP_fill)

In [162]:
GDP_fill.loc['2014-1-28']

GDP    17192.019
Name: 2014-01-28 00:00:00, dtype: float64

In [164]:
trial.loc['2014-1-28', "GDP"].value_counts()

GDP
17192.019    470
Name: count, dtype: int64

In [169]:
trial.shape

(1234259, 24)

# Shape Maintained, all is good

In [166]:
trial.head()

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,...,ATR,MACD,dollar_volume(M),DTB3,Mkt-RF,SMB,HML,RMW,CMA,GDP
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,...,,,84.0771,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015,17192.019
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,...,,,96.3369,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015,17192.019
2013-10-28,AAPL,16.51619,18.924286,18.964287,18.686071,18.894285,550440800.0,-0.006881,,,...,,,9091.1846,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015,17192.019
2013-10-28,ABBV,32.80875,49.419998,49.970001,48.970001,49.130001,5555000.0,-0.062775,,,...,,,182.2526,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015,17192.019
2013-10-28,ABT,30.737597,37.279999,37.459999,37.169998,37.330002,4799700.0,-0.014555,,,...,,,147.5312,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015,17192.019


# TO-DOs
I think that the prioritized list of to-dos is:
- Join Interest Rates and GDP Figures to all of my data frame objects and re-export them
- Figure out how to fill in the null values correctly, primarily in the case of GDP, as interest rates seem complete
- Add Fama-French Indicators?
- Add Binary Columns 

# Some strange dates that are omitted

In [174]:
trial[trial["DTB3"].isna()].index.get_level_values(0).unique()

DatetimeIndex(['2013-11-11', '2014-10-13', '2014-11-11', '2015-10-12',
               '2015-11-11', '2016-10-10', '2016-11-11', '2017-10-09',
               '2018-10-08', '2018-11-12', '2019-10-14', '2019-11-11',
               '2020-10-12', '2020-11-11', '2021-10-11', '2021-11-11',
               '2022-10-10', '2022-11-11', '2023-10-09'],
              dtype='datetime64[ns]', freq=None)

In [171]:
trial.isna().sum()

Ticker                       0
Adj Close                    0
Close                        0
High                         0
Low                          0
Open                         0
Volume                       0
garman_klass_vol             0
RSI                      10056
lowest_bollinger_band     9554
lower_bollinger_band      9554
20_day_SMA                9554
one_up_bollinger_band     9554
upper_bollinger_band      9554
ATR                       7042
MACD                     12566
dollar_volume(M)             0
DTB3                      9320
Mkt-RF                   30638
SMB                      30638
HML                      30638
RMW                      30638
CMA                      30638
GDP                      40678
dtype: int64

# Variable Collections

### interest_rate_data
### GDP_fill
### ff

In [196]:
# Interest Rate Data -> dividing by 100 to make them percentages

# interest_rate_data = interest_rate_data.div(100)  # commented out to avoid double running
# interest_rate_data.rename(columns={"DTB3":"Interest Rates"}, inplace=True)


interest_rate_data.head()

Unnamed: 0_level_0,Interest Rates
DATE,Unnamed: 1_level_1
1992-01-02,0.0386
1992-01-03,0.0385
1992-01-06,0.0381
1992-01-07,0.0375
1992-01-08,0.0372


In [175]:
# GDP Data

GDP_fill.head()

Unnamed: 0,GDP
1992-10-01,6680.803
1992-10-02,6680.803
1992-10-03,6680.803
1992-10-04,6680.803
1992-10-05,6680.803


In [176]:
# French Fama Factors

ff.head()

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1963-07-01,-0.0067,0.0002,-0.0035,0.0003,0.0013
1963-07-02,0.0079,-0.0028,0.0028,-0.0008,-0.0021
1963-07-03,0.0063,-0.0018,-0.001,0.0013,-0.0025
1963-07-05,0.004,0.0009,-0.0028,0.0007,-0.003
1963-07-08,-0.0063,0.0007,-0.002,-0.0027,0.0006


# CONFIRMATION OF VALIDITY

In [180]:
again = total_stocks.join([GDP_fill, interest_rate_data, ff])

In [183]:
pd.set_option("display.max_columns", 25)
again.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3_x,GDP,DTB3_y,Mkt-RF,SMB,HML,RMW,CMA
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015


In [186]:
(again["DTB3_x"] == again["DTB3_y"]).sum()

1224939

In [187]:
again.shape

(1234259, 25)

In [188]:
again.shape[0] - again["DTB3_y"].isna().sum()

1224939

In [189]:
total_stocks.head()

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,0.04
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,0.04
2013-10-28,AAPL,16.51619,18.924286,18.964287,18.686071,18.894285,550440800.0,-0.006881,,,,,,,,,9091.1846,0.04
2013-10-28,ABBV,32.80875,49.419998,49.970001,48.970001,49.130001,5555000.0,-0.062775,,,,,,,,,182.2526,0.04
2013-10-28,ABT,30.737597,37.279999,37.459999,37.169998,37.330002,4799700.0,-0.014555,,,,,,,,,147.5312,0.04


In [192]:
again.head()

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),DTB3_x,GDP,DTB3_y,Mkt-RF,SMB,HML,RMW,CMA
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAPL,16.51619,18.924286,18.964287,18.686071,18.894285,550440800.0,-0.006881,,,,,,,,,9091.1846,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,ABBV,32.80875,49.419998,49.970001,48.970001,49.130001,5555000.0,-0.062775,,,,,,,,,182.2526,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,ABT,30.737597,37.279999,37.459999,37.169998,37.330002,4799700.0,-0.014555,,,,,,,,,147.5312,0.04,17192.019,0.04,0.0009,-0.0004,0.0006,0.0035,0.0015


# ADDING FACTORS TO THESE FINAL DATAFRAME OBJECTS
### again = total_stocks.join([GDP_fill, interest_rate_data, ff])

In [193]:
# total_stocks.drop(columns="DTB3", inplace=True)

In [197]:
total_stocks.head(1)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771


In [206]:
again = total_stocks.join([GDP_fill, interest_rate_data, ff])

In [207]:
again[["GDP", "Interest Rates"]].isna().sum()

GDP               40678
Interest Rates     9320
dtype: int64

In [208]:
again.shape

(1234259, 24)

In [209]:
again.head()

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAPL,16.51619,18.924286,18.964287,18.686071,18.894285,550440800.0,-0.006881,,,,,,,,,9091.1846,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,ABBV,32.80875,49.419998,49.970001,48.970001,49.130001,5555000.0,-0.062775,,,,,,,,,182.2526,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,ABT,30.737597,37.279999,37.459999,37.169998,37.330002,4799700.0,-0.014555,,,,,,,,,147.5312,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015


In [211]:
# Filling null values

again[["GDP", "Interest Rates"]] = again[["GDP", "Interest Rates"]].ffill()

In [212]:
again[["GDP", "Interest Rates"]].isna().sum()

GDP               0
Interest Rates    0
dtype: int64

In [213]:
ff.columns

Index(['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA'], dtype='object')

In [214]:
again[['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']].isna().sum()

Mkt-RF    30638
SMB       30638
HML       30638
RMW       30638
CMA       30638
dtype: int64

In [220]:
again[again["Mkt-RF"].isna()].index[0], again[again["Mkt-RF"].isna()].index[-1]

(Timestamp('2023-08-01 00:00:00'), Timestamp('2023-10-25 00:00:00'))

In [219]:
again[again["Mkt-RF"].isna()].index.min(), again[again["Mkt-RF"].isna()].index.max()

(Timestamp('2023-08-01 00:00:00'), Timestamp('2023-10-25 00:00:00'))

### I am inclined to leave the ff data untouched as it is with null values, and perhaps use a KNN imputer later on

In [230]:
spiders = pd.read_csv("/Users/samalainabayeva/Desktop/Capstone Project/SPDRs_w_technical_indicators.csv")
indeces = pd.read_csv("/Users/samalainabayeva/Desktop/Capstone Project/INDECES_w_technical_indicators.csv")

In [231]:
# Initial Data Compare

total_stocks.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369


In [232]:
# Spiders Compare

spiders.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
0,1998-12-22,XLB,12.011767,20.828125,20.828125,20.75,20.78125,1900.0,-0.116068,,,,,,,,,0.0228
1,1998-12-22,XLE,12.448157,23.265625,23.390625,23.1875,23.3125,15200.0,-0.152028,,,,,,,,,0.1892


In [233]:
# Indices Compare

indeces.head(2)

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
0,1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,,,,,86248.9695
1,1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,,,,,110266.8327


In [237]:
# Both Dates are "Objects" as opposed to Date Time objects

spiders.dtypes[:2], indeces.dtypes[:2]

(Date      object
 Ticker    object
 dtype: object,
 Date     object
 Index    object
 dtype: object)

In [239]:
# Conversion
spiders["Date"] = pd.to_datetime(spiders["Date"])

In [240]:
# Conversion
indeces["Date"] = pd.to_datetime(indeces["Date"])

In [241]:
# Setting index

spiders.set_index("Date", inplace=True)
indeces.set_index("Date", inplace=True)

In [243]:
indeces.head(2)

Unnamed: 0_level_0,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,,,,,86248.9695
1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,,,,,110266.8327


### Looks good

In [244]:
spiders.shape, indeces.shape

((59633, 17), (31068, 17))

In [245]:
# Spider Joins

spiders = spiders.join([GDP_fill, interest_rate_data, ff])
spiders[["GDP", "Interest Rates"]] = spiders[["GDP", "Interest Rates"]].ffill()
spiders.shape

(59633, 24)

In [246]:
# Index Joins

indeces = indeces.join([GDP_fill, interest_rate_data, ff])
indeces[["GDP", "Interest Rates"]] = indeces[["GDP", "Interest Rates"]].ffill()
indeces.shape

(31068, 24)

In [247]:
spiders.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
1998-12-22,XLB,12.011767,20.828125,20.828125,20.75,20.78125,1900.0,-0.116068,,,,,,,,,0.0228,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078
1998-12-22,XLE,12.448157,23.265625,23.390625,23.1875,23.3125,15200.0,-0.152028,,,,,,,,,0.1892,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078


In [248]:
indeces.head(2)

Unnamed: 0,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,,,,,86248.9695,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015
1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,,,,,110266.8327,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015


# Adding Binary Columns For Future Targets

In [11]:
# This group of functions works -> Just replace the DF in the initial dates=<DF>["Date"]

def one_month_later(date, months=1):
    dates = again["Date"].unique()
    # Start with a specific date
    start_date = date 
    
    # Calculate the date one month later, adjusting for weekends
    one_month_later = start_date + pd.DateOffset(months=months)

    while one_month_later not in dates:  
        if one_month_later > dates[-1]:
            return np.nan
            
        else:
            one_month_later += pd.DateOffset(days=1)
    
    return one_month_later


def three_months_later(date, months=3):
    return one_month_later(date, months=months)


def six_months_later(date, months=6):
    return one_month_later(date, months=months)


def twelve_months_later(date, months=12):
    return one_month_later(date, months=months)



In [254]:
again.head(2)

Unnamed: 0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015


In [255]:
# Gaining access to Date-Times

spiders.reset_index(inplace=True)
indeces.reset_index(inplace=True)
again.reset_index(inplace=True)

In [259]:
spiders.rename(columns={"index":"Date"}, inplace=True)
indeces.rename(columns={"index":"Date"}, inplace=True)
again.rename(columns={"index":"Date"}, inplace=True)

In [262]:
spiders.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
0,1998-12-22,XLB,12.011767,20.828125,20.828125,20.75,20.78125,1900.0,-0.116068,,,,,,,,,0.0228,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078
1,1998-12-22,XLE,12.448157,23.265625,23.390625,23.1875,23.3125,15200.0,-0.152028,,,,,,,,,0.1892,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078


### Proof of Concept

In [266]:
spiders["Date"].apply(one_month_later)

0       1999-01-22
1       1999-01-22
2       1999-01-22
3       1999-01-22
4       1999-01-22
           ...    
59628          NaT
59629          NaT
59630          NaT
59631          NaT
59632          NaT
Name: Date, Length: 59633, dtype: datetime64[ns]

In [269]:
# Cell takes about 3 minutes to run

spiders['one_months_later'] = spiders["Date"].apply(one_month_later)
spiders["three_months_later"] = spiders["Date"].apply(three_months_later)
spiders["six_months_later"] = spiders["Date"].apply(six_months_later)
spiders["twelve_months_later"] = spiders["Date"].apply(twelve_months_later)

In [270]:
# Proof of Concept

spiders.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,...,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later
0,1998-12-22,XLB,12.011767,20.828125,20.828125,20.75,20.78125,1900.0,-0.116068,,,,...,0.0228,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22
1,1998-12-22,XLE,12.448157,23.265625,23.390625,23.1875,23.3125,15200.0,-0.152028,,,,...,0.1892,9293.991,0.0439,0.0,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22


In [272]:
# Dtypes are also datetimes as well! 
spiders.dtypes[-5:]

CMA                           float64
one_months_later       datetime64[ns]
three_months_later     datetime64[ns]
six_months_later       datetime64[ns]
twelve_months_later    datetime64[ns]
dtype: object

# Perform the same on the other two groups of data

### Indeces

In [274]:
indeces.head(2)

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
0,1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,,,,,86248.9695,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015
1,1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,,,,,110266.8327,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015


In [275]:
# Indeces, takes several minutes to run, be careful

indeces['one_months_later'] = indeces["Date"].apply(one_month_later)
indeces["three_months_later"] = indeces["Date"].apply(three_months_later)
indeces["six_months_later"] = indeces["Date"].apply(six_months_later)
indeces["twelve_months_later"] = indeces["Date"].apply(twelve_months_later)

In [276]:
indeces.head(2)

Unnamed: 0,Date,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,...,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later
0,1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,...,86248.9695,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22
1,1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,...,110266.8327,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22


### Again df

In [281]:
again.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
0,2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
1,2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015


In [285]:
# Cell takes >30 minutes to run

# again['one_months_later'] = again["Date"].apply(one_month_later)
# again["three_months_later"] = again["Date"].apply(three_months_later)
# again["six_months_later"] = again["Date"].apply(six_months_later)
# again["twelve_months_later"] = again["Date"].apply(twelve_months_later)

In [284]:
again.head(2)

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA
0,2013-10-28,A,33.811764,36.831188,37.181686,36.566525,37.04578,2486623.0,-0.003084,,,,,,,,,84.0771,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015
1,2013-10-28,AAL,20.749743,22.01,22.629999,21.879999,22.24,4642800.0,-0.00129,,,,,,,,,96.3369,17192.019,0.0004,0.0009,-0.0004,0.0006,0.0035,0.0015


In [286]:
again.to_csv("/Users/samalainabayeva/Desktop/Capstone Project/All_stocks_pre_binary_pre_future_dates.csv")

In [287]:
indeces.to_csv("/Users/samalainabayeva/Desktop/Capstone Project/Major_Indeces_pre_binary.csv")

In [288]:
spiders.to_csv('/Users/samalainabayeva/Desktop/Capstone Project/Spiders_pre_binary.csv')

In [4]:
again = pd.read_csv("/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/All_stocks_pre_binary_pre_future_dates.csv")

In [6]:
again.drop(columns="Unnamed: 0", inplace=True)
again["Date"] = pd.to_datetime(again["Date"])

In [7]:
again = pd.read_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/stocks_for_modeling_clusters.csv')

In [9]:
again["Date"] = pd.to_datetime(again["Date"])

In [12]:
again['one_months_later'] = again["Date"].apply(one_month_later)
again["three_months_later"] = again["Date"].apply(three_months_later)
again["six_months_later"] = again["Date"].apply(six_months_later)
again["twelve_months_later"] = again["Date"].apply(twelve_months_later)

In [13]:
again

Unnamed: 0,Date,Ticker,Adj Close,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,...,return_6m,return_9m,return_12m,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later
0,2015-11-30,AAL,39.429935,-0.000966,40.880488,38.219863,39.883038,41.546214,43.209389,44.872565,1.118944,-0.656804,...,-0.003670,-0.015729,-0.012729,1.322942,0.910139,0.804086,1.222983,-0.061192,2015-12-31,2016-02-29,2016-05-31,2016-11-30
1,2015-11-30,AAPL,26.960346,-0.003027,53.592898,25.708802,26.310024,26.911246,27.512468,28.113690,0.565622,0.161838,...,-0.014506,-0.007686,0.000966,1.084475,-0.136031,-0.397620,0.718503,-0.795369,2015-12-31,2016-02-29,2016-05-31,2016-11-30
2,2015-11-30,ABBV,41.160294,-0.053947,46.995675,41.269060,42.342785,43.416509,44.490233,45.563957,1.753494,0.411671,...,-0.019629,-0.001641,-0.011735,0.851106,0.279184,0.021886,-0.269020,-0.043745,2015-12-31,2016-02-29,2016-05-31,2016-11-30
3,2015-11-30,ABT,38.669395,-0.009962,52.539149,38.075918,38.510178,38.944438,39.378697,39.812957,0.739885,0.405522,...,-0.011291,-0.004146,0.002517,1.056769,-0.140064,-0.638172,0.113359,0.560507,2015-12-31,2016-02-29,2016-05-31,2016-11-30
4,2015-11-30,ACN,94.345917,-0.006636,57.567387,91.367418,92.512621,93.657824,94.803026,95.948229,1.288679,0.812790,...,0.020338,0.022024,0.020034,1.143698,-0.203828,-0.181753,0.085237,-0.074808,2015-12-31,2016-02-29,2016-05-31,2016-11-30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13791,2023-10-31,VRTX,363.040009,0.000200,53.833235,344.018442,352.603220,361.187999,369.772778,378.357556,6.590981,4.149759,...,0.010627,0.013034,0.012706,1.142086,-0.222391,-1.246586,-0.743384,0.918423,NaT,NaT,NaT,NaT
13792,2023-10-31,VZ,33.689999,0.000221,58.946533,29.828625,30.701467,31.574308,32.447150,33.319992,0.808540,0.107031,...,-0.017041,-0.017064,-0.002729,0.669774,-0.389182,-0.140889,0.456590,1.195363,NaT,NaT,NaT,NaT
13793,2023-10-31,WFC,39.029999,0.000131,39.236461,38.205341,39.160670,40.116000,41.071329,42.026659,0.928127,-0.411437,...,-0.000449,-0.017741,-0.011247,1.180862,-0.162028,1.280360,-0.628291,-0.298237,NaT,NaT,NaT,NaT
13794,2023-10-31,WMT,162.759995,0.000062,55.397237,156.054002,158.010000,159.965999,161.921997,163.877996,2.215188,0.245920,...,0.013851,0.015090,0.012521,0.634972,-0.425805,-0.220643,0.608535,0.533930,NaT,NaT,NaT,NaT


In [14]:
# This function works!!!!

def one_month_gainer(df, num="One"):
    for ticker in list(df["Ticker"].unique()):
        ticker_df = df[df["Ticker"] == ticker]

        for index, row in ticker_df.iterrows():
            try:
                one_month_later_value = row[f"{num.lower()}_months_later"]
                if not pd.isna(one_month_later_value):
                    ticker_df.at[index, f"{num}_Month_Positive"] = (row["Adj Close"] < ticker_df.loc[\
                                                                    one_month_later_value, "Adj Close"]).astype(int)
                else:
                    # Handle the case where one_month_later is NaN (NaT)
                    ticker_df.at[index, f"{num}_Month_Positive"] = np.nan
            except KeyError:
                # Handle the KeyError exception here if necessary
                ticker_df.at[index, f"{num}_Month_Positive"] = np.nan
           
        df.loc[df["Ticker"] == ticker, f"{num}_Month_Positive"] = ticker_df[f"{num}_Month_Positive"]
    
    return df


In [15]:
def three_month_gainer(df, num="Three"):
    return one_month_gainer(df, num)


def six_month_gainer(df, num="Six"):
    return one_month_gainer(df, num)


def twelve_month_gainer(df, num="Twelve"):
    return one_month_gainer(df, num)

In [18]:
again.set_index("Date", inplace=True)

In [20]:
one_month_gainer(again)
three_month_gainer(again)
six_month_gainer(again)
twelve_month_gainer(again)

Unnamed: 0_level_0,Ticker,Adj Close,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,return_1m,...,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
2015-11-30,AAL,39.429935,-0.000966,40.880488,38.219863,39.883038,41.546214,43.209389,44.872565,1.118944,-0.656804,-0.105388,...,0.910139,0.804086,1.222983,-0.061192,2015-12-31,2016-02-29,2016-05-31,2016-11-30,1.0,0.0,0.0,1.0
2015-11-30,AAPL,26.960346,-0.003027,53.592898,25.708802,26.310024,26.911246,27.512468,28.113690,0.565622,0.161838,-0.005804,...,-0.136031,-0.397620,0.718503,-0.795369,2015-12-31,2016-02-29,2016-05-31,2016-11-30,0.0,0.0,0.0,0.0
2015-11-30,ABBV,41.160294,-0.053947,46.995675,41.269060,42.342785,43.416509,44.490233,45.563957,1.753494,0.411671,-0.023510,...,0.279184,0.021886,-0.269020,-0.043745,2015-12-31,2016-02-29,2016-05-31,2016-11-30,1.0,0.0,1.0,1.0
2015-11-30,ABT,38.669395,-0.009962,52.539149,38.075918,38.510178,38.944438,39.378697,39.812957,0.739885,0.405522,0.002679,...,-0.140064,-0.638172,0.113359,0.560507,2015-12-31,2016-02-29,2016-05-31,2016-11-30,0.0,0.0,0.0,0.0
2015-11-30,ACN,94.345917,-0.006636,57.567387,91.367418,92.512621,93.657824,94.803026,95.948229,1.288679,0.812790,0.000186,...,-0.203828,-0.181753,0.085237,-0.074808,2015-12-31,2016-02-29,2016-05-31,2016-11-30,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-31,VRTX,363.040009,0.000200,53.833235,344.018442,352.603220,361.187999,369.772778,378.357556,6.590981,4.149759,0.043998,...,-0.222391,-1.246586,-0.743384,0.918423,NaT,NaT,NaT,NaT,,,,
2023-10-31,VZ,33.689999,0.000221,58.946533,29.828625,30.701467,31.574308,32.447150,33.319992,0.808540,0.107031,0.061682,...,-0.389182,-0.140889,0.456590,1.195363,NaT,NaT,NaT,NaT,,,,
2023-10-31,WFC,39.029999,0.000131,39.236461,38.205341,39.160670,40.116000,41.071329,42.026659,0.928127,-0.411437,-0.044787,...,-0.162028,1.280360,-0.628291,-0.298237,NaT,NaT,NaT,NaT,,,,
2023-10-31,WMT,162.759995,0.000062,55.397237,156.054002,158.010000,159.965999,161.921997,163.877996,2.215188,0.245920,0.017695,...,-0.425805,-0.220643,0.608535,0.533930,NaT,NaT,NaT,NaT,,,,


In [21]:
# Define a function to calculate quartiles

def calculate_quartiles(group):
    quartiles = group['RSI'].quantile([0.25, 0.5, 0.75])
    
    # pd.cut() is used to segment data, bins = -inf to .25, .25 to .5, .5 to .75, .75 to inf,
    # label "4" is the highest value group
    group['RSI_Quartile'] = pd.cut(group['RSI'], bins=[-float("inf")] + quartiles.to_list() + [float("inf")],\
                                   labels=[1, 2, 3, 4])
    return group


In [22]:
# Group by date and apply the quartile calculation function
quartile_df = again.groupby(level='Date', group_keys=False).apply(calculate_quartiles)

In [44]:
pd.set_option("display.max_columns", 35)
quartile_df.head(2)

Unnamed: 0_level_0,Ticker,Adj Close,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive,RSI_Quartile
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
2015-11-30,AAL,39.429935,-0.000966,40.880488,38.219863,39.883038,41.546214,43.209389,44.872565,1.118944,-0.656804,-0.105388,0.031926,0.019861,-0.00367,-0.015729,-0.012729,1.322942,0.910139,0.804086,1.222983,-0.061192,2015-12-31,2016-02-29,2016-05-31,2016-11-30,1.0,0.0,0.0,1.0,1
2015-11-30,AAPL,26.960346,-0.003027,53.592898,25.708802,26.310024,26.911246,27.512468,28.11369,0.565622,0.161838,-0.005804,0.037844,0.017564,-0.014506,-0.007686,0.000966,1.084475,-0.136031,-0.39762,0.718503,-0.795369,2015-12-31,2016-02-29,2016-05-31,2016-11-30,0.0,0.0,0.0,0.0,3


In [63]:
macro_data = spiders[['GDP', 'Interest Rates']]
macro_data.resample("M").last()

Unnamed: 0_level_0,GDP,Interest Rates
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
1998-12-31,9293.991,0.0437
1999-01-31,9293.991,0.0437
1999-02-28,9293.991,0.0455
1999-03-31,9293.991,0.0437
1999-04-30,9293.991,0.0443
...,...,...
2023-06-30,26408.405,0.0517
2023-07-31,26408.405,0.0528
2023-08-31,26408.405,0.0532
2023-09-30,26408.405,0.0532


In [67]:
quartile_df.shape

(13796, 31)

In [68]:
quartile_df = quartile_df.join(macro_data.resample("M").last())

In [69]:
quartile_df.head(2)

Unnamed: 0_level_0,Ticker,Adj Close,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,one_up_bollinger_band,upper_bollinger_band,ATR,MACD,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive,RSI_Quartile,GDP,Interest Rates
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
2015-11-30,AAL,39.429935,-0.000966,40.880488,38.219863,39.883038,41.546214,43.209389,44.872565,1.118944,-0.656804,-0.105388,0.031926,0.019861,-0.00367,-0.015729,-0.012729,1.322942,0.910139,0.804086,1.222983,-0.061192,2015-12-31,2016-02-29,2016-05-31,2016-11-30,1.0,0.0,0.0,1.0,1,18435.137,0.0022
2015-11-30,AAPL,26.960346,-0.003027,53.592898,25.708802,26.310024,26.911246,27.512468,28.11369,0.565622,0.161838,-0.005804,0.037844,0.017564,-0.014506,-0.007686,0.000966,1.084475,-0.136031,-0.39762,0.718503,-0.795369,2015-12-31,2016-02-29,2016-05-31,2016-11-30,0.0,0.0,0.0,0.0,3,18435.137,0.0022


In [70]:
# Exporting Completed File

quartile_df.to_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/Final_Monthly_Stocks.csv')

In [27]:
spiders = pd.read_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/Spiders_pre_binary.csv', index_col=0)
indeces = pd.read_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/Major_Indeces_pre_binary.csv', index_col=0)

In [29]:
spiders["Date"] = pd.to_datetime(spiders["Date"])
indeces["Date"] = pd.to_datetime(indeces["Date"])

spiders.set_index("Date", inplace=True)
indeces.set_index("Date", inplace=True)

In [32]:
indeces.rename(columns={"Index": "Ticker"}, inplace=True)

In [33]:
indeces.head(2)

Unnamed: 0_level_0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,...,dollar_volume(M),GDP,Interest Rates,Mkt-RF,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,...,86248.9695,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22
1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,...,110266.8327,6680.803,0.032,-0.0006,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22


### Indeces Binary Columns

In [34]:
one_month_gainer(indeces)
three_month_gainer(indeces)
six_month_gainer(indeces)
twelve_month_gainer(indeces)

Unnamed: 0_level_0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,...,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.500000,25970000,0.000031,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,0.0,1.0,1.0,1.0
1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.250000,440.700012,250430000,0.000029,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,0.0,1.0,1.0,1.0
1992-12-22,^IXIC,660.840027,660.840027,664.130005,658.429993,663.739990,247700000,0.000030,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,1.0,1.0,1.0,1.0
1992-12-22,^RUT,214.800003,214.800003,215.160004,214.309998,214.990005,249670000,0.000008,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,1.0,1.0,1.0,1.0
1992-12-23,^DJI,3313.540039,3313.540039,3332.719971,3313.540039,3321.100098,21980000,0.000015,,,,,...,0.0037,0.0047,0.0002,0.0008,1993-01-25,1993-03-23,1993-06-23,1993-12-23,0.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-24,^RUT,1679.500000,1679.500000,1690.410034,1671.270020,1677.739990,3821820000,0.000064,35.372275,1667.992629,1703.455070,1738.917511,...,,,,,,,,,,,,
2023-10-25,^DJI,33035.929688,33035.929688,33267.910156,32989.718750,33203.531250,355690000,0.000025,38.317395,32811.087853,33131.017852,33450.947852,...,,,,,,,,,,,,
2023-10-25,^GSPC,4186.770020,4186.770020,4232.419922,4181.419922,4232.419922,3869370000,0.000028,36.726926,4184.179627,4239.573590,4294.967554,...,,,,,,,,,,,,
2023-10-25,^IXIC,12821.219727,12821.219727,13042.519531,12804.299805,13039.860352,4599850000,0.000059,37.899302,12850.433481,13073.433464,13296.433447,...,,,,,,,,,,,,


### Spiders Binary Columns

In [35]:
one_month_gainer(spiders)
three_month_gainer(spiders)
six_month_gainer(spiders)
twelve_month_gainer(spiders)

Unnamed: 0_level_0,Ticker,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,...,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1998-12-22,XLB,12.011767,20.828125,20.828125,20.750000,20.781250,1900.0,-0.116068,,,,,...,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22,1.0,1.0,1.0,1.0
1998-12-22,XLE,12.448157,23.265625,23.390625,23.187500,23.312500,15200.0,-0.152028,,,,,...,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22,0.0,1.0,1.0,1.0
1998-12-22,XLF,11.612926,18.937855,19.052092,18.849005,19.052092,55887.0,-0.094617,,,,,...,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22,0.0,1.0,1.0,1.0
1998-12-22,XLI,14.971390,23.281250,23.281250,23.203125,23.203125,600.0,-0.074152,,,,,...,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22,1.0,1.0,1.0,1.0
1998-12-22,XLK,24.243126,32.046875,32.500000,31.781250,32.406250,300500.0,-0.032286,,,,,...,-0.0029,-0.0043,-0.0009,0.0078,1999-01-22,1999-03-22,1999-06-22,1999-12-22,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-25,XLP,67.940002,67.940002,68.239998,67.419998,67.599998,11351100.0,0.000063,43.174669,66.122483,66.838991,67.555500,...,,,,,,,,,,,,
2023-10-25,XLRE,32.189999,32.189999,32.650002,32.099998,32.540001,8454400.0,0.000099,33.847969,32.120178,32.866839,33.613500,...,,,,,,,,,,,,
2023-10-25,XLU,59.349998,59.349998,59.470001,58.680000,58.919998,20654700.0,0.000069,46.305928,56.095167,57.214334,58.333500,...,,,,,,,,,,,,
2023-10-25,XLV,126.510002,126.510002,127.459999,125.570000,127.070000,11427000.0,0.000104,37.148926,126.420510,127.778756,129.137002,...,,,,,,,,,,,,


In [36]:
indeces.rename(columns={"Ticker":"Index"}, inplace=True)

In [37]:
indeces.head(2)

Unnamed: 0_level_0,Index,Adj Close,Close,High,Low,Open,Volume,garman_klass_vol,RSI,lowest_bollinger_band,lower_bollinger_band,20_day_SMA,...,SMB,HML,RMW,CMA,one_months_later,three_months_later,six_months_later,twelve_months_later,One_Month_Positive,Three_Month_Positive,Six_Month_Positive,Twelve_Month_Positive
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
1992-12-22,^DJI,3321.100098,3321.100098,3328.939941,3301.649902,3312.5,25970000,3.1e-05,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,0.0,1.0,1.0,1.0
1992-12-22,^GSPC,440.309998,440.309998,441.640015,438.25,440.700012,250430000,2.9e-05,,,,,...,-0.0006,0.0061,-0.0009,0.0015,1993-01-22,1993-03-22,1993-06-22,1993-12-22,0.0,1.0,1.0,1.0


In [38]:
indeces.to_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/Indeces_Final.csv')

In [39]:
spiders.to_csv('/Users/samalainabayeva/Documents/Flatiron/Stock_Market_Analysis/Stock_Market_Analysis/Data/Spiders_Final.csv')