## Imports

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import os
import matplotlib.pyplot as plt
import seaborn as sns
import time

from scipy.stats import spearmanr
from talib import RSI, BBANDS, MACD, ATR

In [2]:
# Top 50 companies by market cap
tickers = [
    # Technology
    'AAPL', 'MSFT', 'NVDA', 'GOOGL', 'GOOG', 'AMZN', 'META', 'TSLA', 'AVGO', 'ORCL',
    'CRM', 'ADBE', 'NFLX', 'AMD', 'INTC', 'CSCO', 'QCOM', 'TXN', 'INTU', 'IBM',
    
    # Financial Services
    'BRK-B', 'JPM', 'V', 'MA', 'BAC', 'WFC', 'GS', 'MS', 'C', 'AXP',
    
    # Healthcare & Pharmaceuticals
    'UNH', 'JNJ', 'PFE', 'ABBV', 'MRK', 'TMO', 'ABT', 'DHR', 'BMY', 'LLY',
    
    # Consumer & Retail
    'COST', 'PG', 'KO', 'PEP', 'WMT', 'HD', 'MCD', 'DIS', 'NKE', 'SBUX'
]

# Clean tickers for yfinance compatibility
tickers = [ticker.replace('.', '-') for ticker in tickers]
print(f"Total tickers: {len(tickers)}")
print(tickers)

Total tickers: 50
['AAPL', 'MSFT', 'NVDA', 'GOOGL', 'GOOG', 'AMZN', 'META', 'TSLA', 'AVGO', 'ORCL', 'CRM', 'ADBE', 'NFLX', 'AMD', 'INTC', 'CSCO', 'QCOM', 'TXN', 'INTU', 'IBM', 'BRK-B', 'JPM', 'V', 'MA', 'BAC', 'WFC', 'GS', 'MS', 'C', 'AXP', 'UNH', 'JNJ', 'PFE', 'ABBV', 'MRK', 'TMO', 'ABT', 'DHR', 'BMY', 'LLY', 'COST', 'PG', 'KO', 'PEP', 'WMT', 'HD', 'MCD', 'DIS', 'NKE', 'SBUX']


In [3]:
start_date = '2015-01-01'
end_date = '2025-01-01'

The following method is supposed to download data for each ticket.

In [4]:
def get_data(ticker, start_date, end_date):
    data = yf.download(ticker, start_date, end_date)
    data["Ticker"] = ticker
    return data

In [5]:
os.makedirs("data", exist_ok=True)

print(f"Processing data for {len(tickers)} stocks...")
for ticker in tickers:
    try:
        # Get enhanced data for this stock
        stock_data = get_data(ticker, start_date, end_date)
        
        # Reset index to make Date a column
        stock_data = stock_data.reset_index()

        print(f"Processed {ticker} data with {len(stock_data)} rows.")
        
        # Save individual stock data
        stock_data.to_csv(f"data/{ticker}.csv", index=False)
        
        # Add a small delay to avoid hitting API limits
        time.sleep(2)

    except Exception as e:
        print(f"Error processing {ticker}: {str(e)}")

Processing data for 50 stocks...


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed AAPL data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed MSFT data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed NVDA data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed GOOGL data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed GOOG data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed AMZN data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed META data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed TSLA data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed AVGO data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed ORCL data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed CRM data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed ADBE data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed NFLX data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed AMD data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed INTC data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed CSCO data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed QCOM data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed TXN data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed INTU data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed IBM data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed BRK-B data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed JPM data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed V data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed MA data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed BAC data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed WFC data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed GS data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed MS data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed C data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed AXP data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed UNH data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed JNJ data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed PFE data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed ABBV data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed MRK data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed TMO data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed ABT data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed DHR data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed BMY data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed LLY data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed COST data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed PG data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed KO data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed PEP data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed WMT data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed HD data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed MCD data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed DIS data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed NKE data with 2516 rows.


  data = yf.download(ticker, start_date, end_date)
[*********************100%***********************]  1 of 1 completed


Processed SBUX data with 2516 rows.


Let us analyze how does the dataframe for one stock look like.

In [6]:
files = os.listdir('data') # List of files in the data directory
files

['MCD.csv',
 'ABBV.csv',
 'SBUX.csv',
 'CSCO.csv',
 'WMT.csv',
 'DHR.csv',
 'UNH.csv',
 'MSFT.csv',
 'AVGO.csv',
 'BRK-B.csv',
 'BAC.csv',
 'ABT.csv',
 'AMZN.csv',
 'QCOM.csv',
 'IBM.csv',
 'GOOGL.csv',
 'JNJ.csv',
 'NFLX.csv',
 'ORCL.csv',
 'NVDA.csv',
 'BMY.csv',
 'META.csv',
 'INTC.csv',
 'JPM.csv',
 'DIS.csv',
 'C.csv',
 'WFC.csv',
 'TSLA.csv',
 'GS.csv',
 'MS.csv',
 'MA.csv',
 'MRK.csv',
 'NKE.csv',
 'GOOG.csv',
 'AMD.csv',
 'TXN.csv',
 'KO.csv',
 'TMO.csv',
 'PFE.csv',
 'HD.csv',
 'COST.csv',
 'PEP.csv',
 'AAPL.csv',
 'LLY.csv',
 'CRM.csv',
 'AXP.csv',
 'INTU.csv',
 'ADBE.csv',
 'PG.csv',
 'V.csv']

In [7]:
example_file = 'MSFT.csv'
example_file_path = os.path.join('data', example_file)

In [8]:
msft_data = pd.read_csv(example_file_path)
msft_data.head()

Unnamed: 0,Date,Close,High,Low,Open,Volume,Ticker
0,,MSFT,MSFT,MSFT,MSFT,MSFT,
1,2015-01-02,39.99870300292969,40.563269701522984,39.81051627880419,39.91316387951886,27913900,MSFT
2,2015-01-05,39.630882263183594,39.973042020810816,39.56244835379311,39.665095954770436,39673900,MSFT
3,2015-01-06,39.04921340942383,39.990156974160506,38.955118400328374,39.673658249746744,36447900,MSFT
4,2015-01-07,39.54534149169922,39.74208409457034,38.91234411952407,39.331490410781676,29114100,MSFT


As we see, there is a redundant second row(row with index 0), and moreover there is redundant indexes in the dataframe. We want to get rid of them and concatenate dataframes.  

In [9]:
dataframes = []
for file in files:
    file_path = os.path.join('data', file)     
    stock_data = pd.read_csv(file_path)
    stock_data.drop([0], inplace=True) # Drop row with index 0
    dataframes.append(stock_data)
if dataframes:
    data = pd.concat(dataframes, ignore_index=True)

In [10]:
data

Unnamed: 0,Date,Close,High,Low,Open,Volume,Ticker
0,2015-01-02,71.4981689453125,72.83214555241732,71.33717227282632,72.16515432431137,6019700,MCD
1,2015-01-05,70.70852661132812,71.82017297443562,70.70085843080031,71.52884645456604,6211900,MCD
2,2015-01-06,70.83882904052734,71.9198081126038,70.44016569476011,71.0458230418677,6484100,MCD
3,2015-01-07,72.07316589355469,72.10383276468738,71.19151235808935,71.49817522030872,6400300,MCD
4,2015-01-08,72.34149932861328,72.816827094462,72.10383837024294,72.24183637651463,5476700,MCD
...,...,...,...,...,...,...,...
125795,2024-12-24,319.5805969238281,320.4676432922445,316.2218410476972,317.10888741611353,2684100,V
125796,2024-12-26,319.8397521972656,320.37795978575224,318.3945758559628,318.70353955666053,2856000,V
125797,2024-12-27,317.59722900390625,320.4277535659147,316.5008915710655,318.20517997096465,3489200,V
125798,2024-12-30,314.2584228515625,316.0524075946733,312.1853731391722,313.6305190667336,3478500,V


In [11]:
data.dtypes

Date      object
Close     object
High      object
Low       object
Open      object
Volume    object
Ticker    object
dtype: object

The columns should be converted into appropriate datatype. 

In [12]:
data['Date'] = pd.to_datetime(data['Date'])
numeric_columns = ['Close', 'High', 'Low', 'Open', 'Volume']
for col in numeric_columns:
    data[col] = pd.to_numeric(data[col], errors='coerce')

In [13]:
data.dtypes

Date      datetime64[ns]
Close            float64
High             float64
Low              float64
Open             float64
Volume             int64
Ticker            object
dtype: object

In [14]:
data.columns = data.columns.map(lambda x: x.lower())

#### Compute Rolling Average Dollar Volume

In [15]:
data['dollar_vol'] = data[['close', 'volume']].prod(axis=1)

In [16]:
data

Unnamed: 0,date,close,high,low,open,volume,ticker,dollar_vol
0,2015-01-02,71.498169,72.832146,71.337172,72.165154,6019700,MCD,4.303975e+08
1,2015-01-05,70.708527,71.820173,70.700858,71.528846,6211900,MCD,4.392343e+08
2,2015-01-06,70.838829,71.919808,70.440166,71.045823,6484100,MCD,4.593261e+08
3,2015-01-07,72.073166,72.103833,71.191512,71.498175,6400300,MCD,4.612899e+08
4,2015-01-08,72.341499,72.816827,72.103838,72.241836,5476700,MCD,3.961927e+08
...,...,...,...,...,...,...,...,...
125795,2024-12-24,319.580597,320.467643,316.221841,317.108887,2684100,V,8.577863e+08
125796,2024-12-26,319.839752,320.377960,318.394576,318.703540,2856000,V,9.134623e+08
125797,2024-12-27,317.597229,320.427754,316.500892,318.205180,3489200,V,1.108160e+09
125798,2024-12-30,314.258423,316.052408,312.185373,313.630519,3478500,V,1.093148e+09


In [17]:
data['dollar_vol_1m'] = (data.groupby('ticker')['dollar_vol']
                           .rolling(window=21)
                           .mean()).values

In [18]:
data.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125800 entries, 0 to 125799
Data columns (total 9 columns):
 #   Column         Non-Null Count   Dtype         
---  ------         --------------   -----         
 0   date           125800 non-null  datetime64[ns]
 1   close          125800 non-null  float64       
 2   high           125800 non-null  float64       
 3   low            125800 non-null  float64       
 4   open           125800 non-null  float64       
 5   volume         125800 non-null  int64         
 6   ticker         125800 non-null  object        
 7   dollar_vol     125800 non-null  float64       
 8   dollar_vol_1m  124800 non-null  float64       
dtypes: datetime64[ns](1), float64(6), int64(1), object(1)
memory usage: 8.6+ MB


In [19]:
data['dollar_vol_rank'] = (data.groupby('date')
                             .dollar_vol_1m
                             .rank(ascending=False))

In [20]:
data.info(show_counts=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125800 entries, 0 to 125799
Data columns (total 10 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   date             125800 non-null  datetime64[ns]
 1   close            125800 non-null  float64       
 2   high             125800 non-null  float64       
 3   low              125800 non-null  float64       
 4   open             125800 non-null  float64       
 5   volume           125800 non-null  int64         
 6   ticker           125800 non-null  object        
 7   dollar_vol       125800 non-null  float64       
 8   dollar_vol_1m    124800 non-null  float64       
 9   dollar_vol_rank  124800 non-null  float64       
dtypes: datetime64[ns](1), float64(7), int64(1), object(1)
memory usage: 9.6+ MB


### Add some Basic Factors

### Compute the Relative Strength Index

In [21]:
data['rsi'] = data.groupby('ticker')['close'].transform(RSI)

### Compute Bollinger Bands

In [22]:
def compute_bb(close_prices):
    """
    Compute Bollinger Bands for a single ticker's close prices
    Returns a DataFrame with bb_high and bb_low columns
    """
    high, mid, low = BBANDS(close_prices, timeperiod=20)
    return pd.DataFrame({
        'bb_high': high, 
        'bb_low': low
    }, index=close_prices.index)

In [23]:
bb_results = data.groupby('ticker')['close'].apply(compute_bb)
bb_results = bb_results.reset_index(level=0, drop=True) # Reset index to align with original data
data = data.join(bb_results)

In [24]:
data['bb_high'] = data['bb_high'].sub(data['close']).div(data['bb_high']).apply(np.log1p)
data['bb_low'] = data['close'].sub(data['bb_low']).div(data['close']).apply(np.log1p)

### Compute Average True Range

In [25]:
def compute_atr(stock_data):
    """
    Compute normalized ATR for a single ticker
    """
    try:
        df = ATR(stock_data['high'], stock_data['low'], 
                 stock_data['close'], timeperiod=14)
        return df.sub(df.mean()).div(df.std())
    except:
        return pd.Series(np.nan, index=stock_data.index)


In [26]:
data['atr'] = data.groupby('ticker', group_keys=False).apply(compute_atr, include_groups=False)

### Compute Moving Average
### Convergance/Divergance

In [27]:
def compute_macd(close_prices):
    """
    Compute normalized MACD for a single ticker
    """
    try:
        macd = MACD(close_prices)[0]  # Get only the MACD line
        return (macd - np.mean(macd)) / np.std(macd)
    except:
        return pd.Series(np.nan, index=close_prices.index)


In [28]:
data['macd'] = data.groupby('ticker')['close'].apply(compute_macd).reset_index(level=0, drop=True)

### Compute Lagged Returns

In [29]:
lags = [1, 5, 10, 21, 42, 63]

In [30]:
returns = data.groupby('ticker')['close'].pct_change()
percentiles=[.0001, .001, .01]
percentiles+= [1-p for p in percentiles]
returns.describe(percentiles=percentiles).iloc[2:].to_frame('percentiles').style.format(lambda x: f'{x:,.2%}')

Unnamed: 0,percentiles
std,1.92%
min,-35.12%
0.01%,-18.84%
0.1%,-10.88%
1%,-5.23%
50%,0.07%
99%,5.54%
99.9%,12.21%
99.99%,20.36%
max,61.22%


In [31]:
q = 0.0001

### Winsorize outliers

In [32]:
for lag in lags:
    data[f'return_{lag}d'] = (data.groupby('ticker')['close']
                                .pct_change(lag)
                                .pipe(lambda x: x.clip(lower=x.quantile(q),
                                                       upper=x.quantile(1 - q)))
                                .add(1)
                                .pow(1 / lag)
                                .sub(1)
                                )

### Shift lagged outliers

In [33]:
for t in [1, 2, 3, 4, 5]:
    for lag in [1, 5, 10, 21]:
        data[f'lag_return_{lag}d'] = (data.groupby('ticker')
                                           [f'return_{lag}d'].shift(t * lag))

### Compute forward returns

In [34]:
for t in [1, 5, 10, 21]:
    data[f'target_{t}d'] = data.groupby('ticker')[f'return_{t}d'].shift(-t)

### Create year and month columns

In [35]:
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month

In [36]:
data = pd.get_dummies(data,
                      columns=['year', 'month'],
                      prefix=['year', 'month'],
                      prefix_sep=['_', '_'],
                      drop_first=True)

In [37]:
data

Unnamed: 0,date,close,high,low,open,volume,ticker,dollar_vol,dollar_vol_1m,dollar_vol_rank,...,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
0,2015-01-02,71.498169,72.832146,71.337172,72.165154,6019700,MCD,4.303975e+08,,,...,False,False,False,False,False,False,False,False,False,False
1,2015-01-05,70.708527,71.820173,70.700858,71.528846,6211900,MCD,4.392343e+08,,,...,False,False,False,False,False,False,False,False,False,False
2,2015-01-06,70.838829,71.919808,70.440166,71.045823,6484100,MCD,4.593261e+08,,,...,False,False,False,False,False,False,False,False,False,False
3,2015-01-07,72.073166,72.103833,71.191512,71.498175,6400300,MCD,4.612899e+08,,,...,False,False,False,False,False,False,False,False,False,False
4,2015-01-08,72.341499,72.816827,72.103838,72.241836,5476700,MCD,3.961927e+08,,,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125795,2024-12-24,319.580597,320.467643,316.221841,317.108887,2684100,V,8.577863e+08,1.790341e+09,21.0,...,False,False,False,False,False,False,False,False,False,True
125796,2024-12-26,319.839752,320.377960,318.394576,318.703540,2856000,V,9.134623e+08,1.732572e+09,20.0,...,False,False,False,False,False,False,False,False,False,True
125797,2024-12-27,317.597229,320.427754,316.500892,318.205180,3489200,V,1.108160e+09,1.710112e+09,20.0,...,False,False,False,False,False,False,False,False,False,True
125798,2024-12-30,314.258423,316.052408,312.185373,313.630519,3478500,V,1.093148e+09,1.689756e+09,20.0,...,False,False,False,False,False,False,False,False,False,True


In [38]:
data.shape

(125800, 49)

In [39]:
data.isna().sum()

date                  0
close                 0
high                  0
low                   0
open                  0
volume                0
ticker                0
dollar_vol            0
dollar_vol_1m      1000
dollar_vol_rank    1000
rsi                 700
bb_high             950
bb_low              950
atr                 700
macd               1650
return_1d            50
return_5d           250
return_10d          500
return_21d         1050
return_42d         2100
return_63d         3150
lag_return_1d       300
lag_return_5d      1500
lag_return_10d     3000
lag_return_21d     6300
target_1d            50
target_5d           250
target_10d          500
target_21d         1050
year_2016             0
year_2017             0
year_2018             0
year_2019             0
year_2020             0
year_2021             0
year_2022             0
year_2023             0
year_2024             0
month_2               0
month_3               0
month_4               0
month_5         

In [40]:
data.dropna(inplace=True)

In [41]:
data.isna().sum()

date               0
close              0
high               0
low                0
open               0
volume             0
ticker             0
dollar_vol         0
dollar_vol_1m      0
dollar_vol_rank    0
rsi                0
bb_high            0
bb_low             0
atr                0
macd               0
return_1d          0
return_5d          0
return_10d         0
return_21d         0
return_42d         0
return_63d         0
lag_return_1d      0
lag_return_5d      0
lag_return_10d     0
lag_return_21d     0
target_1d          0
target_5d          0
target_10d         0
target_21d         0
year_2016          0
year_2017          0
year_2018          0
year_2019          0
year_2020          0
year_2021          0
year_2022          0
year_2023          0
year_2024          0
month_2            0
month_3            0
month_4            0
month_5            0
month_6            0
month_7            0
month_8            0
month_9            0
month_10           0
month_11     

In [42]:
data.to_csv('data/stocks.csv', index=False)