In [1]:
from pathlib import Path
import pandas as pd
import numpy as np

import os
import json
from dotenv import load_dotenv
from urllib.request import urlopen

%matplotlib inline

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima_model import ARIMA

In [2]:
# Stock Price data to DataFrames

def _load_prices(csv_path):
    """Read one stock-price CSV into a DataFrame indexed by its 'Date' column.

    `parse_dates=True` asks pandas to parse the index column as dates.
    `infer_datetime_format` is intentionally not passed: it is deprecated
    since pandas 2.0 (format inference is the default behaviour there) and
    only emitted a warning on every call.
    """
    return pd.read_csv(csv_path, parse_dates=True, index_col='Date')


# One CSV per ticker, all relative to the notebook's working directory.
amzn_csv = Path('Resources/stock_price_data/amazon_prices.csv')
att_csv = Path('Resources/stock_price_data/at_t_prices.csv')
boa_csv = Path('Resources/stock_price_data/boa_prices.csv')
coke_csv = Path('Resources/stock_price_data/coca_cola_prices.csv')
csx_csv = Path('Resources/stock_price_data/csx_prices.csv')
jpm_csv = Path('Resources/stock_price_data/jpmorgan_prices.csv')
odfl_csv = Path('Resources/stock_price_data/old_dominion_prices.csv')
pg_csv = Path('Resources/stock_price_data/proctor_gamble_prices.csv')
tesla_csv = Path('Resources/stock_price_data/tesla_prices.csv')
verizon_csv = Path('Resources/stock_price_data/verizon_prices.csv')

amzn_prices = _load_prices(amzn_csv)
att_prices = _load_prices(att_csv)
boa_prices = _load_prices(boa_csv)
coke_prices = _load_prices(coke_csv)
csx_prices = _load_prices(csx_csv)
jpm_prices = _load_prices(jpm_csv)
odfl_prices = _load_prices(odfl_csv)
pg_prices = _load_prices(pg_csv)
tesla_prices = _load_prices(tesla_csv)
verizon_prices = _load_prices(verizon_csv)

# Preview the first rows of one frame as a sanity check on the load.
amzn_prices.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
1997-05-15 16:00:00,1.96
1997-05-16 16:00:00,1.73
1997-05-19 16:00:00,1.71
1997-05-20 16:00:00,1.64
1997-05-21 16:00:00,1.43


In [3]:
# Income Statement Data to DataFrames

def _read_income_statement(csv_path):
    """Load one income-statement CSV, using its 'date' column as the index."""
    return pd.read_csv(
        csv_path,
        parse_dates=True,
        infer_datetime_format=True,
        index_col='date',
    )


# Source files, one per ticker.
amzn_path = Path('Resources/income_statement_data/amzn_income_statement_quarter.csv')
att_path = Path('Resources/income_statement_data/t_income_statement_quarter.csv')
# NOTE(review): this is the only '_annual' file; every other ticker uses
# '_quarter'. Confirm that mixing frequencies here is intentional.
boa_path = Path('Resources/income_statement_data/bac_income_statement_annual.csv')
coke_path = Path('Resources/income_statement_data/ko_income_statement_quarter.csv')
csx_path = Path('Resources/income_statement_data/csx_income_statement_quarter.csv')
jpm_path = Path('Resources/income_statement_data/jpm_income_statement_quarter.csv')
odfl_path = Path('Resources/income_statement_data/odfl_income_statement_quarter.csv')
pg_path = Path('Resources/income_statement_data/pg_income_statement_quarter.csv')
tesla_path = Path('Resources/income_statement_data/tsla_income_statement_quarter.csv')
verizon_path = Path('Resources/income_statement_data/vz_income_statement_quarter.csv')

# Load each statement in the same order as the paths above.
amzn_inc_stmnt = _read_income_statement(amzn_path)
att_inc_stmnt = _read_income_statement(att_path)
boa_inc_stmnt = _read_income_statement(boa_path)
coke_inc_stmnt = _read_income_statement(coke_path)
csx_inc_stmnt = _read_income_statement(csx_path)
jpm_inc_stmnt = _read_income_statement(jpm_path)
odfl_inc_stmnt = _read_income_statement(odfl_path)
pg_inc_stmnt = _read_income_statement(pg_path)
tesla_inc_stmnt = _read_income_statement(tesla_path)
vz_inc_stmnt = _read_income_statement(verizon_path)



In [4]:
# Convert index to datetime
#
# Each price frame gets its index replaced in place with the result of
# pd.to_datetime, so the index is a DatetimeIndex whether or not read_csv
# already parsed the 'Date' column.
for _price_frame in (
    amzn_prices,
    att_prices,
    boa_prices,
    coke_prices,
    csx_prices,
    jpm_prices,
    odfl_prices,
    pg_prices,
    tesla_prices,
    verizon_prices,
):
    _price_frame.index = pd.to_datetime(_price_frame.index)

In [5]:
# Prototype of the date clean-up on the ODFL income statement.
# Only the first 10 characters of each index label are kept.
odfl_index = [label[:10] for label in odfl_inc_stmnt.index]

# NOTE: this adds a 'Date' column to odfl_inc_stmnt itself (in-place side
# effect on the loaded frame); it is kept because later code may rely on it.
odfl_inc_stmnt['Date'] = odfl_index

# Re-index on the trimmed strings; the index is still dtype object here.
odfl_indexed = odfl_inc_stmnt.set_index('Date')
odfl_indexed.index

Index(['2020-06-30', '2020-03-31', '2019-12-31', '2019-09-30', '2019-06-30',
       '2019-03-31', '2018-12-31', '2018-09-30', '2018-06-30', '2018-03-31',
       ...
       '1993-06-30', '1993-03-31', '1992-12-31', '1992-09-30', '1992-06-30',
       '1992-03-31', '1991-12-31', '1991-09-30', '1991-06-30', '1991-03-31'],
      dtype='object', name='Date', length=118)

In [6]:
# Parse the 'YYYY-MM-DD' index labels into datetimes, replacing the index
# on odfl_indexed in place, then display the converted index.
_odfl_dates = pd.to_datetime(odfl_indexed.index, format='%Y-%m-%d')
odfl_indexed.index = _odfl_dates
odfl_indexed.index

DatetimeIndex(['2020-06-30', '2020-03-31', '2019-12-31', '2019-09-30',
               '2019-06-30', '2019-03-31', '2018-12-31', '2018-09-30',
               '2018-06-30', '2018-03-31',
               ...
               '1993-06-30', '1993-03-31', '1992-12-31', '1992-09-30',
               '1992-06-30', '1992-03-31', '1991-12-31', '1991-09-30',
               '1991-06-30', '1991-03-31'],
              dtype='datetime64[ns]', name='Date', length=118, freq=None)

In [7]:
# Create datetime index conversion function

def convert_datetime(income_statement_df):
    """Return a copy of the frame indexed by a DatetimeIndex named 'Date'.

    The first 10 characters of every index label are taken as a
    'YYYY-MM-DD' date string and parsed with that exact format. The
    original index is discarded and replaced by the parsed dates.

    Unlike the previous version, the caller's DataFrame is NOT modified:
    the 'Date' column is added to a new frame via ``assign`` rather than
    written into ``income_statement_df``.

    Parameters
    ----------
    income_statement_df : pandas.DataFrame
        Frame whose index labels are sliceable strings starting with a
        'YYYY-MM-DD' date. An existing 'Date' column, if any, is replaced
        in the result.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows, re-indexed by the parsed dates.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_datetime`` when a label prefix does not
        match '%Y-%m-%d'.
    """
    # Keep only the date part of each label.
    date_strings = [label[:10] for label in income_statement_df.index]

    # assign() builds a new frame, so the input stays untouched.
    converted = income_statement_df.assign(Date=date_strings).set_index('Date')
    converted.index = pd.to_datetime(converted.index, format='%Y-%m-%d')

    return converted

In [8]:
# Convert CSX and ODFL income-statement dates
#
# Both frames go through convert_datetime, CSX first and then ODFL.
# odfl_indexed is rebound here, replacing any earlier value of that name.
csx_indexed, odfl_indexed = (
    convert_datetime(statement)
    for statement in (csx_inc_stmnt, odfl_inc_stmnt)
)

# Show the converted CSX index.
csx_indexed.index

DatetimeIndex(['2020-06-30', '2020-03-31', '2019-12-31', '2019-09-30',
               '2019-06-30', '2019-03-31', '2018-12-31', '2018-09-30',
               '2018-06-30', '2018-03-31',
               ...
               '1991-09-30', '1991-06-30', '1991-03-31', '1990-12-31',
               '1990-09-30', '1990-06-30', '1990-03-31', '1989-12-31',
               '1989-09-30', '1989-06-30'],
              dtype='datetime64[ns]', name='Date', length=125, freq=None)