In [None]:
# Time-series (TS) analysis project:
#   - OLS fit for price prediction
#   - signal analysis and prediction
#   - profit calculation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas_datareader as dr
from pandas_datareader import data
from datetime import datetime
import cvxopt as opt
from cvxopt import blas, solvers
# import requests_cache

In [3]:
# Universe of stocks analysed in this project, identified by ticker symbol.
# The order here fixes the column order of every per-ticker table built later.
tickers = [
    'AMZN', 'GOOGL', 'JNJ', 'V', 'PG', 'UNH',
    'JPM', 'HD', 'VZ', 'NFLX', 'DIS', 'MRK',
    'PEP', 'BAC', 'KO', 'WMT', 'CVX', 'ABT',
    'AMGN', 'MCD', 'COST', 'NKE', 'PM', 'QCOM',
    'LOW', 'BA', 'LMT', 'SBUX', 'UPS', 'CAT',
]

# tickers_short = ['AMZN', 'GOOGL', 'JNJ', 'V', 'PG']

In [4]:
# Let pandas render one column per ticker so wide tables are not truncated.
ticker_length = len(tickers)
pd.options.display.max_columns = ticker_length

In [9]:
# Define start and end date as Unix timestamps (whole seconds since the epoch,
# midnight UTC); these are used as the period1/period2 query parameters when
# the download URL is built.
#
# The conversion goes through Timestamp.timestamp() rather than
# `.astype(int) // 10**9`: the latter depends on the platform's default integer
# width and silently assumes the parsed dates are stored in nanoseconds.
start = int(pd.Timestamp('2019-01-01', tz='UTC').timestamp())
end = int(pd.Timestamp('2023-05-19', tz='UTC').timestamp())

In [10]:
## actual data pull
#
# Downloads the daily history CSV for every ticker and assembles two wide
# tables indexed by the 'Date' column, with one column per ticker:
#   df_prices  - 'Adj Close' values
#   df_volumes - 'Volume' values
# The per-ticker Series are also kept in the `prices` / `vol` lists.
#
# NOTE(review): this unauthenticated CSV endpoint may no longer be served by
# Yahoo - confirm it still responds before relying on a fresh run.

YAHOO_DOWNLOAD_URL = (
    'https://query1.finance.yahoo.com/v7/finance/download/{ticker}'
    '?period1={start}&period2={end}&interval=1d&events=history'
)

prices = []
vol = []

for tick in tickers:
    url = YAHOO_DOWNLOAD_URL.format(ticker=tick, start=start, end=end)
    df = pd.read_csv(url, index_col='Date')

    prices.append(df['Adj Close'])
    vol.append(df['Volume'])

# Build each table in a single concat instead of inserting columns one by one
# into an empty frame: column-wise insertion reindexes every ticker onto the
# FIRST ticker's dates and silently drops rows that only later tickers have.
# concat keeps the union of dates, so any gap shows up as NaN and can be
# caught by an isna() check. `keys=tickers` names the columns directly.
df_prices = pd.concat(prices, axis=1, keys=tickers).sort_index()
df_volumes = pd.concat(vol, axis=1, keys=tickers).sort_index()

In [11]:
# Preview the first rows of the adjusted-close table (one column per ticker).
df_prices.head()

Unnamed: 0_level_0,AMZN,GOOGL,JNJ,V,PG,UNH,JPM,HD,VZ,NFLX,DIS,MRK,PEP,BAC,KO,WMT,CVX,ABT,AMGN,MCD,COST,NKE,PM,QCOM,LOW,BA,LMT,SBUX,UPS,CAT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2019-01-02,76.956497,52.734001,113.289436,128.979141,81.445816,228.546387,86.554703,155.546707,45.232269,267.660004,107.654335,63.185005,96.932426,22.64102,41.098732,86.520966,90.628799,64.570145,167.338028,159.459518,192.415695,71.081741,53.02129,51.560314,85.18148,314.645142,236.479187,58.850693,84.561768,113.021957
2019-01-03,75.014,51.273499,111.489204,124.331146,80.874771,222.313919,85.324608,152.118347,45.393757,271.200012,105.046211,61.88937,96.027695,22.278181,40.84478,86.076042,88.893013,61.522789,164.791779,158.408905,188.337387,69.824417,52.335987,50.033264,84.165977,302.100525,230.538864,56.297939,82.160873,108.66671
2019-01-04,78.769501,53.9035,113.360374,129.687454,82.525452,224.913895,88.470131,156.638351,45.506805,297.570007,108.286606,63.753418,97.996841,23.203411,41.659222,86.613655,90.735252,63.278736,170.424927,161.4702,193.806564,71.648003,54.785786,50.841705,86.658562,317.822571,236.75618,58.164474,85.02282,114.604874
2019-01-07,81.475502,53.796001,112.633217,132.026031,82.195313,225.345688,88.531639,159.723862,45.79747,315.339996,109.225136,63.05125,97.154182,23.185274,41.116261,87.633293,91.914261,64.226379,172.718338,163.227295,194.520737,72.67498,53.107944,50.697983,88.301796,318.823395,239.471695,58.164474,84.735741,114.676437
2019-01-08,82.829002,54.268501,115.24929,132.74408,82.498711,228.358643,88.364693,160.490677,47.137806,320.269989,110.07476,63.519367,98.085548,23.139921,41.580406,88.245071,91.513046,63.455261,174.933197,163.571487,195.97728,73.644356,53.462421,50.257843,90.129692,330.891937,241.070709,58.210213,84.744438,116.05365


In [15]:
# Inspect the last rows to see how far the downloaded price history extends.
df_prices.tail()

Unnamed: 0_level_0,AMZN,GOOGL,JNJ,V,PG,UNH,JPM,HD,VZ,NFLX,DIS,MRK,PEP,BAC,KO,WMT,CVX,ABT,AMGN,MCD,COST,NKE,PM,QCOM,LOW,BA,LMT,SBUX,UPS,CAT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2023-05-12,110.260002,117.510002,159.576004,231.380005,155.960007,491.230011,134.100006,290.470001,37.59,339.890015,91.989998,117.139999,196.119995,27.09,64.110001,153.070007,155.101166,110.489998,230.772614,296.140015,504.070007,120.209999,95.120003,103.620003,203.259995,200.699997,450.790009,106.120003,168.5,209.539993
2023-05-15,111.199997,116.510002,158.355209,232.809998,156.009995,486.859985,135.229996,288.540009,36.77,335.890015,92.860001,116.370003,194.270004,27.65,63.939999,151.880005,155.675552,109.839996,231.347183,295.899994,502.040009,119.830002,95.0,106.279999,201.550003,202.770004,451.920013,106.639999,168.850006,211.289993
2023-05-16,113.400002,119.510002,158.146774,230.470001,155.740005,479.720001,134.320007,282.329987,36.049999,333.75,90.980003,116.080002,193.429993,27.360001,63.220001,149.779999,152.021332,109.389999,225.75,294.149994,496.149994,116.480003,93.860001,103.790001,199.220001,200.869995,447.730011,105.279999,166.610001,207.199997
2023-05-17,115.5,120.839996,157.799408,232.649994,155.080002,484.809998,138.449997,292.390015,36.18,339.959991,92.769997,114.760002,192.059998,28.57,63.150002,149.529999,154.200012,108.82,225.020004,293.459991,495.0,116.980003,93.489998,104.809998,207.199997,206.869995,457.299988,105.690002,171.360001,212.100006
2023-05-18,118.150002,122.830002,157.293213,233.600006,152.529999,479.230011,139.5,295.160004,36.130001,371.290009,93.760002,114.0,191.559998,28.469999,62.799999,151.470001,154.009995,108.470001,224.229996,294.049988,498.100006,118.870003,92.599998,106.959999,209.160004,207.240005,455.75,107.089996,171.039993,214.720001


In [13]:
# Preview the first rows of the daily volume table (one column per ticker).
df_volumes.head()

Unnamed: 0_level_0,AMZN,GOOGL,JNJ,V,PG,UNH,JPM,HD,VZ,NFLX,DIS,MRK,PEP,BAC,KO,WMT,CVX,ABT,AMGN,MCD,COST,NKE,PM,QCOM,LOW,BA,LMT,SBUX,UPS,CAT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2019-01-02,159662000,31868000,7631700,8788000,9843900,4063600,15670900,4242900,13969700,11679500,9723500,15120125,4835400,71836300,11603700,8152700,6384800,8737200,3009100,2571500,2143100,6762700,6471200,9896600,4198200,3292200,1064700,10094000,3297300,4783200
2019-01-03,139512000,41960000,8654500,9428300,9820200,4623200,16286400,4810500,19245400,14969600,10594700,13910628,5975900,66599600,14714400,8277300,6496200,10570700,3973900,3728200,2877700,8007400,6178200,14422200,3798900,5705600,1165600,14390100,2705000,6165800
2019-01-04,183652000,46022000,8831700,11065800,10565700,5367600,16935200,4762600,17035800,19330100,10122800,14753115,6293900,83829100,13013700,8029100,7952500,8299100,4193400,3194300,2761600,7844200,7220700,14177300,4107400,4448800,1443800,12694400,3347300,5456600
2019-01-07,159864000,47446000,8404700,12928000,9012500,4133000,15430700,4928800,16349000,18620100,6714700,15795142,5220300,57016300,13135500,7789700,5712700,9503000,3032500,3283000,2226600,8184800,7807200,12352000,4885000,4030300,1465200,10337600,3795700,4653400
2019-01-08,177628000,35414000,9351600,9243000,9551300,3618600,13578800,4722400,21424600,15359200,8730700,11896686,6407700,67339400,15420700,7200900,5674900,8905100,3290300,2941400,2366500,8809000,7257700,12110000,4663500,4736500,1791400,10809700,3034300,4202600


In [14]:
# Count missing values per ticker in the price table.
df_prices.isna().sum()

AMZN     0
GOOGL    0
JNJ      0
V        0
PG       0
UNH      0
JPM      0
HD       0
VZ       0
NFLX     0
DIS      0
MRK      0
PEP      0
BAC      0
KO       0
WMT      0
CVX      0
ABT      0
AMGN     0
MCD      0
COST     0
NKE      0
PM       0
QCOM     0
LOW      0
BA       0
LMT      0
SBUX     0
UPS      0
CAT      0
dtype: int64

In [None]:
# want normalized prices
# want price return
# want log return

In [None]:
# normalized:

