In [8]:
import pandas as pd
from prophet import Prophet
from IPython.display import clear_output
import os
import logging

logging.getLogger('fbprophet').setLevel(logging.WARNING) # Removes the warning messages

In [9]:
# Read in the data

df = pd.read_csv('VOO.csv', parse_dates=['Date'])
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-09-09,102.500000,102.500000,101.139999,101.320000,81.350807,26500
1,2010-09-10,101.680000,101.860001,101.300003,101.779999,81.720131,8600
2,2010-09-13,102.959999,103.139999,102.500000,103.059998,82.747894,33750
3,2010-09-14,102.839996,103.480003,102.379997,103.040001,82.731789,59400
4,2010-09-15,102.620003,103.379997,102.400002,103.300003,82.940575,9250
...,...,...,...,...,...,...,...
2876,2022-02-10,414.929993,420.799988,411.140015,413.179993,413.179993,9260500
2877,2022-02-11,413.149994,415.200012,403.570007,404.940002,404.940002,10452700
2878,2022-02-14,404.429993,405.989990,400.239990,403.619995,403.619995,10162000
2879,2022-02-15,408.179993,410.290009,407.459991,410.100006,410.100006,6822300


In [10]:
# Rename the columns to fit Prophet's naming convention

day_mapper = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri', 5: 'Sat', 6: 'Sun'}
df["DayOfWeek"] = df["Date"].map(lambda x: day_mapper[x.weekday()])
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,DayOfWeek
0,2010-09-09,102.500000,102.500000,101.139999,101.320000,81.350807,26500,Thu
1,2010-09-10,101.680000,101.860001,101.300003,101.779999,81.720131,8600,Fri
2,2010-09-13,102.959999,103.139999,102.500000,103.059998,82.747894,33750,Mon
3,2010-09-14,102.839996,103.480003,102.379997,103.040001,82.731789,59400,Tue
4,2010-09-15,102.620003,103.379997,102.400002,103.300003,82.940575,9250,Wed
...,...,...,...,...,...,...,...,...
2876,2022-02-10,414.929993,420.799988,411.140015,413.179993,413.179993,9260500,Thu
2877,2022-02-11,413.149994,415.200012,403.570007,404.940002,404.940002,10452700,Fri
2878,2022-02-14,404.429993,405.989990,400.239990,403.619995,403.619995,10162000,Mon
2879,2022-02-15,408.179993,410.290009,407.459991,410.100006,410.100006,6822300,Tue


In [11]:
# Take the previous value and then subtract it from the current value to get the daily change

df["diff_from_previous_day"] = (df["Open"].diff() / df["Open"]) * 100
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,DayOfWeek,diff_from_previous_day
0,2010-09-09,102.500000,102.500000,101.139999,101.320000,81.350807,26500,Thu,
1,2010-09-10,101.680000,101.860001,101.300003,101.779999,81.720131,8600,Fri,-0.806452
2,2010-09-13,102.959999,103.139999,102.500000,103.059998,82.747894,33750,Mon,1.243200
3,2010-09-14,102.839996,103.480003,102.379997,103.040001,82.731789,59400,Tue,-0.116689
4,2010-09-15,102.620003,103.379997,102.400002,103.300003,82.940575,9250,Wed,-0.214376
...,...,...,...,...,...,...,...,...,...
2876,2022-02-10,414.929993,420.799988,411.140015,413.179993,413.179993,9260500,Thu,-0.872435
2877,2022-02-11,413.149994,415.200012,403.570007,404.940002,404.940002,10452700,Fri,-0.430836
2878,2022-02-14,404.429993,405.989990,400.239990,403.619995,403.619995,10162000,Mon,-2.156121
2879,2022-02-15,408.179993,410.290009,407.459991,410.100006,410.100006,6822300,Tue,0.918712


In [15]:
# Compare the different days to see which one has the biggest dip from the previous day

df.groupby("DayOfWeek")["diff_from_previous_day"].mean().sort_values()

DayOfWeek
Mon   -0.034693
Thu   -0.001912
Fri    0.064210
Wed    0.076498
Tue    0.103887
Name: diff_from_previous_day, dtype: float64