In [None]:
import pandas as pd

# Load the feeding record, keeping only the columns "Date", "Time" and "feed".
# Rows missing any of the three are dropped up front so that no row ends up
# with a NaT timestamp or a NaN amount.
feeds_raw = pd.read_csv('ifr.csv')[['Date', 'Time', 'feed']].dropna()

# Build a frame indexed by the combined timestamp, with "feed" as its only
# column.  A chain of non-mutating calls avoids assigning a column on a
# column-subset frame (SettingWithCopyWarning) and avoids inplace=True.
df = (
    feeds_raw
    .assign(datetime=pd.to_datetime(feeds_raw['Date'] + ' ' + feeds_raw['Time']))
    .set_index('datetime')
    .drop(columns=['Date', 'Time'])
)

# aggregate by summing feed for each day
df_D = df.resample('D').sum()

# Daily totals from this date on serve as the reference level.
REFERENCE_DATE = '2023-01-17'

# find minimum daily feed on or after the reference date
min_after = df_D[df_D.index >= REFERENCE_DATE]['feed'].min()

# find days whose total is lower than this minimum (necessarily earlier days)
lower_values = df_D[df_D['feed'] < min_after]

# The cutoff is the day holding the largest of those lower totals.
# NOTE(review): idxmax() selects the day with the highest such total, not the
# most recent such day -- confirm that this is the intended cutoff.
if lower_values.empty:
    # No earlier day falls below the reference minimum: place the cutoff one
    # day before the data start so that the cut below keeps every row.
    cutoff = df_D.index.min() - pd.Timedelta('1D')
else:
    cutoff = lower_values['feed'].idxmax()

# Aggregate by summing feed for each 4 hours.  A Timedelta rule is used
# instead of the string alias '4H', which recent pandas releases deprecate.
df_4H = df.resample(pd.Timedelta(hours=4)).sum()

# keep only data from one day after the cutoff onwards
df_4H_cut = df_4H[df_4H.index >= cutoff + pd.Timedelta('1D')]

In [None]:
from statsmodels.tsa.forecasting.theta import ThetaModel
import matplotlib.pyplot as plt

# A daily cycle spans six 4-hour bins.  When no period is given, ThetaModel
# derives it from the base unit of the index frequency (hourly -> 24) and
# ignores the multiple of 4, which would correspond to a 4-day cycle.
BINS_PER_DAY = 6

# number of 4-hour steps to forecast past the end of the data
FORECAST_STEPS = 100


def fit_and_plot_theta(series, steps=FORECAST_STEPS, period=BINS_PER_DAY):
    """Fit a Theta model to `series` and plot the fit together with a forecast.

    Parameters
    ----------
    series : pandas.DataFrame or pandas.Series
        Feed totals on a regular DatetimeIndex (here: 4-hour sums).
    steps : int
        Number of periods to forecast beyond the last observation.
    period : int
        Number of observations per seasonal cycle.

    Returns
    -------
    tuple
        The ThetaModel instance and its fitted results object.
    """
    # fit the model
    theta_model = ThetaModel(series, period=period)
    theta_results = theta_model.fit()

    # plot in-sample fit plus forecast; alpha=None omits the prediction interval
    theta_results.plot_predict(steps, in_sample=True, alpha=None)

    # title, legend and axis labels
    plt.title('Infant Feeding Record')
    plt.legend(['Observed', 'Predicted'])
    plt.xlabel('Date')
    plt.ylabel('Amount (mL)')

    # resize the figure to 12 x 6 inches
    plt.gcf().set_size_inches(12, 6)

    # add horizontal grid lines
    plt.grid(axis='y')

    # TODO: change x-axis to days elapsed (not implemented yet)

    # show the plot
    plt.show()

    return theta_model, theta_results


# full 4-hourly series
model, model_results = fit_and_plot_theta(df_4H)

# the same for the cut data
model_cut, model_cut_results = fit_and_plot_theta(df_4H_cut)

In [None]:
# aggregate by summing feed over bins of 2 to 6 days and over calendar weeks
df_2D = df.resample('2D').sum()
df_3D = df.resample('3D').sum()
df_4D = df.resample('4D').sum()
df_5D = df.resample('5D').sum()
df_6D = df.resample('6D').sum()
df_W = df.resample('W').sum()


def _drop_partial_tail(binned, bin_days, n_days):
    """Return `binned` without its last row if that bin spans fewer than `bin_days` days."""
    return binned.iloc[:-1] if n_days % bin_days else binned


# Get rid of an incomplete trailing bin.  The N-day bins start on the first
# recorded day, so the last bin is short exactly when the number of calendar
# days spanned by the data (one row of df_D per day) is not a multiple of N.
# The number of raw records, len(df), is unrelated to the number of days.
n_days = len(df_D)
df_2D = _drop_partial_tail(df_2D, 2, n_days)
df_3D = _drop_partial_tail(df_3D, 3, n_days)
df_4D = _drop_partial_tail(df_4D, 4, n_days)
df_5D = _drop_partial_tail(df_5D, 5, n_days)
df_6D = _drop_partial_tail(df_6D, 6, n_days)

# Weekly bins are calendar weeks labelled by the day that closes them, not
# 7-day blocks counted from the first record, so a modulo test does not apply.
# The last week is incomplete when the data stop before its closing day.
# NOTE(review): the first week can be partial as well; it is kept here because
# only trailing data was being removed -- confirm whether it should go too.
if len(df_W) and df_D.index[-1] < df_W.index[-1]:
    df_W = df_W.iloc[:-1]

# plot the data, one titled figure per bin width
for bin_label, binned in [
    ('day', df_D),
    ('2 days', df_2D),
    ('3 days', df_3D),
    ('4 days', df_4D),
    ('5 days', df_5D),
    ('6 days', df_6D),
    ('week', df_W),
]:
    binned.plot(title=f'Feed summed per {bin_label}')