In [2]:
from os import listdir
import glob
import pandas as pd
import numpy as np

In [3]:
filepaths = glob.glob("../data/tidy/acetaminophen/*_events_*.csv")
df = pd.concat(map(pd.read_csv, filepaths))
df["CLINICAL_EVENT_DATETIME"] = pd.to_datetime(df["CLINICAL_EVENT_DATETIME"])
df.index = df["CLINICAL_EVENT_DATETIME"]

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


In [60]:
df_monthly = df.resample("MS").count()[["EVENT_ID"]]
df_monthly.columns = ["Actual"]

In [61]:
from statsmodels.tsa.arima_model import ARIMA, ARIMAResults
from pmdarima import auto_arima

In [62]:
auto_mod = auto_arima(
    df_monthly["Actual"], 
    suppress_warnings=True,
    seasonal=False,
    stepwise=False,
    error_action="ignore"
)

In [63]:
model_arima = ARIMA(df_monthly["Actual"], order=auto_mod.order)
fit_arima = model_arima.fit()

In [64]:
n_pred = 12
fc, se, conf = fit_arima.forecast(n_pred, alpha=0.05)  # 95% conf
idx = pd.date_range(df_monthly.index[-1] + 1, periods = n_pred, freq="MS")
fc_series = pd.Series(fc, index=idx)
lower_series = pd.Series(conf[:, 0], index=idx)
upper_series = pd.Series(conf[:, 1], index=idx)

  This is separate from the ipykernel package so we can avoid doing imports until


In [142]:
df_forecast = pd.DataFrame(fc.round(0), index=idx, columns=["Forecast"])
df_forecast.index.name = "CLINICAL_EVENT_DATETIME"

In [167]:
df_combined = pd.concat([df_monthly, df_forecast]).replace({pd.np.nan: None})

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [168]:
from pptx import Presentation 
from pptx.chart.data import CategoryChartData
from pptx.dml.color import RGBColor
from pptx.enum.chart import XL_CHART_TYPE, XL_LEGEND_POSITION, XL_DATA_LABEL_POSITION, XL_MARKER_STYLE, XL_TICK_MARK
from pptx.enum.dml import MSO_LINE_DASH_STYLE
from pptx.util import Inches, Pt

In [169]:
prs = Presentation()
# title slide
title_slide_layout = prs.slide_layouts[0]
slide = prs.slides.add_slide(title_slide_layout)
title = slide.shapes.title
subtitle = slide.placeholders[1]

title.text = "Test Forecast Slides"
subtitle.text = "Updated: Some Date"

In [170]:
# forecast slide
blank_slide_layout = prs.slide_layouts[6]
slide = prs.slides.add_slide(blank_slide_layout)

chart_data = CategoryChartData()
chart_data.categories = df_combined.index
chart_data.add_series("Actual", df_combined["Actual"])
chart_data.add_series("Forecast", df_combined["Forecast"])

<pptx.chart.data.CategorySeriesData at 0x7f03d427d668>

In [171]:
# format graph
x, y, cx, cy = Inches(1), Inches(1), Inches(8), Inches(6)
chart = slide.shapes.add_chart(XL_CHART_TYPE.LINE, x, y, cx, cy, chart_data).chart

category_axis = chart.category_axis
category_axis.has_title = True
category_axis.axis_title.text_frame.text = "Month"

value_axis = chart.value_axis
value_axis.has_major_gridlines = False
value_axis.has_minor_gridlines = False

chart.has_legend = True
chart.legend.include_in_layout = False
chart.legend.position = XL_LEGEND_POSITION.TOP

# chart.series[0].smooth = True

chart.series[1].format.line.dash_style = MSO_LINE_DASH_STYLE.DASH
chart.series[1].format.line.width = Pt(1.5)

i = len(chart.series[-1].points) - 1
chart.series[-1].points[i]

chart.series[-1].points[i].marker.style = XL_MARKER_STYLE.CIRCLE
chart.series[-1].points[i].marker.format.fill.solid()
chart.series[-1].points[i].marker.format.fill.fore_color.rgb = RGBColor.from_string("FFFFFF")
chart.series[-1].points[i].marker.format.line.width = Pt(1.5)
chart.series[-1].points[i].data_label.has_text_frame = True
chart.series[-1].points[i].data_label.position = XL_DATA_LABEL_POSITION.BELOW
chart.series[-1].points[i].data_label.text_frame.text = str(round(chart.series[-1].values[-1], None))

In [172]:
prs.save('../doc/forecast.pptx')