In [1]:
import datetime
import getpass
import os

import numpy as np
import pandas as pd

from bokeh.plotting import figure, show, output_file
from bokeh.embed import autoload_static
from bokeh.resources import CDN
from bokeh.models import (ColumnDataSource,
                          DatetimeTickFormatter,
                          LinearAxis,
                          Range1d, 
                          NumeralTickFormatter,
                          HoverTool,
                          Legend,
                          LegendItem,
                          Label)


In [216]:
# set the filepath: the data lives in a per-user directory under /scratch.
# os.getlogin() reports the user of the controlling terminal and raises
# OSError when the process has none (typical for notebook kernels and batch
# jobs), so fall back to getpass.getuser() in that case.
try:
    username = os.getlogin()
except OSError:
    username = getpass.getuser()

# read in data
df = pd.read_stata(os.path.join("/scratch", username, "agmark_total_ts.dta"))

# merge in date index to ensure there is an entry for every date
# (1096 daily entries starting 2018-01-01, i.e. through 2020-12-31)
date_index = pd.DataFrame({"date": pd.date_range('2018-01-01', periods=1096, freq='D')})
df = date_index.merge(df, on="date", how="left")

# rows introduced by the left merge have no source columns at all, including
# 'year'. Fill it from the calendar date so the per-year selections that
# follow (df['year'] == ...) keep those rows instead of silently dropping
# the very dates this merge was meant to guarantee. Existing values are kept.
df['year'] = df['year'].fillna(df['date'].dt.year)

# fill missing values with the average of the values before and after
# (only closes single-day gaps: both neighbours must be present)
df['qty'] = df['qty'].fillna((df['qty'].shift() + df['qty'].shift(-1))/2)

# define annualized date variable: the same month and day placed in the
# dummy year 2016, so every year can share one x axis
df['month'] = df['date'].dt.month.astype(str)
df['day'] = df['date'].dt.day.astype(str)
df["month-day"] = df["month"] + "-" + df["day"] + "-" + "2016"
df["annualized"] = pd.to_datetime(df["month-day"], format="%m-%d-%Y")

# take 3-day rolling average; entries after 2020-05-17 are forced to zero
zero_index = df[df["date"]>datetime.datetime(2020,5,17)].index
df['qty_rolling'] = df['qty'].rolling(window=3).mean()
df.loc[zero_index, "qty_rolling"] = 0

# set index to be the date (the 'date' column itself is kept)
df.index = df["date"]
df.index.name = 'Date'
df = df.sort_index()

# pull the smoothed series for 2018 and 2019 into single-column frames keyed
# by the annualized date, each column named after its year
df_2018 = (
    df.loc[df['year'].eq(2018)]
      .set_index("annualized")[['qty_rolling']]
      .rename(columns={"qty_rolling": "qty_2018"})
)
df_2019 = (
    df.loc[df['year'].eq(2019)]
      .set_index("annualized")[['qty_rolling']]
      .rename(columns={"qty_rolling": "qty_2019"})
)

# 2020 keeps every column; it is the frame the other years are attached to
data = (
    df.loc[df['year'].eq(2020)]
      .set_index("annualized")
      .rename(columns={"qty_rolling": "qty_2020"})
)

# outer-merge the earlier years on the annualized index to stack the data
for earlier_year in (df_2018, df_2019):
    data = data.merge(earlier_year, left_index=True, right_index=True, how="outer")

# relative change from 2019 to 2020 (as a fraction, not multiplied by 100)
data['perc_change_qty'] = data['qty_2020'].sub(data['qty_2019']).div(data['qty_2019'])

# short text form of the annualized date (e.g. "Jan 01")
data["date_str"] = data.index.strftime("%b %d")

# blank out the 2020 series for dates after 2020-05-17 (outside the reporting window)
after_window = data["date"] > datetime.datetime(2020, 5, 17)
data.loc[after_window, "qty_2020"] = np.nan

In [217]:
# wrap the stacked frame so glyphs can refer to its columns by name
source = ColumnDataSource(data)

# toolbar contents
TOOLS = "pan,box_zoom,reset,lasso_select,save,box_select,xzoom_in,crosshair"

# base bokeh figure: datetime x axis, market quantities on the primary y axis
# (livestock variant: y_axis_label="Number of Livestock", y_range=(0, 3000))
figure_options = dict(
    tools=TOOLS,
    x_axis_type="datetime",
    plot_width=1000,
    plot_height=600,
    toolbar_location="right",
    y_axis_label="India-Wide Food Arrivals (Tonnes)",
    y_range=(0, 2000000),
)
p = figure(**figure_options)

# secondary y range for the case counts, with 1500 of headroom above the peak
yend = data['cases'].max() + 1500
p.extra_y_ranges = {"cases": Range1d(start=0, end=yend)}

# attach an axis for that range on the right-hand side
cases_axis = LinearAxis(y_range_name="cases", axis_label="Number of New Cases")
p.add_layout(cases_axis, 'right')

# new covid cases as bars on the secondary axis; the width is three quarters
# of a day expressed in milliseconds. This must stay the first renderer added.
bar = p.vbar(
    x='annualized',
    top='cases',
    width=0.75 * 24 * 60 * 60 * 1000,
    alpha=0.65,
    color="#ffbb00",
    source=source,
    y_range_name="cases",
)

# one line per year on the primary axis, 2020 drawn heavier in red
l18 = p.line(x='annualized', y='qty_2018', source=source, color="#077300", line_width=1)
l19 = p.line(x='annualized', y='qty_2019', source=source, color="#050063", line_width=1)
l20 = p.line(x='annualized', y='qty_2020', source=source, color="red", line_width=2.5)

# visible x window, expressed in the dummy year 2016
xend = datetime.datetime(2016, 6, 4)
p.x_range.start = datetime.datetime(2016, 1, 1)
p.x_range.end = xend

# axis labelling: full month names on x, thousands separators on every y axis
p.xaxis.axis_label = 'Date'
p.xaxis.formatter = DatetimeTickFormatter(months="%B")
p.yaxis.formatter = NumeralTickFormatter(format="0,0")

# height of the phase banner drawn along the top of the plot, measured on the
# secondary "cases" axis (the name is historical; it is a height, not a width)
boxwidth = 500

# One entry per lockdown phase; all dates use the dummy year 2016 of the x axis.
#   start / end  - left and right edge of the banner segment; `start` also
#                  positions the dotted vertical marker and its date label
#   number_x     - where the phase number is written inside the segment
#   date_y       - height of the rotated date label, as a fraction of yend
#   quad_kwargs  - extra keyword arguments for this phase's banner segment
phases = [
    dict(number='1', color="#154360",
         start=datetime.datetime(2016,3,24), end=datetime.datetime(2016,4,14),
         number_x=datetime.datetime(2016,4,3), date_y=0.88, quad_kwargs={}),
    dict(number='2', color="#1F618D",
         start=datetime.datetime(2016,4,14), end=datetime.datetime(2016,5,3),
         number_x=datetime.datetime(2016,4,23), date_y=0.885, quad_kwargs={}),
    dict(number='3', color="#2980B9",
         start=datetime.datetime(2016,5,3), end=datetime.datetime(2016,5,17),
         number_x=datetime.datetime(2016,5,9), date_y=0.88, quad_kwargs={"alpha": 0.95}),
    dict(number='4', color="#7FB3D5",
         start=datetime.datetime(2016,5,17), end=datetime.datetime(2016,5,31),
         number_x=datetime.datetime(2016,5,23), date_y=0.88, quad_kwargs={}),
    dict(number='5', color="#A9CCE3",
         start=datetime.datetime(2016,5,31), end=xend,
         number_x=datetime.datetime(2016,6,1), date_y=0.88, quad_kwargs={}),
]

for phase in phases:
    # coloured banner segment spanning the phase
    p.quad(top=[yend],
           bottom=[yend-boxwidth],
           left=[phase["start"]],
           right=[phase["end"]],
           y_range_name="cases", color=phase["color"], **phase["quad_kwargs"])

    # phase number inside the segment
    p.add_layout(Label(x=phase["number_x"], y=yend-(boxwidth*0.9), text=phase["number"],
                       y_range_name="cases", text_color="white"))

    # dotted vertical marker at the start of the phase
    p.line([phase["start"], phase["start"]], [0, yend],
           line_width=1.5, color=phase["color"], y_range_name="cases", line_dash="dotdash")

    # rotated date label next to the marker. The text is derived from the
    # marker date so the two cannot disagree (phase 3 used to read 'May-05'
    # while its marker sat at May 3).
    p.add_layout(Label(x=phase["start"], y=yend*phase["date_y"],
                       text=phase["start"].strftime("%b-%d"), y_range_name="cases",
                       angle=np.pi/2, text_color=phase["color"],
                       text_font_style="italic", text_font_size='8pt'))

# caption to the left of the banner
t6 = Label(x=datetime.datetime(2016,2,20),y=yend-(boxwidth*0.8), text='Pandemic Lockdown Phase:',
           y_range_name="cases", text_color="#154360", text_font_style="italic",  text_font_size='10pt')
p.add_layout(t6)

# define hover tip tool, active on the 2020 line and the case bars only.
# Values in braces are Bokeh numeral format strings: "0" is a whole number and
# "0.00%" a percentage with two decimals. The previous "{:.2%}" was Python
# str.format syntax, which is not a Bokeh numeral format.
hover1 = HoverTool(
    renderers=[l20, bar],
    tooltips=[
        ("New Cases", "@cases{0}"),
        ("Percent Quantity Change, 2019 to 2020", "@perc_change_qty{0.00%}"),
        ("Date", "@date_str"),
    ],
)
p.add_tools(hover1)

# create legend. Each entry is bound to its glyph through the handle returned
# when the glyph was created, rather than through a position in p.renderers,
# so adding or reordering glyphs cannot silently mislabel an entry.
li1 = LegendItem(label='New Covid-19 Cases', renderers=[bar])
li2 = LegendItem(label='2018 Market Quantity', renderers=[l18])
li3 = LegendItem(label='2019 Market Quantity', renderers=[l19])
li4 = LegendItem(label='2020 Market Quantity', renderers=[l20])
legend1 = Legend(items=[li1, li2, li3, li4], location='top_left')
p.add_layout(legend1)

# write out the code to embed the figure: the generated JavaScript goes to the
# .js asset and the matching tag to the .html asset
js, tag = autoload_static(p, CDN, 'market-volumes.js')
embed_outputs = (
    ('../assets/market-volumes.js', js),
    ("../assets/market-volumes.html", tag),
)
for target_path, payload in embed_outputs:
    with open(target_path, 'w') as handle:
        handle.write(payload)

# output html as a standalone page and display the figure
# (livestock variant: "/dartfs-hpc/rc/home/y/f00473y/public_html/png/ts_livestock.html")
output_file("/dartfs-hpc/rc/home/y/f00473y/public_html/png/ts.html")
show(p)