# Interactive graphics

In [1]:
import pandas as pd

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:

from helper_save_load import load_from_pickle

# Load the pickled inputs used by the rest of the notebook.
# NOTE(review): load_from_pickle presumably unpickles these files; unpickling can
# execute arbitrary code, so only load files from a trusted source.
df_a, df_f, df_v = load_from_pickle("data/dataframes_Dollars.pickle")
df_c = load_from_pickle("data/Labeled_comments.pkl")
pos_with_brands = load_from_pickle("data/pos_with_brands.pickle")

# Brand_1 values whose comment rows are dropped from df_c.
ignored_brands = ["57-CAMAY BAR SOAP Brand",
"IGNORE",
"71-LBP SH/CD Roll-up Brand",
"72-LBP SHOWER Roll-up Brand"]

# .copy() gives df_c its own data: later cells add and overwrite columns on it,
# which on a bare boolean-filtered slice triggers SettingWithCopyWarning.
df_c = df_c[~df_c["Brand_1"].isin(ignored_brands)].copy()

In [None]:
# Give every comment row a single label.
# Priority when several flags are set: Promo > Phasing > SP&D > POS.
# Rows with no flag set are 'Empty' when Commentaries is missing, else 'Unknown'.
comment_labels = pd.Series('Unknown', index=df_c.index)
comment_labels = comment_labels.mask(df_c['Commentaries'].isna().to_numpy(), 'Empty')
# Apply the lowest-priority flag first so higher-priority flags overwrite it.
for flag in ['POS', 'SP&D', 'Phasing', 'Promo']:
    comment_labels = comment_labels.mask((df_c[flag] == 1).to_numpy(), flag)

comme_type = comment_labels.tolist()
# assign() returns a new frame instead of writing into a possibly-sliced df_c.
df_c = df_c.assign(Comment_type=comme_type)
df_c[["Month","Commentaries", "Brand_1","Comment_type"]].head()

In [4]:
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta
def convert_date(mon):
    """Turn a "YYYY/MM" string into a "<abbreviated month>_<year>" label.

    Example: "2017/08" -> "Aug_2017". The month abbreviation comes from
    strftime("%b"), so it follows the active locale.

    Raises ValueError when `mon` does not split into exactly two
    integer-like parts on "/".
    """
    year, month = map(int, mon.split("/"))
    first_of_month = date(year, month, 1)
    return first_of_month.strftime("%b_%Y")

In [5]:
# Rewrite the Month column from "YYYY/MM" to "Mon_YYYY" labels.
# Not re-runnable: converted values no longer contain "/", so running this
# cell a second time makes convert_date raise.
df_c["Month"] = df_c["Month"].map(convert_date)

In [None]:
# Preview the comment frame after the Month column has been converted.
df_c.head()

In [7]:
#using interactivity
from ipywidgets import interact_manual, SelectionRangeSlider
import numpy as np
import ipywidgets
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Exploration: df_v summed per (Brand, Territory), restricted to one brand and
# divided by 1e6; then show the "Aug_2017" column across territories.
v = df_v.groupby(["Brand","Territory"]).sum().loc["JK-DOVE HS-SA Brand"] / 1e6
v["Aug_2017"]

In [None]:
# Exploration: df_v summed per Brand (all territories together) for one brand,
# divided by 1e6.
v_b = df_v.groupby(["Brand"]).sum().loc["JK-DOVE HS-SA Brand"] / 1e6
v_b

In [10]:
def group_by_TR_BR(territory, brand):
    """Return the (territory, brand) totals from df_v, df_a and df_f.

    Each frame is summed per (Territory, Brand) and the row for the requested
    pair is picked out. The df_a and df_f results are then relabelled with the
    df_v result's index, so all three share the same labels.

    Returns the tuple (v, a, f). A KeyError propagates when the territory or
    brand is absent from any of the frames.
    """
    group_keys = ["Territory", "Brand"]
    v, a, f = [
        frame.groupby(group_keys).sum().loc[territory].loc[brand]
        for frame in (df_v, df_a, df_f)
    ]
    # Force a and f onto v's labels (positional relabel, not an alignment).
    a.index = v.index
    f.index = v.index
    return v, a, f

In [11]:
def set_graph(ax, vx, vy, thresholds):
    """Apply the shared look of the bar panels to `ax`.

    ax         -- matplotlib axes to style.
    vx         -- x labels; only their count is used (x range is 0..len(vx)-1).
    vy         -- plotted values; they widen the y range beyond +/-0.22 by 10%.
    thresholds -- when truthy, draw dashed red lines at +0.2 and -0.2.
    """
    last_pos = len(vx) - 1

    # Tick label styling.
    plt.setp(ax.get_xticklabels(), rotation=90, fontsize=18)
    plt.setp(ax.get_yticklabels(), fontsize=18)

    # The y range always covers at least [-0.22, 0.22].
    lower = min(-0.22, np.min(vy) * 1.1)
    upper = max(0.22, np.max(vy) * 1.1)
    ax.set_xlim(0, last_pos)
    ax.set_ylim(lower, upper)

    ax.set_xlabel('Months', fontsize=28)
    ax.set_ylabel('Variance ($M)', fontsize=28)
    ax.grid(axis='x')
    ax.hlines(0, 0, last_pos)  # zero baseline

    # Variance thresholds.
    if thresholds:
        for level in (0.2, -0.2):
            ax.hlines(level, 0, last_pos, colors='r', linestyles='dashed')

In [12]:
def plot_variance(variance, ax):
    """Bar-plot one series on `ax`, with its values divided by 1e6.

    variance -- pandas Series; its index supplies the x labels.
    ax       -- matplotlib axes, styled via set_graph with thresholds on.
    """
    labels = variance.index
    in_millions = variance.values / 1e6
    set_graph(ax, labels, in_millions, True)
    ax.bar(labels, in_millions, width=0.95, align='edge', alpha=0.7)

In [13]:
def plot_variance_all(v, ax):
    """Draw one stacked bar series per row of `v` on `ax`.

    v  -- DataFrame; each row is one stacked series, columns are the x labels.
    ax -- matplotlib axes, styled via set_graph with thresholds on.

    The y range is reset afterwards to cover the running stack totals (plus
    10%), never narrower than [-0.22, 0.22].
    NOTE(review): each bar series gets a label, but no legend is drawn here.
    """
    set_graph(ax, v.columns, v.values, True)

    stack_top = [0] * len(v.columns)
    lowest = 0
    highest = 0
    for terr in v.index:
        row = v.loc[terr]
        ax.bar(row.index, row.values, bottom=stack_top, label=terr, align='edge')
        stack_top = stack_top + row.values
        lowest = min(lowest, np.min(stack_top))
        highest = max(highest, np.max(stack_top))

    ax.set(ylim=[min(-0.22, lowest * 1.1), max(0.22, highest * 1.1)])

In [14]:
def plot_variance_sum_all(v, ax):
    """Bar-plot the series `v` on `ax`, styled via set_graph with thresholds on."""
    labels, heights = v.index, v.values
    set_graph(ax, labels, heights, True)
    ax.bar(labels, heights, align='edge')
    

In [15]:
def print_comments(brand):
    """Display Month, Commentaries and Comment_type for `brand`'s comments.

    Rows whose Comment_type is "Empty" are left out.
    """
    shown_columns = ["Month","Commentaries", "Comment_type"]
    is_brand = df_c["Brand_1"] == brand
    not_empty = df_c["Comment_type"] != "Empty"
    display(df_c.loc[is_brand & not_empty, shown_columns])

def plot_comments(brand, months, ax):
    """Bar-plot, per month, how many distinct comments `brand` has of each type.

    brand  -- value matched against df_c["Brand_1"].
    months -- ordered month labels (same format as df_c["Month"]) for the x
              axis; months with no comments are drawn as zero.
    ax     -- matplotlib axes to draw on.

    Nothing is drawn when the brand has no non-"Empty" comments, or when
    `months` is empty.
    """
    c_filter = (df_c["Brand_1"]==brand) & (df_c["Comment_type"]!="Empty")
    comments = df_c.loc[c_filter, ["Month","Commentaries", "Comment_type"]]

    # Rows: months, columns: comment types, cells: number of distinct comment texts.
    comments_pivot = comments.pivot_table(values='Commentaries', index='Month',
                                          columns='Comment_type',
                                          aggfunc=lambda x: len(x.unique()))

    # Align to the requested months. This replaces pd.concat(..., join_axes=...),
    # which was removed in pandas 1.0.
    counts = comments_pivot.reindex(months).fillna(0)
    counts.columns.name = None  # the old concat-based frame had unnamed columns

    if not counts.empty:
        ax = counts.plot.bar(ax=ax, align='edge')
        # Keep the historical 0..3 range as a minimum, but grow it when a month
        # has more than three comments so tall bars are not clipped.
        peak = counts.to_numpy().max()
        set_graph(ax, counts.index, [0, 1, 2, 3, peak], False)
        ax.set_ylabel('Commentaries', fontsize=28)


In [41]:
# Comment rows flagged WM == 1, with missing values replaced by 0.
wm_columns = ["Month", "Brand_1", "Commentaries", "Promo", "Phasing", "SP&D", "POS"]
class_flags = ["Promo", "Phasing", "SP&D", "POS"]
WM_labels = df_c.loc[df_c['WM'] == 1, wm_columns].fillna(0)
# NoClass is True for rows where none of the four class flags is set.
WM_labels["NoClass"] = ~WM_labels[class_flags].any(axis=1)
# Disabled: parse Month back into datetimes.
#WM_labels["Month"] = pd.to_datetime(WM_labels["Month"], format='%b_%Y')

In [None]:
# Exploration: fetch the three per-(territory, brand) series for one Walmart
# brand and display the first of them.
v, a, f = group_by_TR_BR("M7400 - WALMART", "05-AXE SA Brand")
v

In [None]:
# Show the current value of v again (it depends on which cell assigned v last).
v

In [16]:
def plot_graph(brand, period, date_range, 
               POS_plot=True, 
               territory_v_plot=True,
               all_territory_v_plot=False, 
               sum_v_plot=False,
               commentary_plot=True):
    """Draw the selected panels for one brand, stacked vertically, then list its comments.

    brand       -- brand label; looked up in pos_with_brands, df_v and df_c.
    period      -- pandas period alias (e.g. 'D', 'W', 'M') used to bucket the
                   POS data; only used when POS_plot is True.
    date_range  -- (start, end) pair of month labels in '%b_%Y' format. Both
                   months are included in every panel.
    POS_plot             -- quantity and average unit price from pos_with_brands.
    territory_v_plot     -- df_v totals for territory "M7400 - WALMART".
    all_territory_v_plot -- df_v totals stacked by territory.
    sum_v_plot           -- df_v totals summed over all territories.
    commentary_plot      -- number of distinct comments per month and type.

    With every panel switched off, only the comment table is displayed.
    Side effect: sets the global matplotlib 'font.size' rcParam to 22.
    """
    print(brand)

    num_plots = sum([POS_plot, territory_v_plot, all_territory_v_plot, sum_v_plot, commentary_plot])
    if num_plots == 0:
        # plt.subplots() rejects nrows=0, so skip the figure entirely.
        print_comments(brand)
        return

    # Filter dates
    StartDate = pd.to_datetime(date_range[0], format='%b_%Y')
    EndDate = pd.to_datetime(date_range[1], format='%b_%Y')

    StartDate_f = StartDate.strftime("%Y-%m-%d")
    # Exclusive upper bound for the POS data: the first day of the month AFTER
    # the selected end month. Using the end month's own first day would drop
    # that whole month, although the variance panels do include it.
    EndDate_f = (EndDate + pd.offsets.MonthBegin(1)).strftime("%Y-%m-%d")

    months = pd.date_range(StartDate, EndDate, freq='MS').strftime("%b_%Y")

    v, _, _ = group_by_TR_BR("M7400 - WALMART", brand)
    v = v[months]

    ### Creates figure (squeeze=False keeps axes 2-D even for a single panel)
    fig, axes = plt.subplots(nrows=num_plots, ncols=1, figsize=(30, 8*num_plots), squeeze=False)
    axis_num = 0

    ###
    plt.rcParams.update({'font.size': 22})

    if POS_plot:
        import matplotlib.dates as dates

        # Filtering Walmart POS data
        in_range = ((pos_with_brands['POSDate'] >= StartDate_f) &
                    (pos_with_brands['POSDate'] < EndDate_f))
        Pos_Date_Filtered = pos_with_brands[in_range]

        group_by_day = Pos_Date_Filtered.groupby(
            [Pos_Date_Filtered["POSDate"].dt.to_period(period), "Brand"]
        ).agg({"TotalSales":"sum", "TotalQty":"sum"}).reset_index()
        # Average price per unit within each period bucket.
        group_by_day["MonthlyUnitPrice"] = group_by_day["TotalSales"]/group_by_day["TotalQty"]
        to_plot = group_by_day[group_by_day["Brand"] == brand]

        # Plotting POS data: quantity on the left axis, unit price on the right.
        to_plot.plot(x="POSDate", y="TotalQty", linewidth=2, label="Sales (Qty)", ax=axes[axis_num, 0])
        ax = axes[axis_num, 0]
        ax2 = to_plot.plot(x="POSDate", y="MonthlyUnitPrice", secondary_y=True, linewidth=2, 
                       label="Unit Price", ax=axes[axis_num, 0] )
        axis_num += 1
        ax.set_ylabel("Sales", fontsize=28, labelpad=20)
        ax2.set_ylabel("Average unit price of brand ($)", fontsize=28, labelpad=40 ,rotation=-90)
        ax.set_xlabel("Date", fontsize=28)

        ax.set_title(brand, fontsize=30)
        ax.grid(axis='x')

        xticks = ax.get_xticklabels()
        plt.setp(xticks, rotation=90, fontsize=18)
        yticks = ax.get_yticklabels()
        plt.setp(yticks, fontsize=18)

        #TODO: annotate POS_plot with WM comments
        # Add xtick labels: one major tick on the first day of every month.
        ax.xaxis.set_major_locator(dates.MonthLocator(bymonth=None, bymonthday=1, interval=1))

    if territory_v_plot:
        # plot_variance divides by 1e6 itself, so v is passed unscaled.
        plot_variance(v, axes[axis_num, 0])
        axis_num += 1

    if all_territory_v_plot:
        v_all = df_v.groupby(["Brand","Territory"]).sum().loc[brand] / 1e6
        v_all = v_all[months]

        plot_variance_all(v_all, axes[axis_num, 0])
        axis_num += 1

    if sum_v_plot:
        v_b = df_v.groupby(["Brand"]).sum().loc[brand] / 1e6
        v_b = v_b[months]
        plot_variance_sum_all(v_b, axes[axis_num, 0])
        axis_num +=1

    if (commentary_plot):
        plot_comments(brand, months, axes[axis_num, 0])
        axis_num += 1
    plt.show()
    print_comments(brand)

In [None]:
# Exploration: the WM-flagged comment rows for a single brand.
WM_labels[WM_labels["Brand_1"] == "05-AXE SA Brand"]

In [17]:
# Brand choices for the dropdown: the unique brands in the POS data, sorted.
brands_sorted = pos_with_brands['Brand'].sort_values().unique()

# Month labels offered by the slider.
# NOTE(review): assumes the first 8 columns of df_v are not month columns — TODO confirm.
months = df_v.columns[8:]
# Initial slider selection: the full range, first month to last month.
index = (0, len(months)-1)
date_range = SelectionRangeSlider(
                options=months,
                index=index,
                description='Date Range',
                layout={'width': '500px'})

# Build the widgets for plot_graph; with interact_manual the function only runs
# when its button is pressed. The boolean keyword defaults become checkboxes.
interact_manual(plot_graph,brand=brands_sorted, period=['D','W','M'], date_range=date_range)

interactive(children=(Dropdown(description='brand', options=('00-DEODORANT MIXED BRAND Brand', '05-AXE SA Bran…

<function __main__.plot_graph(brand, period, date_range, POS_plot=True, territory_v_plot=True, all_territory_v_plot=False, sum_v_plot=False, commentary_plot=True)>

# Plotting heatmap

In [27]:
# Weekly totals of sales value and quantity per brand.
pos_week = pos_with_brands["POSDate"].dt.to_period("W")
pos_grouped_brands = (
    pos_with_brands
    .groupby([pos_week, "Brand"])
    .agg({"TotalSales": "sum", "TotalQty": "sum"})
    .reset_index()
)

In [28]:
# Brands shown in the heatmap: the first 20 distinct brands in order of
# appearance in pos_with_brands (not a ranking).
selected_brands = pos_with_brands['Brand'].unique()[0:20]

In [None]:
# Keep only the selected brands from the weekly totals.
to_plot = pos_grouped_brands[pos_grouped_brands["Brand"].isin(selected_brands)]
display(to_plot.head())

# One row per week, one column per brand, cells hold the weekly quantity.
to_plot_pivot = to_plot.pivot(index='POSDate', columns='Brand', values='TotalQty')

# Scale each brand by its own maximum, so every brand's peak week equals 1.
brand_columns = to_plot_pivot.columns[:len(selected_brands)]
to_plot_pivot[brand_columns] = to_plot_pivot[brand_columns] / to_plot_pivot[brand_columns].max()
display(to_plot_pivot.head())


In [31]:
# Check which brand columns were rescaled above.
to_plot_pivot.columns[:len(selected_brands)]

Index(['18-DOVE BW MENS+CARE Brand', '19-DOVE BAR MENS+CARE Brand',
       '1C-PERSONAL CARE MIXED BRAND Brand', '3K-SIMPLE FACE Brand',
       '4B-TRESEMME HS-SA Brand', '4C-TRESEMME SH-CD Brand',
       '4D-VO5 HS-SA Brand', '4H-AXE FACE Brand',
       '4I-DOVE MENS CARE FACE Brand', '4W-DEGREE MENS AP/DEO Brand',
       '56-KNORR SOUPS Brand', 'IX-DEG WOMENS AP-DEO Brand',
       'JH-DOVE WOMENS AP-DEO Brand', 'JJ-AXE DEO Brand',
       'JL-DOVE SH-CD Brand', 'KJ-DOVE BODY WASH Brand', 'KQ-LEVER 2000 Brand',
       'LL-DOVE BAR Brand', 'LW-LEVER 2000 BW Brand',
       'LY-AXE BODY WASH Brand'],
      dtype='object', name='Brand')

In [None]:
import seaborn as sns
# Heatmap of the scaled weekly quantities: one row per brand, one column per week.
fig, ax = plt.subplots(figsize=(10, 15))
sns.heatmap(to_plot_pivot.T, cmap="YlGnBu", ax=ax)
ax.tick_params(labelsize=12)
ax.set_title("Sales per week")