# QBR Report Tool

Author: Farid Javadnejad\
Latest Update: 1/13/2023

### Import Required Libraries

In [None]:
# Import NumPy, Pandas, PyPlot SeaBorn libs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as tick
import seaborn as sns

# Import Google Colab filer upload libs
import io
from google.colab import files
from google.colab import drive

# Import Google Sheet access authentication libs
from google.colab import auth
auth.authenticate_user()
import gspread
from google.auth import default
creds, _ = default()

#Files & folders
import shutil
import os

#time
from datetime import datetime
from pandas.tseries.offsets import MonthEnd

#Excel file handling
!pip install xlsxwriter
import xlsxwriter

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xlsxwriter
  Downloading XlsxWriter-3.0.7-py3-none-any.whl (152 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 152.8/152.8 KB 4.4 MB/s eta 0:00:00
Installing collected packages: xlsxwriter
Successfully installed xlsxwriter-3.0.7


Global: Colors & Font Settings

In [None]:
# Module-wide settings shared by the helper and plot functions below.
# Names assigned at module level are already global, so no `global`
# statement is required here.

# Colour set used by the pie and bar charts
COLOR_SET_google = ['#4486F4', '#51B457', '#F3C302', '#E44B31']

# Default font size written to matplotlib's rcParams["font.size"]
FONT_SIZE = "13"

# DIRECTORY (the output folder) is assigned later, once a product group
# has been selected.

# Palette names
PALLETE = ['viridis', 'magma', 'plasma', 'tab20', 'tab10', 'paired']


## Custom Functions 

### Color Functions

f: Color Picker Function

In [None]:
def gradient_color_picker(i, n, pallete = 'PALLETE'):
    '''Return the colour for step i out of n, spread evenly over a palette.

    Parameters:
        i: position of the requested colour, 0 <= i <= n.
        n: number of steps the palette is divided into; it must not exceed
           the last index of the chosen palette.
        pallete: palette name ('viridis', 'magma', 'plasma', 'tab20',
           'tab10' or 'paired'). Any other value, including the default
           placeholder 'PALLETE', falls back to 'magma'.

    Returns:
        A hex colour string such as '#1f77b4'.

    Raises:
        ValueError: if n or i is outside the accepted range.
    '''
    palettes = {
        'magma': ['#000004', '#120d31', '#331067', '#59157e', '#7e2482', '#a3307e',
                  '#c83e73', '#e95462', '#fa7d5e', '#fea973', '#fed395', '#fcfdbf'],
        'tab20': ['#1f77b4', '#17becf', '#ff7f0e', '#ffbb78', '#2ca02c', '#98df8a',
                  '#d62728', '#ff9896', '#9467bd', '#c5b0d5', '#8c564b', '#c49c94',
                  '#e377c2', '#f7b6d2', '#7f7f7f', '#c7c7c7', '#bcbd22', '#dbdb8d',
                  '#6baed6', '#9edae5'],
        'tab10': ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
                  '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'],
        'viridis': ['#440154', '#481b6d', '#46327e', '#3f4788', '#365c8d', '#2e6e8e',
                    '#277f8e', '#21918c', '#1fa187', '#2db27d', '#4ac16d', '#73d056',
                    '#a0da39', '#d0e11c', '#fde725'],
        'plasma': ['#0d0887', '#350498', '#5302a3', '#6f00a8', '#8b0aa5', '#a31e9a',
                   '#b83289', '#cc4778', '#db5c68', '#e97158', '#f48849', '#fba238',
                   '#febd2a', '#fada24', '#f0f921'],
        'paired': ['#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99', '#e31a1c',
                   '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a', '#ffff99', '#b15928'],
    }

    # Only string names are looked up, so an unhashable argument (e.g. a
    # list of names) falls back to magma instead of raising TypeError.
    colors = palettes.get(pallete) if isinstance(pallete, str) else None
    if colors is None:
        colors = palettes['magma']

    last_index = len(colors) - 1

    if n < 0 or n > last_index:
        raise ValueError(f'n must be between 0 and {last_index}, got {n}')

    if i < 0 or i > n:
        raise ValueError(f'i must be between 0 and n ({n}), got {i}')

    # With zero steps there is nothing to interpolate (and nothing to
    # divide by): return the first colour.
    if n == 0:
        return colors[0]

    return colors[last_index * i // n]

### Reading Functions

Generate Keyword from List

In [None]:
def keyword_from_list(li: list):
    '''Join the non-empty strings of a list with "|" into a single keyword string.'''

    # Empty strings are skipped so they do not produce empty alternatives
    non_empty = [entry for entry in li if entry != '']
    return '|'.join(non_empty)

Google Workbook to DataFrame

In [None]:
def workbook_to_dataframe(work_book, work_sheet):
    '''Read one worksheet of a Google Sheets workbook into a DataFrame.

    Parameters:
        work_book: object exposing worksheet(name); the returned worksheet
            must expose get_all_values().
        work_sheet: name of the worksheet to read.

    Returns:
        A DataFrame holding every row returned by get_all_values(); column
        labels are positional integers and empty cells stay as ''.
    '''

    ws = work_book.worksheet(work_sheet) #read the worksheet
    rows = ws.get_all_values() #get_all_values gives a list of rows

    # NOTE: the original body also called df.replace('', np.nan) but threw
    # the result away, so empty cells were never converted. They are kept
    # as '' on purpose: column_to_dict filters on item != ''.
    return pd.DataFrame.from_records(rows)


f: Read GoogleSheet by URL

In [None]:
def read_gs_by_url(url: str):
    '''Open a Google Sheets workbook by URL with the notebook credentials and return it.'''

    # creds is the module-level credential object obtained at import time
    client = gspread.authorize(creds)
    return client.open_by_url(url)

f: Cast df column to a dictionary

In [None]:
def column_to_dict(column, output = True):
    '''Cast a dataframe column to a dictionary of its non-empty items.

    Parameters:
        column: iterable of values (e.g. a DataFrame column or column index).
        output: when True, print every "key : value" pair.

    Returns:
        dict mapping consecutive integers starting at 0 to the items of
        column that are not the empty string, in their original order.
    '''
    non_empty = (item for item in column if item != '')
    dct = dict(enumerate(non_empty))

    if output:
        for key, value in dct.items():
            print(key, ':', value)

    return dct


### Pre-processing Functions

f: Replace df header with 1st row

In [None]:
def dataframe_header(df: pd.DataFrame):
    '''Promote the first row of df to column labels and return the remaining rows.'''
    header_row = df.iloc[0]    # first row holds the column labels
    body = df[1:]              # everything below the header row
    body.columns = header_row
    return body

f: Get User Input (Integer)

In [None]:
def get_user_input_int(lower: int, upper: int):
    '''Prompt until the user enters an integer within [lower, upper].

    Parameters:
        lower: smallest accepted value (inclusive).
        upper: largest accepted value (inclusive).

    Returns:
        The first entered integer that lies inside the range.
    '''

    while True:
        try:
            val = int(input('\nEnter an integer value: '))
        except ValueError:
            # Not an integer at all: treat like an out-of-range answer
            val = None

        # The original `break` was placed after `raise` and could never
        # run, so a valid answer did not end the loop; return directly.
        if val is not None and lower <= val <= upper:
            return val

        print(f"INVALID: The number must be between ({lower} - {upper}).")


f: Add Q, Year, m_in_Q

In [None]:
def add_q_year_month(df):
    '''Add quarter, year and month-in-quarter columns derived from 'MM_YY'.

    The month is read from characters 1-2 of 'MM_YY' and the year from its
    last four characters. Rows whose 'MM_YY' does not start with '0' are
    dropped.

    Parameters:
        df: DataFrame with a string column 'MM_YY'.

    Returns:
        A new DataFrame (the input is left untouched) with the integer
        columns 'Q', 'Year' and 'm_in_Q' added, sorted by those columns.
    '''
    #Delete rows without valid 'MM_YY' data; copy so that the new columns
    #are not written onto a slice of the caller's frame
    valid_period = df['MM_YY'].str.get(0).isin(['0'])
    df = df[valid_period].copy()

    month = df['MM_YY'].str[1:3].astype(int)

    #Define Quarter column
    df['Q'] = np.ceil(month / 3).astype(int)
    #Define Year column
    df['Year'] = df['MM_YY'].str[-4:].astype(int)
    #Position of the month inside its quarter (1, 2 or 3)
    df['m_in_Q'] = month - df['Q'] * 3 + 3

    #sort by Year, Q, m_in_Q
    return df.sort_values(by=['Year', 'Q', 'm_in_Q'])

f: QTD, PreQTD, LYQTD Encoding

In [None]:
def qtd_encoder(df: pd.DataFrame):
    '''Add a 'QTD_Encoding' column to the sales report dataframe.

    The last row of df defines the current quarter, year and month in
    quarter, so df is expected to be sorted by Year, Q and m_in_Q. Rows
    are labelled:
        'QTD'    - current quarter of the current year
        'PreQTD' - previous quarter, up to the current month in quarter
        'LYQTD'  - same quarter of the previous year, up to the current
                   month in quarter
        'O'      - every other row

    Parameters:
        df: DataFrame with the columns 'Q', 'Year' and 'm_in_Q'.

    Returns:
        A copy of df with the additional 'QTD_Encoding' column.
    '''

    df_encoded = df.copy()

    #Create a new column to classify QTD
    df_encoded['QTD_Encoding'] = 'O'

    #get the current Year, Quarter, and month in Quarter
    current_q = df['Q'].iloc[-1]
    current_yr = df['Year'].iloc[-1]
    current_m_in_Q = df['m_in_Q'].iloc[-1]
    print(f'Current qaurter is:\nQ = {current_q}\nYear = {current_yr}\nm_in_Q = {current_m_in_Q}\n')

    up_to_current_month = df['m_in_Q'] <= current_m_in_Q

    #create a filter for current quarter-to-date (QTD)
    qtd_filter = (df['Q'] == current_q) & (df['Year'] == current_yr)

    #create a filter for previous quarter-to-date (PreQTD);
    #the quarter before Q1 is Q4 of the previous year
    if current_q == 1:
        previous_q, previous_q_yr = 4, current_yr - 1
    else:
        previous_q, previous_q_yr = current_q - 1, current_yr
    preqtd_filter = (df['Q'] == previous_q) & (df['Year'] == previous_q_yr) & up_to_current_month

    #create a filter for last year quarter-to-date (LYQTD)
    lyqtd_filter = (df['Q'] == current_q) & (df['Year'] == (current_yr - 1)) & up_to_current_month

    #.loc writes into df_encoded itself; chained indexing
    #(df_encoded.QTD_Encoding[mask] = ...) may write to a temporary copy
    df_encoded.loc[qtd_filter, 'QTD_Encoding'] = 'QTD'
    df_encoded.loc[preqtd_filter, 'QTD_Encoding'] = 'PreQTD'
    df_encoded.loc[lyqtd_filter, 'QTD_Encoding'] = 'LYQTD'

    return df_encoded

### Calculations Functions

f: Calculate MAT

In [None]:
def calculate_mat(df: pd.DataFrame, product_name:str):
    '''Aggregate monthly Sales/Billing and compute their 12-row rolling sums (MAT).

    Parameters:
        df: sales DataFrame with the columns 'MM_YY', 'Q', 'Year',
            'm_in_Q', 'QTD_Encoding', 'Sales' and 'Billing'.
        product_name: value written to the 'Product' column of the result.

    Returns:
        DataFrame with one row per month and the columns 'Product', 'Date'
        (month end), 'Sales', 'Billing', 'Period' (label "first - last"
        month of the window), 'Billing_MAT', 'Sales_MAT' and
        'QTD_Encoding'. The MAT and Period values are NaN for the first 11
        rows.
    '''
    #Aggregate Sales & Billing values; the value columns are selected with
    #a list because tuple-style selection on a GroupBy is no longer supported
    group_keys = ['MM_YY', 'Q', 'Year', 'm_in_Q', 'QTD_Encoding']
    group_data = df.groupby(group_keys, as_index=False)[['Sales', 'Billing']].sum()

    #sort by Year, Q, month in Q
    group_data = group_data.sort_values(by=['Year', 'Q', 'm_in_Q']).reset_index(drop=True)

    #The window counts rows, not calendar months
    # NOTE(review): assumes every month is present in df - confirm
    group_data['Billing_MAT'] = group_data['Billing'].rolling(window=12).sum()
    group_data['Sales_MAT'] = group_data['Sales'].rolling(window=12).sum()

    # 12 month period for MAT: the row 11 positions earlier opens the window
    group_data['Start_Period'] = group_data['MM_YY'].shift(periods=11)

    # Create period labels (the leading character of MM_YY is dropped)
    group_data['Period'] = group_data['Start_Period'].str[1:] + ' - ' + group_data['MM_YY'].str[1:]

    # Rebuild a YYYYMM key from MM_YY and move it to the end of that month
    mm_yy = group_data['MM_YY'].astype(str)
    group_data['YYYYMM'] = mm_yy.str[4:] + mm_yy.str[1:3]
    group_data['Date'] = pd.to_datetime(group_data['YYYYMM'], format='%Y%m', errors='coerce') + MonthEnd(1)
    group_data['Product'] = product_name

    #Subset the dataframe
    return group_data[['Product', 'Date', 'Sales', 'Billing', 'Period', 'Billing_MAT', 'Sales_MAT', 'QTD_Encoding']]

In [None]:
def calculate_mat_group(df: pd.DataFrame, mat_by = 'MAT_by'):
    '''Compute the MAT table for the whole frame and for each value of one column.

    Parameters:
        df: encoded sales DataFrame accepted by calculate_mat.
        mat_by: name of the column to split by. The default 'MAT_by' is
            only a placeholder; pass an existing column name such as
            'GROUP' or 'Region'.

    Returns:
        The MAT rows of the whole frame (labelled with product 'CD')
        followed by the MAT rows of every distinct non-zero value of
        df[mat_by], each labelled with that value.
    '''
    # Total over all rows first
    frames = [calculate_mat(df, 'CD')]

    # One MAT table per group; 0 is the fill value for missing data
    for group in df[mat_by].unique():
        if group != 0:
            frames.append(calculate_mat(df[df[mat_by] == group], group))

    return pd.concat(frames, axis=0)

f: Calculate Quarter Sales Trend

In [None]:
def generate_q_sales_report(df: pd.DataFrame, product_name:str):
    '''Calculate the Quarter Sales Trend report.

    Parameters:
        df: DataFrame with the columns 'QTD_Encoding' and 'Sales'.
        product_name: value written to the 'Product' column.

    Returns:
        One-row DataFrame with the QTD, PreQTD and LYQTD sales in kEUR
        (rounded to one decimal) and QTD relative to PreQTD and to LYQTD in
        percent (truncated to an integer). A percentage is NaN when its
        reference total is zero.
    '''

    def _sales_total(encoding):
        # Sum of Sales over the rows carrying the given QTD encoding
        return float(df.loc[df['QTD_Encoding'] == encoding, 'Sales'].sum())

    def _relative_percent(value, base):
        # No meaningful ratio exists without reference sales; casting the
        # resulting inf/nan to int would yield a garbage number.
        if base == 0:
            return float('nan')
        return int(100 * value / base)

    #Sum of Sales for Quarter to Date (QTD)
    QTD = _sales_total('QTD')

    #Sum of Sales for Previous Quarter to Date (PreQTD)
    PreQTD = _sales_total('PreQTD')

    #Sum of Sales for Last Year Quarter to Date (LYQTD)
    LYQTD = _sales_total('LYQTD')

    q_report_data = {'Product': [product_name],
                     'QTD(kEUR)': [QTD / 1000],  #convert to kEUR
                     'PreQTD(kEUR)': [PreQTD / 1000],  #convert to kEUR
                     'LYQTD(kEUR)': [LYQTD / 1000],  #convert to kEUR
                     'Rel_PreQTD(%)': [_relative_percent(QTD, PreQTD)],  #convert to %
                     'Rel_LYQTD(%)': [_relative_percent(QTD, LYQTD)]  #convert to %
                     }

    # Create Q Report DataFrame
    q_report_df = pd.DataFrame(q_report_data)
    q_report_df = q_report_df.round({'QTD(kEUR)': 1, 'PreQTD(kEUR)': 1, 'LYQTD(kEUR)': 1,
                                     'Rel_PreQTD(%)': 0, 'Rel_LYQTD(%)': 0})

    return q_report_df

f: Clean-up & Rename

In [None]:
def cleanup_and_rename(df):
    '''Rename the columns of the uploaded report and clean its rows.

    The eight columns are renamed by position to 'PID', 'License',
    'Region_ID', 'Region', 'MM_YY', 'Billing', 'Qty' and 'Sales'. Rows
    whose region contains "World GEO" or "Sales Reg HGS" and rows whose
    'MM_YY' does not start with '0' are removed, region names are
    shortened, NaN is replaced by 0 and the numeric columns are converted.

    Parameters:
        df: DataFrame with exactly eight columns in the order above.

    Returns:
        A cleaned copy; the DataFrame passed in is not modified.
    '''
    #Work on a copy so the caller's frame keeps its original headers
    df = df.copy()

    #Set the Headers
    df.columns = ['PID', 'License', 'Region_ID', 'Region', 'MM_YY', 'Billing', 'Qty', 'Sales']

    #Filter world cumulative sales values and keep regions
    world_sales_filter = df['Region'].str.contains("World GEO|Sales Reg HGS", na=False)
    df = df[~world_sales_filter].copy()

    #Clean up and rename the region names; plain-text replacement, so the
    #result does not depend on the library's regex default
    region = df['Region'].str.replace('GSR', '', regex=False)
    for old_name, new_name in (('US/CAN', 'US/CA'), ('Europe', 'EU'), ('Emerging', 'EM')):
        region = region.str.replace(old_name, new_name, regex=False)
    df['Region'] = region

    #Delete rows without valid 'MM_YY' data
    df = df[df['MM_YY'].str.get(0).isin(['0'])]
    #fill NaN with 0
    df = df.fillna(0)

    #Convert to numeric values
    df = df.astype({'Billing': float, 'Sales': float, 'Qty': int})

    return df

f: Calculate QTD sales by License

In [None]:
def generate_qtd_sales_by_license(df:pd.DataFrame, product_name: str):
    '''Summarise quarter-to-date sales and seat quantities per license type.

    License types are detected by substrings of 'EID_CODE':
        Perpetual    - 'PERM'
        Subscription - 'SUB'; 'PRO-RATED' sales are added, their quantity
                       is not
        CCP          - 'CCP', plus 'SUPPORT' and 'UPDATE' (the quantities
                       of the latter two are halved)
        Other        - 'OTHER'

    Parameters:
        df: DataFrame with the columns 'QTD_Encoding', 'EID_CODE', 'Sales'
            and 'Qty'.
        product_name: value written to the 'Product' column.

    Returns:
        Two-row DataFrame, 'Sales (kEUR)' and 'Seat (Qty)', rounded to one
        decimal.
    '''

    #limit the data for QTD sales
    df_qtd = df[df['QTD_Encoding'] == 'QTD']
    codes = df_qtd['EID_CODE']

    def _totals(keyword):
        # (sales, qty) of the QTD rows whose EID_CODE contains keyword.
        # The mask is built on the QTD rows themselves so that it lines up
        # with them; rows without a code never match.
        mask = codes.str.contains(keyword, na=False)
        return df_qtd.loc[mask, 'Sales'].sum(), df_qtd.loc[mask, 'Qty'].sum()

    permanent_sales, permanent_qty = _totals('PERM')

    subscription_sales, subscription_qty = _totals('SUB')
    prorated_sales, _ = _totals('PRO-RATED')
    subscription_sales = subscription_sales + prorated_sales  #Don't include pro-rated qty (unit in weeks)

    ccp_sales, ccp_qty = _totals('CCP')
    support_sales, support_qty = _totals('SUPPORT')
    updates_sales, updates_qty = _totals('UPDATE')
    ccp_sales = ccp_sales + support_sales + updates_sales
    qty_ccp = ccp_qty + (support_qty + updates_qty) / 2

    other_sales, other_qty = _totals('OTHER')

    #  data of lists; convert to kEUR
    by_license_data = {'Product': [product_name, product_name],
                       'Unit': ['Sales (kEUR)', 'Seat (Qty)'],
                       'Perpetual': [0.001 * permanent_sales, permanent_qty],
                       'Subscription': [0.001 * subscription_sales, subscription_qty],
                       'CCP': [0.001 * ccp_sales, qty_ccp],
                       'Other': [0.001 * other_sales, other_qty]}

    by_license_df = pd.DataFrame(by_license_data)

    #round() returns a new frame; its result has to be the return value
    return by_license_df.round(1)

f: Add missing regions 

In [None]:
def add_missing_regions(df):
    '''Append a zero-sales placeholder row for every region absent from df.

    A region counts as present when its name occurs as a substring of any
    value in df['Region'].

    Parameters:
        df: DataFrame with a string column 'Region'.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the rows of df followed
        by one placeholder row per missing region (Sales 0, MM_YY '00',
        EID_CODE 'PERM', QTD_Encoding 'QTD'), so that every region shows up
        in the plots.
    '''

    #create a list of all Regions
    list_all_regions = ['LATAM', 'EM EMEA', 'India', 'Nordics', 'Russia', 'ANZ', 'China', 'Asia',
                        'UK/BX', 'Central EU', 'South EU', 'US/CA']

    #regions without a single matching row
    existing = df['Region']
    missing_regions = [region for region in list_all_regions
                       if not existing.str.contains(region, regex=False, na=False).any()]

    if not missing_regions:
        return df.reset_index(drop=True)

    #one placeholder row per missing region, built in a single step
    #(DataFrame.append is not available in current pandas)
    placeholder_rows = pd.DataFrame(
        [{'Region': region, 'MM_YY': '00', 'Sales': 0, 'EID_CODE': 'PERM', 'QTD_Encoding': 'QTD'}
         for region in missing_regions])

    return pd.concat([df, placeholder_rows], ignore_index=True)

f: Add missing periods to MAT

In [None]:
def add_missing_month_MAT(df_new, df_ref):
    '''Align a product's MAT table with the dates and period labels of a reference table.

    Both frames are joined on 'Date'. The result takes 'Period' from
    df_ref and 'Sales', 'Billing', 'Billing_MAT', 'Sales_MAT',
    'QTD_Encoding' and 'Product' from df_new.

    NOTE(review): the join is an inner join, so dates missing from df_new
    are dropped rather than added, despite the function's name - confirm
    that this is intended.

    Parameters:
        df_new: MAT table of one product, as returned by calculate_mat.
        df_ref: MAT table whose dates and period labels are the reference.

    Returns:
        DataFrame with the columns of df_ref; empty when the two tables
        share no date.
    '''
    df_merged = pd.merge(df_ref, df_new, suffixes = ('_ref', '_new'), on='Date')

    #Nothing in common (e.g. a product without transactions): return an
    #empty table instead of failing on the product lookup below
    if df_merged.empty:
        return pd.DataFrame(data=None, columns = df_ref.columns)

    df_all_new = pd.DataFrame({
        'Date': df_merged['Date'],
        'Sales': df_merged['Sales_new'],
        'Billing': df_merged['Billing_new'],
        'Period': df_merged['Period_ref'],
        'Billing_MAT': df_merged['Billing_MAT_new'],
        'Sales_MAT': df_merged['Sales_MAT_new'],
        'QTD_Encoding': df_merged['QTD_Encoding_new'],
        #every row carries the product name of the last merged row
        'Product': df_merged['Product_new'].iloc[-1],
    })

    #reference column order first, then any column the reference lacks
    ordered_columns = list(df_ref.columns) + [col for col in df_all_new.columns
                                              if col not in df_ref.columns]
    return df_all_new.reindex(columns=ordered_columns)

f: Calculate Sales by Region & Quarter

In [None]:
def calculate_sales_by_region_and_quarter(df:pd.DataFrame, product_name: str): 
    '''Sum Sales per region and QTD encoding, with every known region represented.

    Rows encoded 'O' are ignored. The result has the columns 'Product',
    'Region', 'QTD_Encoding' and 'Sales' and is sorted by region, encoding
    and sales.
    '''
    #keep only the QTD, PreQTD and LYQTD rows
    encoded_rows = df[df['QTD_Encoding'] != 'O']

    #make sure every region has at least one row
    with_all_regions = add_missing_regions(encoded_rows)

    #total sales per (region, encoding)
    totals = with_all_regions.groupby(['Region', 'QTD_Encoding'])['Sales'].sum().reset_index()

    #order the rows and discard the 0 region (fill value for missing data)
    totals = totals.sort_values(['Region', 'QTD_Encoding', 'Sales'], ascending=True)
    totals = totals[totals["Region"] != 0]

    #product name becomes the first column
    totals.insert(0, 'Product', product_name)

    return totals

f: Calculate Sales by Region & Lic

In [None]:
def calculate_sales_by_region_and_lic(df:pd.DataFrame, product_name: str): 
    '''Sum quarter-to-date Sales per region and license group ('EID_GROUP').

    Only rows encoded 'QTD' are used. The result has the columns 'Product',
    'Region', 'EID_GROUP' and 'Sales' and is sorted by region, license
    group and sales.
    '''
    #keep only the QTD rows
    qtd_rows = df[df['QTD_Encoding'] == 'QTD']

    #make sure every region has at least one row
    with_all_regions = add_missing_regions(qtd_rows)

    #total sales per (region, license group)
    totals = with_all_regions.groupby(['Region', 'EID_GROUP'])['Sales'].sum().reset_index()

    #order the rows and discard the 0 region (fill value for missing data)
    totals = totals.sort_values(['Region', 'EID_GROUP', 'Sales'], ascending=True)
    totals = totals[totals['Region'] != 0]

    #drop the license groups whose name contains this keyword
    is_misc = totals['EID_GROUP'].str.contains('Miscsallanous')
    totals = totals[~is_misc]

    #product name becomes the first column
    totals.insert(0, 'Product', product_name)

    return totals

### Plot Functions

f: Plot QTD Sales by Region

In [None]:
def plot_qtd_sales_region (df:pd.DataFrame, dir:str, product_name:str, display = True, pallete = 'PALLETE'):
    '''Draw and save a pie chart of the quarter-to-date sales share per region.

    Parameters:
        df: DataFrame with the columns 'QTD_Encoding', 'Region' and 'Sales'.
        dir: folder the PNG is written to.
        product_name: used in the plot title and the file name.
        display: show the figure when True, otherwise close it after saving.
        pallete: palette name passed on to gradient_color_picker.

    The leftover test assignments that replaced `dir` and `product_name`
    with the module-level DIRECTORY and PRODUCT_GROUP have been removed, so
    the arguments are honoured.
    '''
    #### DATA ###
    #QTD rows, largest sales first (sort_values returns a new frame, so the
    #caller's data is not modified)
    plot_df = df[df["QTD_Encoding"] == 'QTD'].sort_values('Sales', ascending = False)

    #calculate total sales
    total_sales = plot_df['Sales'].sum()

    #calculate percentage
    plot_df = plot_df.assign(Sales = 100 * plot_df['Sales'] / total_sales)

    ### SETTINGS
    #Turn interactive plotting off
    if not display:
        plt.ioff()

    #plot & font size
    plt.figure(figsize=[11,7])
    plt.rcParams["font.size"] = FONT_SIZE

    #plot values
    plot_labels = list(plot_df['Region'])
    plot_sizes = list(plot_df['Sales'])
    pie_plot_labels = [f'{l}: {s:0.1f}%' for l, s in zip(plot_labels, plot_sizes)]

    #### PLOT ####
    #Title
    plot_title_by_region = str(product_name) +' - QTD Sales by Region'
    plt.title(plot_title_by_region)

    #one colour per wedge, spread over the palette
    num_of_items = len(pie_plot_labels)
    colors = [gradient_color_picker(i, num_of_items, pallete) for i in range(num_of_items)]

    #pie plot definition
    plt.pie(plot_sizes, colors = colors, startangle=90)
    plt.legend(pie_plot_labels, bbox_to_anchor=(.95,0.5), loc="center right", bbox_transform=plt.gcf().transFigure)
    fig = plt.gcf()

    #### STORE ####
    #Save as png; join() copes with folders given with or without a trailing '/'
    plt.savefig(os.path.join(dir, plot_title_by_region), bbox_inches = 'tight')

    if display:
        plt.show()
    else:
        plt.close(fig)

f: Plot (Pie): Q Sales by License

In [None]:
def plot_qtd_sales_license (df:pd.DataFrame, dir:str, product_name:str, display = True):
    '''Draw and save a donut chart of the quarter-to-date sales per license type.

    Parameters:
        df: table with the columns 'Product' and 'Unit' plus one column per
            license type; only its first row is plotted.
        dir: folder the PNG is written to.
        product_name: used in the plot title and the file name.
        display: show the figure when True, otherwise close it after saving.
    '''
    ### SETTINGS
    #set color
    colors = COLOR_SET_google

    #Wedge
    wedgeprops  = {"edgecolor":"k",'linewidth': 0.1, 'linestyle': '-'}

    #Turn interactive plotting off
    if not display:
        plt.ioff()

    #plot & font size
    plt.figure(figsize=[6,6])
    plt.rcParams["font.size"] = FONT_SIZE

    #### DATA ###
    #first row of the dataframe, license columns in alphabetical order
    df_plot = df.drop(columns = ['Product', 'Unit']).sort_index(axis = 1)
    df_plot = df_plot.iloc[0]

    #plot values
    pie_plot_labels = list(df_plot.keys())
    pie_plot_x = list(df_plot)

    #### PLOT ####
    #Title
    by_license_plot_title = product_name +' - QTD Sales by License'
    plt.title(by_license_plot_title)

    #explosion: one offset per wedge, whatever the number of license types
    explode = [0.05] * len(pie_plot_x)

    #pie plot definition
    plt.pie(pie_plot_x, colors = colors, labels=pie_plot_labels, autopct='%1.1f%%', startangle=90, pctdistance=0.85, wedgeprops = wedgeprops, explode = explode)

    #the legend is created after the pie so that it can pick up the wedges
    plt.legend(pie_plot_labels, loc="upper right")

    #draw white circle
    centre_circle = plt.Circle((0,0),0.70,fc='white')
    fig = plt.gcf()
    fig.gca().add_artist(centre_circle)

    plt.tight_layout()

    #### STORE ####
    #Save as png
    plt.savefig(dir + '/' + by_license_plot_title, bbox_inches = 'tight')

    if display:
        plt.show()
    else:
        plt.close(fig)

f: Plot (Bar): Sales Trend

In [None]:
def plot_q_sales_trend (df:pd.DataFrame, dir, product_name:str, display = True):
    '''Draw and save a grouped bar chart of LYQTD, PreQTD and QTD sales per product.

    Parameters:
        df: quarter report with the columns 'Product', 'QTD(kEUR)',
            'PreQTD(kEUR)', 'LYQTD(kEUR)', 'Rel_PreQTD(%)' and
            'Rel_LYQTD(%)'.
        dir: folder the PNG is written to.
        product_name: used in the plot title and the file name.
        display: show the figure when True, otherwise close it after saving.
    '''
    # Turn interactive plotting off
    if not display:
        plt.ioff()

    #Transform the data: one row per (product, encoding), values back in EUR
    df = df.rename(columns={'QTD(kEUR)': 'QTD', 'PreQTD(kEUR)': 'PreQTD', 'LYQTD(kEUR)': 'LYQTD'})
    df = df.drop(['Rel_PreQTD(%)', 'Rel_LYQTD(%)'], axis = 1)

    plot_df = pd.melt(df, id_vars=['Product'], value_vars=['QTD', 'PreQTD', 'LYQTD'], var_name='QTD_Encoding', value_name='Sales')
    plot_df['Sales'] = plot_df['Sales']*1000

    hue_order = ['LYQTD', 'PreQTD', 'QTD']

    #set the length of figure based on the number of products
    num_of_products = len(plot_df['Product'].unique())
    fig_len = 1 + num_of_products * 2

    #set_palette() changes the global seaborn palette and returns None, so
    #the bars take their colours from that global palette
    sns.set_palette(COLOR_SET_google)
    linewidth = 0.4
    saturation = 0.9

    #plot & font size
    plt.figure(figsize=[fig_len,6])
    plt.rcParams["font.size"] = FONT_SIZE

    #barchart definition
    ax = sns.barplot(x='Product', y='Sales', hue= 'QTD_Encoding',
                     saturation = saturation,
                     edgecolor = 'black', linewidth = linewidth,
                     hue_order = hue_order,
                     ci = None, data = plot_df)

    #Title
    plot_title = product_name +'- Quarter Short Term Trends'

    #set labels
    plt.ylabel("Sales (EUR)")
    plt.title(plot_title)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0, ha='right')
    plt.legend(loc='upper right')

    #set graphics
    plt.grid(linestyle='--', axis='y')

    #thousands separator on the y axis
    ax.get_yaxis().set_major_formatter(tick.FuncFormatter(lambda x, p: format(int(x), ',')))

    #Save & show
    plt.savefig(dir + '/' +  plot_title, bbox_inches = 'tight')

    if display:
        plt.show()
    else:
        plt.close()

f: Plot (Bar): Sales by Region

In [None]:
def plot_sales_by_region(df:pd.DataFrame, product_name: str, dir: str, display = True, agg_function = 'by_columns'):
    '''Draw and save a grouped bar chart of sales per region.

    Parameters:
        df: DataFrame with the columns 'Region', 'Sales' and the column
            named by agg_function.
        product_name: used in the plot title and the file name.
        dir: folder the PNG is written to.
        display: show the figure when True, otherwise close it after saving.
        agg_function: column used for the bar groups (hue). 'QTD_Encoding'
            selects the quarter comparison; any other value is treated as a
            license column such as 'EID_GROUP'. The default 'by_columns' is
            only a placeholder and has to be replaced by a real column name.
    '''
    # Turn interactive plotting off
    if not display:
        plt.ioff()

    if agg_function == 'QTD_Encoding': #when QTD is selected
        add_plot_title_phrase = ' and Quarter'
        #set_palette() changes the global seaborn palette and returns None,
        #so no explicit palette is handed to barplot in this case
        sns.set_palette(COLOR_SET_google)
        sns_colour = None
        linewidth = 0.4
    else: # when a license column is selected
        add_plot_title_phrase = ' and License'
        sns_colour = sns.color_palette(COLOR_SET_google)
        linewidth = 0.2

    #plot & font size
    plt.figure(figsize=[20,6])
    plt.rcParams["font.size"] = FONT_SIZE

    #barchart definition
    ax = sns.barplot(x='Region', y='Sales', hue= agg_function,
                     saturation=0.9,  palette=sns_colour, edgecolor = 'black',
                     linewidth = linewidth, ci = None, data = df)

    #Title
    by_region_plot_title = product_name +' - Sales by Region' + add_plot_title_phrase

    #set labels
    plt.ylabel("Sales (EUR)")
    plt.title(by_region_plot_title)
    plt.xticks(rotation=0, ha='center')
    plt.yticks(rotation=0, ha='right')
    plt.legend(loc='upper left')

    #set graphics
    plt.grid(linestyle='--', axis='y')

    #thousands separator on the y axis
    ax.get_yaxis().set_major_formatter(tick.FuncFormatter(lambda x, p: format(int(x), ',')))

    #Save & show
    plt.savefig(dir + '/' +  by_region_plot_title, bbox_inches = 'tight')

    if display:
        plt.show()
    else:
        plt.close()

f: Plot (Line): MAT

In [None]:
def plot_MAT(df:pd.DataFrame, product_name:str, dir:str, display=True, startIndex = 0, pallete = 'PALLETE'):
    '''Draw and save a line chart of the MAT curves of the products in df.

    One 'Sales_MAT' line is drawn per product, starting with the product at
    position startIndex of df['Product'].unique(). When startIndex is 0 the
    'Billing_MAT' line of the first product is drawn as well.

    Parameters:
        df: MAT table with the columns 'Product', 'Period', 'Billing_MAT'
            and 'Sales_MAT'.
        product_name: used in the plot title and the file name.
        dir: folder the PNG is written to.
        display: show the figure when True, otherwise close it after saving.
        startIndex: position of the first product to plot.
        pallete: palette name passed on to gradient_color_picker.
    '''
    # Turn interactive plotting off
    if not display:
        plt.ioff()

    #Get all the products in the df
    my_products = df['Product'].unique()
    #The first plotted product defines the axis ranges
    plot_df = df.loc[df.Product == my_products[startIndex]]

    #plot & font size
    fig, ax = plt.subplots(figsize=[15,6])
    plt.rcParams["font.size"] = FONT_SIZE

    #plot title
    plot_title_mat = product_name +' - Worldwide MAT'

    #x and y axes ranges
    y_max = 1.1 * max (plot_df['Billing_MAT'].max(),plot_df['Sales_MAT'].max())
    x_max = plot_df['Billing_MAT'].count()

    #Plot grid, title, and labels
    plt.ylabel('EUR')
    plt.ylim(0,y_max)
    plt.xlim(1,x_max)
    plt.title(plot_title_mat)

    #Add ticks
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0, ha='right')

    #Set grid
    plt.grid(linestyle='dotted', axis='y')
    plt.grid(linestyle='dotted', axis='x')

    #Add thousands comma with a formatter (same approach as the bar charts)
    #rather than fixed tick label texts
    ax.get_yaxis().set_major_formatter(tick.FuncFormatter(lambda x, p: format(int(x), ',')))

    color_index = 0
    for item in my_products[startIndex:]:
        plot_df = df.loc[df.Product == item]
        x = plot_df['Period'].tolist()

        #Billing is only drawn for the first product of the full list
        if color_index == 0 and startIndex == 0:
            new_color = gradient_color_picker(color_index, len(my_products), pallete)
            label = str(item) + ': Billing_MAT'
            ax.plot(x, plot_df['Billing_MAT'], color = new_color,  label= label)

        color_index = color_index + 1
        new_color = gradient_color_picker(color_index, len(my_products), pallete)
        label = str(item) +': Sales_MAT'
        ax.plot(x, plot_df['Sales_MAT'], color = new_color, label= label)

    #Add legend
    ax.legend(loc='upper left', fontsize='x-small')

    #### STORE ####
    #Save as png
    plt.savefig(dir + '/' + plot_title_mat, bbox_inches = 'tight')

    if display:
        plt.show()
    else:
        plt.close()


### Reshape Functions

In [None]:
## reshape MAT data for Excel
def reshape_excel_plot_MAT(df_MAT):
    '''Reshape the long MAT table into one column per curve for an Excel chart.

    Parameters:
        df_MAT: MAT table with the columns 'Product', 'Date', 'Period',
            'Billing_MAT' and 'Sales_MAT'; it must contain at least one
            product.

    Returns:
        DataFrame indexed by 'Period' with the column
        '<first product>: Billing_MAT' followed by one
        '<product>: Sales_MAT' column per product. Only dates present for
        every product are kept, and rows without a period label are
        dropped.
    '''
    column_names = df_MAT['Product'].unique()

    #Billing curve of the first product; rename() returns a new frame, so
    #no slice of df_MAT is modified in place
    first_product = column_names[0]
    df_MAT_Excel_Plot = (
        df_MAT.loc[df_MAT['Product'] == first_product, ['Date', 'Period', 'Billing_MAT']]
        .rename(columns = {'Billing_MAT': str(first_product) + ': Billing_MAT'})
    )

    #one Sales_MAT column per product, matched on the date
    for item in column_names:
        df_plot_temp = (
            df_MAT.loc[df_MAT['Product'] == item, ['Date', 'Sales_MAT']]
            .rename(columns = {'Sales_MAT': str(item) + ': Sales_MAT'})
        )
        df_MAT_Excel_Plot = df_MAT_Excel_Plot.merge(df_plot_temp, on = 'Date')

    #keep the rows that have a period label and use it as index; the date
    #was only needed for matching
    df_MAT_Excel_Plot = df_MAT_Excel_Plot.loc[~df_MAT_Excel_Plot['Period'].isnull()]
    df_MAT_Excel_Plot = df_MAT_Excel_Plot.drop(columns = 'Date').set_index('Period')

    return df_MAT_Excel_Plot

In [None]:
## Reshape sales by resgion for Excel
def reshape_sales_by_region(sales_by_region_and_lic):
    '''Reshape the sales-by-region table into an Excel-plot-ready layout.

    Parameters:
        sales_by_region_and_lic: DataFrame with the columns 'Product',
            'Region' and 'Sales'; it must contain at least one product.

    Returns:
        DataFrame indexed by region with one column of total sales per
        product, sorted in descending order of the first product's sales.
        Only regions present for every product are kept.
    '''
    region_names = sales_by_region_and_lic['Region'].unique()
    df_Excel = pd.DataFrame(index=region_names)

    #get the name of all products
    names = sales_by_region_and_lic['Product'].unique()

    for item in names:
        #total sales per region for this product
        df_temp = sales_by_region_and_lic.loc[sales_by_region_and_lic['Product'] == item]
        df_temp = df_temp.groupby(by = ['Region'])['Sales'].agg(['sum'])
        df_temp = df_temp.sort_values('sum', ascending = False)

        #the product name becomes the column name
        df_temp = df_temp.rename(columns = {'sum': str(item)})
        df_Excel = pd.merge(df_Excel, df_temp, left_index = True, right_index = True)

    #largest sales of the first product at the top
    df_Excel = df_Excel.sort_values(by = df_Excel.columns[0], ascending = False)

    return df_Excel

## --- Data ---

Upload Datasheet

In [28]:
# Upload the "-byarticle-byregion" Excel export through the Colab file dialog;
# the returned object is indexed by file name below to obtain the file content
uploaded = files.upload()

TypeError: ignored

In [None]:
# Name of the first uploaded file
filename = next(iter(uploaded))
# Parse the uploaded bytes as an Excel workbook into a DataFrame
df_uploaded = pd.read_excel(io.BytesIO(uploaded[filename]))
# Drop the first data row
df_uploaded = df_uploaded[1:]
print(f'The uploaded Excel file has the size of {df_uploaded.shape}\n')
# Show the first 5 rows
df_uploaded.head()

⚠️ **IMPORTANT:** Make sure the column names and data match; otherwise, the data should be re-exported or the **Pre-processing: Renaming & Cleaning** section should be modified.
```
Unnamed: 0 = Article Number
Unnamed: 1 = License Name
Unnamed: 2 = Region Code
Unnamed: 3 = Region Name
Unnamed: 4 = Fiscal year/period	
4.2:Billing exFr GC
4.4 Billing qty
5.2:Sales exFr GC
```

Renaming & Cleaning

In [None]:
# Rename the columns of the uploaded data and clean its rows
df = cleanup_and_rename(df_uploaded)
# Report the size and show the first rows
print(f'The size of dataframe is {df.shape}\n')
df.head()

Read product license info

In [None]:
# URL of the Google Sheets workbook that holds the license and product group tables
workbook_url = 'https://docs.google.com/spreadsheets/d/1Gbwg4edyMHps1QQQmm_6AiR8IDUN0A_6kEtZ0pOWXz8/edit#gid=1839828940'

# Open the workbook (requires the Google authentication done at import time)
product_wb = read_gs_by_url(workbook_url)

In [None]:
# Read the 'All_Licenses' and 'Product_Groups' worksheets into DataFrames
df_all_lic = workbook_to_dataframe(product_wb, 'All_Licenses')
df_product_groups = workbook_to_dataframe(product_wb, 'Product_Groups')

# The first row of each worksheet holds the column names
df_all_lic = dataframe_header(df_all_lic)
df_product_groups = dataframe_header(df_product_groups)

## Pre-processing

Drop duplicates

In [None]:
# Size before de-duplication
print(f'The size of dataframe is {df_all_lic.shape}\n')
# Keep only the first row of every PID
df_all_lic = df_all_lic.drop_duplicates(subset='PID', keep="first")

Merge product & sales dfs

In [None]:
# Attach the license information to every sales row; a left join keeps
# sales rows whose PID is missing from the license table
merged_df = pd.merge(df, df_all_lic, on = 'PID', how = 'left')
# Drop the columns that are not used further on
merged_df = merged_df.drop('LICENSE', axis =1 )
merged_df = merged_df.drop('Region_ID', axis =1)
# Remove rows whose region is 0
merged_df = merged_df[merged_df.Region != 0]
print(f'The size of dataframe is {merged_df.shape}\n')
merged_df.head()

Add Q, year, & month info

In [None]:
# Add the Q, Year and m_in_Q columns; the result is sorted by those columns
df_q_year_m = add_q_year_month(merged_df)
df_q_year_m.head()

 QTD Encoding

In [None]:
# Add the QTD_Encoding column (QTD / PreQTD / LYQTD / O) to the dataframe
df_encoded = qtd_encoder(df_q_year_m)
print(f'The size of encoded dataframe is {df_encoded.shape}\n')

## --- Select Product ---

User input

In [None]:
# List the product groups (column names of the Product_Groups sheet) and
# let the user pick one by its number
group_dict = column_to_dict(df_product_groups.columns)
productIdx = get_user_input_int(0, len(group_dict)-1)

# Name of the selected group and the non-empty entries of its column
PRODUCT_GROUP = group_dict[productIdx]
group_dct = column_to_dict(df_product_groups[PRODUCT_GROUP], output = False)

## Processing

In [None]:
# Processing message; note that it prints the selected index (productIdx),
# not the group name
print(f'Processing Product Group: {productIdx}')

Directory

In [None]:
# Output folder of the selected product group
DIRECTORY = '/content/QBR_REPORTS/'+ PRODUCT_GROUP + '/'
# Start from an empty folder: any existing folder and its content are deleted
if os.path.exists(DIRECTORY):
    shutil.rmtree(DIRECTORY)
os.makedirs(DIRECTORY)

Total: All Product in Group

In [None]:
#========== Filter_ALL: by Articles =========#
# Keep the rows whose "CD" or "SUBGROUP" value contains the group name
# (str.contains treats PRODUCT_GROUP as a regular expression by default)
boolean_filter_pid = df_encoded["CD"].str.contains(PRODUCT_GROUP) | df_encoded["SUBGROUP"].str.contains(PRODUCT_GROUP)
filtered_df = df_encoded[boolean_filter_pid]
print(f'Number of "{PRODUCT_GROUP}" transactions found: {filtered_df.shape[0]}\n')


#===========  Calculate_ALL: MAT ============#
# Calculate the MAT for PRODUCT_GROUP
df_MAT = calculate_mat(filtered_df, PRODUCT_GROUP)
# Kept as the reference table for the per-product MAT tables
temp_data_0 = df_MAT

# Calculate the MAT per GROUP; df_MAT_group is only defined when the first
# product group (index 0) was selected
if productIdx == 0:
    df_MAT_group = calculate_mat_group(filtered_df, 'GROUP')

# Calculate the MAT per Region
df_MAT_region = calculate_mat_group(filtered_df, 'Region')


#========  Calculate_ALL: Sales Trend ========#
# Calculate the quarter report for PRODUCT_GROUP
q_report_df = generate_q_sales_report(df_MAT, PRODUCT_GROUP)


#========  Calculate ALL: QTD Sales by Product ========#
# Calculate the QTD sales and seats per license type for PRODUCT_GROUP
qtd_sales_license_seat = generate_qtd_sales_by_license(filtered_df, PRODUCT_GROUP)


#==========  Calculate ALL: Sales by Region =========#
# Calculate Sales by Region and Quarter
sales_by_region_and_q = calculate_sales_by_region_and_quarter(filtered_df, PRODUCT_GROUP)
# Kept under a second name as plot input
temp_data_1 = sales_by_region_and_q
# Calculate Sales by Region and License
sales_by_region_and_lic = calculate_sales_by_region_and_lic(filtered_df, PRODUCT_GROUP)
# Kept under a second name as plot input
temp_data_2 = sales_by_region_and_lic


Each Product in Group

In [None]:
# Per-product breakdown. It runs only when the selected group lists more than
# one product. For every product the same calculations as for the whole group
# are repeated on that product's rows, the results are appended to the
# group-level tables (df_MAT, q_report_df, qtd_sales_license_seat,
# sales_by_region_and_q, sales_by_region_and_lic), and the per-product plots
# are produced from that product's own tables.
if len(group_dct) > 1:

    # Every product of the group is processed; none is skipped.
    for item in group_dct.values():

        #========== Filter: by Articles =========#
        # Rows whose "CD" or "SUBGROUP" text contains this product's keyword
        boolean_filter_pid = df_encoded["CD"].str.contains(item) | df_encoded["SUBGROUP"].str.contains(item)
        filtered_df = df_encoded[boolean_filter_pid]

        #===========  Calculate: MAT ============#
        # MAT for this product only
        df_MAT_new = calculate_mat(filtered_df, item)
        # temp_data_0 is the group-level MAT saved before any rows were appended.
        # NOTE(review): add_missing_month_MAT presumably fills in the periods
        # the group has but this product lacks -- confirm in its definition.
        df_MAT_all_periods = add_missing_month_MAT(df_MAT_new, temp_data_0)
        # Append this product's MAT rows to df_MAT
        df_MAT = pd.concat([df_MAT, df_MAT_all_periods], axis=0)

        #========  Calculate: Sales Trend ========#
        # Quarterly report for this product, appended to the group report
        q_report_df_new = generate_q_sales_report(df_MAT_new, item)
        q_report_df = pd.concat([q_report_df, q_report_df_new], axis=0)

        #========  Calculate: QTD Sales by Product ========#
        # QTD sales by license for this product, appended to the group table
        qtd_sales_license_seat_new = generate_qtd_sales_by_license(filtered_df, item)
        qtd_sales_license_seat = pd.concat([qtd_sales_license_seat, qtd_sales_license_seat_new], axis=0)
        # Plot QTD sales by license (this product only)
        plot_qtd_sales_license(qtd_sales_license_seat_new, DIRECTORY, item,  False)

        #==========  Calculate: Sales by Region =========#
        # Sales by region and quarter for this product
        sales_by_region_and_q_new = calculate_sales_by_region_and_quarter(filtered_df, item)
        sales_by_region_and_q = pd.concat([sales_by_region_and_q, sales_by_region_and_q_new], axis=0)
        # Plot sales by region & quarter (this product only)
        plot_sales_by_region(sales_by_region_and_q_new, item, DIRECTORY, False, 'QTD_Encoding')

        # Sales by region and license for this product
        sales_by_region_and_lic_new = calculate_sales_by_region_and_lic(filtered_df, item)
        sales_by_region_and_lic = pd.concat([sales_by_region_and_lic, sales_by_region_and_lic_new], axis=0)
        # Plot sales by region & license. The product's own table is passed,
        # like the other per-product plots above; the accumulated table also
        # holds the group total and the products handled in earlier iterations.
        plot_sales_by_region(sales_by_region_and_lic_new, item, DIRECTORY, False, 'EID_GROUP')

## Plots

In [None]:
# MAT plot for the selected group. At this point df_MAT holds the group-level
# rows plus any per-product rows appended by the per-product cell.
plot_MAT(df_MAT, PRODUCT_GROUP, DIRECTORY, True, 0, 'viridis')

In [None]:
# Titles for the two MAT breakdown plots.
label1 = PRODUCT_GROUP + ' by Group'
label2 = PRODUCT_GROUP + ' by Region'

# The by-group MAT exists only for the group at index 0 (df_MAT_group is
# assigned under the same condition), so its plot is guarded the same way.
if productIdx == 0:
    plot_MAT(df_MAT_group, label1, DIRECTORY, True, 0,'tab10')
    print('')
# The by-region MAT is plotted for every group.
plot_MAT(df_MAT_region, label2, DIRECTORY, True, 0, 'tab20')

In [None]:
# Quarterly sales trend plot; q_report_df holds the group report plus any
# per-product reports appended by the per-product cell.
plot_q_sales_trend(q_report_df, DIRECTORY, PRODUCT_GROUP, True)

In [None]:
# Sales by region & quarter for the whole group; temp_data_1 is the
# group-level table saved before per-product rows were appended.
plot_sales_by_region(temp_data_1, PRODUCT_GROUP, DIRECTORY, True, 'QTD_Encoding')

In [None]:
# QTD sales by license; qtd_sales_license_seat holds the group rows plus any
# per-product rows appended by the per-product cell.
plot_qtd_sales_license(qtd_sales_license_seat, DIRECTORY, PRODUCT_GROUP,  True)

In [None]:
# Sales by region & license for the whole group; temp_data_2 is the
# group-level table saved before per-product rows were appended.
plot_sales_by_region(temp_data_2, PRODUCT_GROUP, DIRECTORY, True, 'EID_GROUP')

In [None]:
# QTD sales by region for the whole group, drawn from the group-level
# region-and-quarter table (temp_data_1).
plot_qtd_sales_region(temp_data_1, PRODUCT_GROUP, DIRECTORY, True, 'tab20')

## Write to Excel

Reshape data

In [None]:
# Reshape the MAT tables for the Excel report.
excel_MAT_WW = reshape_excel_plot_MAT(df_MAT)
excel_MAT_byRegion = reshape_excel_plot_MAT(df_MAT_region)
# df_MAT_group is only computed when the group at index 0 is selected. For any
# other selection the name is either undefined (NameError) or still holds the
# table of an earlier run for a different group, so it must not be read here.
# An empty frame keeps the 'MAT_Group' sheet writable in the Excel cell.
if productIdx == 0:
    excel_MAT_byGroup = reshape_excel_plot_MAT(df_MAT_group)
else:
    excel_MAT_byGroup = pd.DataFrame()

In [None]:
# Reshape the quarterly short-term sales trend: use the Product column as the
# index for the Excel sheet.
excel_q_report_df = q_report_df.set_index('Product')

In [None]:
# Reshape QTD sales & seats by license: use the Product column as the index
# for the Excel sheet.
excel_qtd_sales_license_seat = qtd_sales_license_seat.set_index('Product')

In [None]:
# QTD sales by license: leave out the 'Seat (Qty)' rows, drop the Unit column,
# and index the remaining rows by Product.
excel_qtd_sales_license = (
    qtd_sales_license_seat
    .loc[qtd_sales_license_seat['Unit'] != 'Seat (Qty)']
    .drop(columns=['Unit'])
    .set_index('Product')
)

# Share of each license within a product: every row is divided by its own total.
excel_qtd_sales_license_pct = excel_qtd_sales_license.apply(
    lambda row: row / row.sum(), axis=1
)

In [None]:
# Reshape sales by region (via reshape_sales_by_region), then compute the
# percentage table: with axis=0 each column is divided by its own column total.
excel_by_region_reshaped = reshape_sales_by_region(sales_by_region_and_lic)
excel_by_region_reshaped_pct = excel_by_region_reshaped.apply(lambda x: x/x.sum(), axis=0)

Write to Excel

In [None]:
# One workbook per product group, saved inside the group's report folder.
report_path = DIRECTORY + '/_' + PRODUCT_GROUP + '_Excel_Report.xlsx'
with pd.ExcelWriter(report_path, engine='xlsxwriter') as writer:
    # Sheet name -> dataframe, listed in the order the sheets are written.
    sheets = {
        'MAT_WW': excel_MAT_WW,
        'MAT_Region': excel_MAT_byRegion,
        'MAT_Group': excel_MAT_byGroup,
        'ShortTerm_Trend': excel_q_report_df,
        'QTD_bySeat_byLic': excel_qtd_sales_license_seat,
        'QTD_byLic': excel_qtd_sales_license,
        'QTD_byLic_pct': excel_qtd_sales_license_pct,
        'QTD_byRegion': excel_by_region_reshaped,
        'QTD_byRegion_pct': excel_by_region_reshaped_pct,
        'QTD_Region_byLic': sales_by_region_and_lic,
        'ShortTerm_Trend_byRegion': sales_by_region_and_q,
    }
    # Each dataframe goes to its own worksheet.
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)

## File Management

In [None]:
# Show the report folder of the currently selected group.
print(DIRECTORY)
#============== WARNING ================#
#-------- Deletes Reports Folder --------#
# Safety switch: keep it False. Setting it to True removes the whole
# /content/QBR_REPORTS tree, i.e. the reports of every product group.
DELETE_ALL = False
if DELETE_ALL is True:
  #============== WARNING ================#
  # ignore_errors=True: failures during removal (e.g. a missing folder) are ignored
  shutil.rmtree('/content/QBR_REPORTS', ignore_errors=True)

In [None]:
# Zip all reports.
# Run this after the reports for every product group have been generated;
# set TO_ZIP to True to enable it.
# NOTE(review): the archive is created inside the folder that is being zipped,
# and the success message is printed whether or not the zip command succeeded.
TO_ZIP = False
if TO_ZIP == True:
    !zip -r '/content/QBR_REPORTS/QBR_Reports.zip' '/content/QBR_REPORTS'
    print('The zip file is successfully created!')

In [None]:
# Mount Google Drive at /content/drive.
# (drive is imported again here so this cell can be run on its own.)
from google.colab import drive
drive.mount('/content/drive')

Report Summary

In [None]:
# Closing summary: the selected index, the group name, and then the products
# of the group, one per line.
print(f'Reports generated for Product Group {productIdx} \n---------------------------------------')
print(f'All: {PRODUCT_GROUP}')
summary_lines = [f"{product}\n" for product in group_dct.values()]
print(''.join(summary_lines))