In [333]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import random

In [334]:
# define the structure for kpi tracking based on the final input fields for dashboard

# final toy dataset with time series pattern
PATH_TOY_DATASET = "/Users/diana/Dropbox/_hackathon/deploy_2023/_data/pj_sample_value_long_completed_timeSeries_smooth_2014_2023.csv"

# import toy dataset
dfAllYears = pd.read_csv(PATH_TOY_DATASET)
# remove dropped index if there is one
if 'Unnamed: 0' in dfAllYears.columns:
    dfAllYears = dfAllYears.drop(columns=['Unnamed: 0'])
# correct error in kpi  
dfAllYears.kpi.replace('share short tern leave',
                     'share short term leave',inplace=True)

dfAllYears['date'] = pd.to_datetime(dfAllYears.date)
kpis = dfAllYears.kpi.unique().tolist()
dfUnique = dfAllYears.sort_values(by=['date','circle'],
                                   ascending=[True,True]).drop_duplicates(subset=['circle',
                                                                                  'kpi',
                                                                                  'periodicity',
                                                                                  'range']).reset_index(drop=True)

dfUnique

Unnamed: 0,circle,kpi,periodicity,range,period_year,period_month,date,value
0,Fundraising,private donations,month,0 <= X,2014,1,2014-01-01,248384.0
1,HR,involuntary headcount change (FTE),month,0 <= % <= 100,2014,1,2014-01-01,1.086386
2,HR,share short term leave,month,0 <= % <= 100,2014,1,2014-01-01,1.708795
3,Programs - Parents -Online,count leads,month,0 <= X,2014,1,2014-01-01,128.204863
4,Programs - Parents -Online,count sessions on .projuventute.ch,month,0 <= X,2014,1,2014-01-01,92148.809629
5,Programs - Parents -Online,net promoter score,year,-100 <= X <= 100,2014,1,2014-01-01,23.0
6,Digital,additional monetization/savings from CRM,quarter,0 <= X,2014,3,2014-03-01,24514.12
7,Programs,additional monetization/savings from programs,quarter,0 <= X,2014,3,2014-03-01,252685.97
8,Programs - Children - Counceling,reachability,quarter,0 <= % <= 100,2014,3,2014-03-01,42.19
9,HR,share of teams constituted as circles,month,0 <= % <= 100,2023,1,2023-01-01,35.0


In [335]:
# Creating subplots for the time series of each KPI 
kpis = dfAllYears.kpi.unique().tolist()
fig = make_subplots(rows=5, cols=2, subplot_titles=kpis, shared_xaxes=False)
kpis = dfUnique.kpi.tolist()

row = 1
col = 1
for kpi in kpis:
    dfKpi = dfAllYears[dfAllYears['kpi'] == kpi]
    trace = go.Scatter(x=dfKpi['date'], y=dfKpi['value'], name=kpi, mode='lines')
    fig.add_trace(trace, row=row, col=col)
    col += 1
    if col > 2:
        col = 1
        row += 1

fig.update_layout(height=1200, width=1000, title_text='Time Series of Each KPI')
fig.show()

In [336]:
# transfer toy values to new structure df

# create empty df with new input fields
dfInputFields = pd.DataFrame(columns=['kpi','circle', # permanent
                                    'periodicity', # permanent
                                    'unit','initial_value','target_value', # permanent
                                    'date', # data entry
                                    'value' #  data entry
                                    ])

# 1. PERMANENT INPUT FIELDS
# descriptive columns
kpis = dfUnique.kpi.tolist()
dfInputFields['kpi'] = kpis
dfInputFields['circle'] = dfUnique.circle

# time based
dfInputFields['periodicity'] = dfUnique.periodicity
dfInputFields


Unnamed: 0,kpi,circle,periodicity,unit,initial_value,target_value,date,value
0,private donations,Fundraising,month,,,,,
1,involuntary headcount change (FTE),HR,month,,,,,
2,share short term leave,HR,month,,,,,
3,count leads,Programs - Parents -Online,month,,,,,
4,count sessions on .projuventute.ch,Programs - Parents -Online,month,,,,,
5,net promoter score,Programs - Parents -Online,year,,,,,
6,additional monetization/savings from CRM,Digital,quarter,,,,,
7,additional monetization/savings from programs,Programs,quarter,,,,,
8,reachability,Programs - Children - Counceling,quarter,,,,,
9,share of teams constituted as circles,HR,month,,,,,


In [337]:

# measurement based columns
dicKpis = { 'private donations':{   'circle':'Fundraising',
                                    'periodicity':'month',
                                    'unit':'chf',
                                    'initial_value':0,
                                    'target_value':dfAllYears.loc[
                                        dfAllYears.kpi=='private donations',
                                        'value'].quantile(round(random.uniform(0.8, 1.),2)).round(0)},
               
                'involuntary headcount change (FTE)':{ 'circle':'HR',
                                                        'periodicity':'month',
                                                        'unit':'%', # with negative trend
                                                        'initial_value':3.5,
                                                        'target_value':0},
                
                'share of teams constituted as circles':{   'circle':'HR',
                                                            'periodicity':'month',
                                                            'unit':'%_cumulative', 
                                                            'initial_value':0,
                                                            'target_value':100},
                
                'share short term leave':{  'circle':'HR',
                                            'periodicity':'month',
                                            'unit':'%', # with negative trend
                                            'initial_value':3.5,
                                            'target_value':0},
                
                'count leads':{ 'circle':'Programs - Parents -Online',
                                'periodicity':'month',
                                'unit':'amount', 
                                'initial_value':0,
                                'target_value':dfAllYears.loc[
                                    dfAllYears.kpi=='count leads',
                                    'value'].quantile(round(random.uniform(0.8, 1.),2)).round(0)},
                
                'count sessions on .projuventute.ch':{  'circle':'Programs - Parents -Online',
                                                        'periodicity':'month',
                                                        'unit':'amount', 
                                                        'initial_value':0,
                                                        'target_value':dfAllYears.loc[
                                                            dfAllYears.kpi=='count sessions on .projuventute.ch',
                                                            'value'].quantile(round(random.uniform(0.8, 1.),2)).round(0)},
                'net promoter score':{  'circle':'Programs - Parents -Online',
                                        'periodicity':'year',
                                        'unit':'score', 
                                        'initial_value':-100,
                                        'target_value':100},
                
                'additional monetization/savings from CRM':{'circle':'Digital',
                                                            'periodicity':'quarter',
                                                            'unit':'chf', 
                                                            'initial_value':0,
                                                            'target_value':dfAllYears.loc[
                                                                dfAllYears.kpi=='additional monetization/savings from CRM',
                                                                'value'].quantile(round(random.uniform(0.8, 1.),2)).round(0)},
                
                'additional monetization/savings from programs':{   'circle':'Program',
                                                                    'periodicity':'quarter',
                                                                     'unit':'chf', 
                                                                    'initial_value':0,
                                                                    'target_value':dfAllYears.loc[
                                                                        dfAllYears.kpi=='additional monetization/savings from programs',
                                                                        'value'].quantile(round(random.uniform(0.8, 1.),2)).round(0)},
                
                'reachability':{'circle':'Programs - Children - Counceling',
                                'periodicity':'quarter',
                                'unit':'%', 
                                'initial_value':0,
                                'target_value':100}
                }

# assign values in dictionary
for k in kpis:             
    # permanent fields:
    # hierarchy
    dfInputFields.loc[dfInputFields.kpi == k,'circle'] = dicKpis[k]['circle']
    # recurrence
    dfInputFields.loc[dfInputFields.kpi == k,'periodicity'] = dicKpis[k]['periodicity']
    # measurement
    dfInputFields.loc[dfInputFields.kpi == k,'unit'] = dicKpis[k]['unit']
    dfInputFields.loc[dfInputFields.kpi == k,'initial_value'] = dicKpis[k]['initial_value']
    dfInputFields.loc[dfInputFields.kpi == k,'target_value'] = dicKpis[k]['target_value']
dfInputFields

Unnamed: 0,kpi,circle,periodicity,unit,initial_value,target_value,date,value
0,private donations,Fundraising,month,chf,0.0,1356027.0,,
1,involuntary headcount change (FTE),HR,month,%,3.5,0.0,,
2,share short term leave,HR,month,%,3.5,0.0,,
3,count leads,Programs - Parents -Online,month,amount,0.0,474.0,,
4,count sessions on .projuventute.ch,Programs - Parents -Online,month,amount,0.0,182361.0,,
5,net promoter score,Programs - Parents -Online,year,score,-100.0,100.0,,
6,additional monetization/savings from CRM,Digital,quarter,chf,0.0,31170.0,,
7,additional monetization/savings from programs,Program,quarter,chf,0.0,252686.0,,
8,reachability,Programs - Children - Counceling,quarter,%,0.0,100.0,,
9,share of teams constituted as circles,HR,month,%_cumulative,0.0,100.0,,


In [338]:
# 2. DATA ENTRY FIELDS
dfNewStructure = pd.DataFrame()
dfNewStructure2023 = pd.DataFrame()
nMonths = 12

# reformat date
dfAllYears.date = pd.to_datetime(dfAllYears.date)

# remove share of teams constituted as circles and transfer its data separatedly
remove = {'share of teams constituted as circles'}
kpisSelected = [e for e in kpis if e not in remove]

# create temporary df to hold values within loop with 12 months for each kpi
dfTemp = pd.concat([dfInputFields] * nMonths, ignore_index=True)

# transfer data without share of teams constituted as circles
for year in range(2014,2023): #for 9 years
        for kpi in kpisSelected: #for 9 kpis
            
            # assign dates
            dfTemp.loc[dfTemp.kpi == kpi,
                       'date'] = pd.to_datetime(dict(year=year,
                                                    month=list(range(1,13)),
                                                    day=1)).tolist()
            
            # assign values by selected kpi and year
            dfTemp.loc[dfTemp.kpi == kpi,
                              'value'] = dfAllYears.loc[(dfAllYears.date.dt.year==year) &
                                                        (dfAllYears.kpi == kpi),
                                                        'value'].tolist()

            # transfer select values to new structure by concatenating each selected kpi and year          
            dfNewStructure = pd.concat([dfNewStructure,dfTemp[dfTemp.kpi == kpi]],ignore_index=True)
            
# transfer data for share of teams constituted as circles in 2023
for year in range(2023,2024): #for 1 year
        for kpi in kpis: #for 10 kpis
            
            # assign dates
            dfTemp.loc[dfTemp.kpi == kpi,
                       'date'] = pd.to_datetime(dict(year=year,
                                                    month=list(range(1,13)),
                                                    day=1)).tolist()
            
            # assign values by selected kpi and year
            dfTemp.loc[dfTemp.kpi == kpi,
                              'value'] = dfAllYears.loc[(dfAllYears.date.dt.year==year) &
                                                        (dfAllYears.kpi == kpi),
                                                        'value'].tolist()

            # transfer select values to new structure by concatenating each selected kpi and year          
            dfNewStructure2023 = pd.concat([dfNewStructure2023,dfTemp[dfTemp.kpi == kpi]],ignore_index=True)

# bring both dfs together        
dfNewStructure = pd.concat([dfNewStructure,dfNewStructure2023],ignore_index=True)
dfNewStructure.sort_values(by=['circle','kpi','date'],ascending=[True,True,True]).reset_index(drop=True, inplace=True)   
dfNewStructure['date'] = pd.to_datetime(dfNewStructure['date'])
dfNewStructure


Unnamed: 0,kpi,circle,periodicity,unit,initial_value,target_value,date,value
0,private donations,Fundraising,month,chf,0,1356027.0,2014-01-01,248384.0
1,private donations,Fundraising,month,chf,0,1356027.0,2014-02-01,677356.61
2,private donations,Fundraising,month,chf,0,1356027.0,2014-03-01,437920.52
3,private donations,Fundraising,month,chf,0,1356027.0,2014-04-01,339549.48
4,private donations,Fundraising,month,chf,0,1356027.0,2014-05-01,336036.85
...,...,...,...,...,...,...,...,...
1087,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-08-01,90.0
1088,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-09-01,90.0
1089,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-10-01,90.0
1090,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-11-01,95.0


In [339]:
# Creating subplots for the time series of each KPI 
fig = make_subplots(rows=5, cols=2, subplot_titles=kpis, shared_xaxes=False)

row = 1
col = 1
for kpi in kpis:
    dfKpi = dfNewStructure[dfNewStructure['kpi'] == kpi]
    trace = go.Scatter(x=dfKpi['date'], y=dfKpi['value'], name=kpi, mode='lines')
    fig.add_trace(trace, row=row, col=col)
    col += 1
    if col > 2:
        col = 1
        row += 1

fig.update_layout(height=1200, width=1000, title_text='Time Series of Each KPI')
fig.show()

In [340]:
# 2.1 anualization of baseline and target

# establish threshold based on year
# use cumulated values for chf, % and  amount units
for y in range(2014,2024):
    kpisYear = dfNewStructure.loc[(dfNewStructure.date.dt.year==y)].kpi.unique().tolist()
    for k in kpisYear:
        
        # select year and kpi, to calculate baseline and target based on yearly data
        periodicity = dfNewStructure.loc[dfNewStructure.kpi==k,'periodicity'].unique()[0]
        unit = dfNewStructure.loc[dfNewStructure.kpi==k,'unit'].unique()[0]
        if periodicity == 'month':
            selection = (dfNewStructure.date.dt.year == y)&(dfNewStructure.kpi == k)
        elif periodicity == 'quarter':
            selection = (dfNewStructure.date.dt.year == y)&(dfNewStructure.date.dt.month.isin([3,6,9,12]))&(dfNewStructure.kpi == k)  
        else:
                selection = (dfNewStructure.date.dt.year == y)&(dfNewStructure.date.dt.month.isin([12]))&(dfNewStructure.kpi == k)
        
        # apply cumulative values depending on unit
        if unit =='chf' or unit =='amount':
            dfNewStructure.loc[selection,'cumulated_value'] = dfNewStructure.loc[selection,'value'].cumsum()
            dfNewStructure.loc[selection,'initial_value'] = dfNewStructure.loc[selection,'cumulated_value'].tolist()[-1]*round(random.uniform(0., 0.2),2)
            dfNewStructure.loc[selection,'target_value'] = dfNewStructure.loc[selection,'cumulated_value'].tolist()[-1]*round(random.uniform(0.8, 1.),2)
        else:
            dfNewStructure.loc[selection,'cumulated_value'] = dfNewStructure.loc[selection,'value']
          
# Drop rows with None/NaN
dfNewStructure = dfNewStructure[dfNewStructure.cumulated_value.notnull()].reset_index(drop=True)
dfNewStructure


Unnamed: 0,kpi,circle,periodicity,unit,initial_value,target_value,date,value,cumulated_value
0,private donations,Fundraising,month,chf,839097.6315,4978645.9469,2014-01-01,248384.0,248384.0
1,private donations,Fundraising,month,chf,839097.6315,4978645.9469,2014-02-01,677356.61,925740.61
2,private donations,Fundraising,month,chf,839097.6315,4978645.9469,2014-03-01,437920.52,1363661.13
3,private donations,Fundraising,month,chf,839097.6315,4978645.9469,2014-04-01,339549.48,1703210.61
4,private donations,Fundraising,month,chf,839097.6315,4978645.9469,2014-05-01,336036.85,2039247.46
...,...,...,...,...,...,...,...,...,...
737,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-08-01,90.0,90.0
738,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-09-01,90.0,90.0
739,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-10-01,90.0,90.0
740,share of teams constituted as circles,HR,month,%_cumulative,0,100,2023-11-01,95.0,95.0


In [341]:
# 3. PRODUCED METRICS

# progress and performance
for k in kpis:
    unit = dfNewStructure.loc[dfNewStructure.kpi==k,'unit'].unique()[0]
    if unit=='chf' or unit=='amount':
        # progress
        dfNewStructure.loc[dfNewStructure.kpi==k,'progress'] = (
        dfNewStructure.loc[dfNewStructure.kpi==k,'cumulated_value']-dfNewStructure.loc[dfNewStructure.kpi==k,'initial_value'])/(
        dfNewStructure.loc[dfNewStructure.kpi==k,'target_value']-dfNewStructure.loc[dfNewStructure.kpi==k,'initial_value'])*100
        
        # performance
        dfNewStructure.loc[dfNewStructure.kpi==k,'performance'] = (
        dfNewStructure.loc[dfNewStructure.kpi==k,'cumulated_value']-
        dfNewStructure.loc[dfNewStructure.kpi==k,'cumulated_value'].min())/(
        dfNewStructure.loc[dfNewStructure.kpi==k,'cumulated_value'].max()-
        dfNewStructure.loc[dfNewStructure.kpi==k,'cumulated_value'].min())*100
    else:
        # progress
        dfNewStructure.loc[dfNewStructure.kpi==k,'progress'] = (
        dfNewStructure.loc[dfNewStructure.kpi==k,'value']-dfNewStructure.loc[dfNewStructure.kpi==k,'initial_value'])/(
        dfNewStructure.loc[dfNewStructure.kpi==k,'target_value']-dfNewStructure.loc[dfNewStructure.kpi==k,'initial_value'])*100

        #performance
        dfNewStructure.loc[dfNewStructure.kpi==k,'performance'] = (
        dfNewStructure.loc[dfNewStructure.kpi==k,'value']-
        dfNewStructure.loc[dfNewStructure.kpi==k,'value'].min())/(
        dfNewStructure.loc[dfNewStructure.kpi==k,'value'].max()-
        dfNewStructure.loc[dfNewStructure.kpi==k,'value'].min())*100
        
# Creating subplots for the time series progress of each KPI 
fig = make_subplots(rows=5, cols=2, subplot_titles=kpis, shared_xaxes=False)

row = 1
col = 1
for kpi in kpis:
    dfKpi = dfNewStructure[dfNewStructure['kpi'] == kpi]
    trace = go.Scatter(x=dfKpi['date'], y=dfKpi['progress'], name=kpi, mode='lines')
    fig.add_trace(trace, row=row, col=col)
    col += 1
    if col > 2:
        col = 1
        row += 1

fig.update_layout(height=1200, width=1000, title_text='Progress of Each KPI')
fig.show()

In [342]:
# Creating subplots for the time series performance of each KPI 
fig = make_subplots(rows=5, cols=2, subplot_titles=kpis, shared_xaxes=False)

row = 1
col = 1
for kpi in kpis:
    dfKpi = dfNewStructure[(dfNewStructure['kpi'] == kpi)&(dfNewStructure['date']>=pd.to_datetime('2023-01-01'))]
    trace = go.Scatter(x=dfKpi['date'], y=dfKpi['performance'], name=kpi, mode='lines')
    fig.add_trace(trace, row=row, col=col)
    col += 1
    if col > 2:
        col = 1
        row += 1

fig.update_layout(height=1200, width=1000, title_text='Performance of Each KPI')
fig.show()

In [343]:
# reorder columns
dfNewStructure.sort_values(['circle','kpi','date'],inplace=True)
dfNewStructure.reset_index(inplace=True,drop=True)
dfNewStructure.to_csv('/Users/diana/Dropbox/_hackathon/deploy_2023/_data/pj_time_series_smooth_progress_performance_2014_2023.csv')

In [344]:
dfNewStructure

Unnamed: 0,kpi,circle,periodicity,unit,initial_value,target_value,date,value,cumulated_value,progress,performance
0,additional monetization/savings from CRM,Digital,quarter,chf,2387.415,65256.01,2014-03-01,24514.12,24514.12,35.195164,12.04017
1,additional monetization/savings from CRM,Digital,quarter,chf,2387.415,65256.01,2014-06-01,23346.09,47860.21,72.329905,23.614575
2,additional monetization/savings from CRM,Digital,quarter,chf,2387.415,65256.01,2014-09-01,14161.84,62022.05,94.856001,30.635659
3,additional monetization/savings from CRM,Digital,quarter,chf,2387.415,65256.01,2014-12-01,17558.45,79580.5,122.78481,39.340697
4,additional monetization/savings from CRM,Digital,quarter,chf,7233.1311,48406.3389,2015-03-01,11900.41,11900.41,11.335718,5.78661
...,...,...,...,...,...,...,...,...,...,...,...
737,net promoter score,Programs - Parents -Online,year,score,-100,100,2019-12-01,68.0,68.0,84.0,91.836735
738,net promoter score,Programs - Parents -Online,year,score,-100,100,2020-12-01,70.0,70.0,85.0,95.918367
739,net promoter score,Programs - Parents -Online,year,score,-100,100,2021-12-01,72.0,72.0,86.0,100.0
740,net promoter score,Programs - Parents -Online,year,score,-100,100,2022-12-01,65.0,65.0,82.5,85.714286
