# Inbound Sales Dashboard Sample Data Python Script
Purpose:   
Generate randomized data for a portfolio dashboard that is similar to real dashboards I have built for corporations in Inbound Sales reporting.

Steps:
1) Generate sales agent employee data  
Inbound sales reporting focuses primarily on sales agents' performance, and both calls and sales are handled by agents.
2) Generate sample handled phone call data  
This dashboard will be focusing on sales agents so we will not be looking at service level, missed calls, etc.  So I will be looking at the employee start dates and then generating calls after their start date for each employee.  This call data will have a handled by agent field tying back to the employee and a date field tying to the date dimension.
3) Generate sample sales data  
In inbound sales, a sale can only be made when a call is handled.  So I will start with the phone call data and then create sales based on the phone calls.  Not every phone call results in a sale, so I need to use a randomized probability that a sale was made.  The sales data will have a handled by agent field tying back to the employee and a date field tying to the date dimension.

Not Included:  
To focus the reporting we will not be looking at things like service level, missed calls, call text, call skills, orders, product tiers, commission, etc.  Things like those were usually reported on in specialized sales operations dashboards.  Inbound sales dashboards were primarily concerned with sales performance by agents and leader teams.

In [1]:
#import needed libraries
import pandas as pd #dataframes
from datetime import date, timedelta
from dateutil.relativedelta import relativedelta #job titles/sales based on tenure
import numpy as np #randoms on arrays, etc.


from faker import Faker #randomized fake data
fake = Faker() #create a usable faker instance

In [2]:
#1) generate sales agent employee data

#build every person's name in one place so all names share the same "Last, First" format and the formatting is not repeated in each cell
def create_random_name():
    """Return a random fake name formatted as "Last, First"."""
    return f"{fake.last_name()}, {fake.first_name()}"


#column layout shared by the employee dataframes that hold the data prior to CSV export.
employee_columns = ['Employee_ID','Employee_Name', 'Job_Title', 'Hire_Date', 'Call_Center','Manager_Name','Supervisor_Name']


#hire dates are drawn from the 70 years leading up to hire_date_end
hire_date_end = date(2025, 1, 1)
hire_date_start = date(hire_date_end.year - 70, 1, 1)



#create a list of call centers. Each manager is assigned to 1 call center.
centers_data = {
    'Call_Center': ['Narshe','Zanarkand','Cornelia','Nibelheim','Costa del Sol','Midgar','Zozo','Daguerreo','Dali','Albrook']
}
centers_df = pd.DataFrame(centers_data)




#create a list of managers.
managers_count = 50


#generate realistic managers data (using the call center list)
#rows are collected in a plain list and turned into a dataframe once at the end;
#growing a dataframe one .loc row at a time re-allocates the table on every insert.
manager_rows = []
for i in range(managers_count):
    employee_id = i
    employee_name = create_random_name()
    job_title = 'Manager, Inbound Sales'
    hire_date = fake.date_time_between(start_date=hire_date_start, end_date=hire_date_end).date()
    call_center_name = centers_df['Call_Center'].sample(n=1).iloc[0] #randomly pick call center for this manager
    manager_name = '' #higher hierarchy not needed for this.
    supervisor_name = ''

    manager_rows.append([employee_id, employee_name, job_title, hire_date, call_center_name, manager_name, supervisor_name])

managers_df = pd.DataFrame(manager_rows, columns=employee_columns)

managers_df.head()



Unnamed: 0,Employee_ID,Employee_Name,Job_Title,Hire_Date,Call_Center,Manager_Name,Supervisor_Name
0,0,"Pierce, Elizabeth","Manager, Inbound Sales",2013-09-05,Narshe,,
1,1,"Jordan, Richard","Manager, Inbound Sales",1988-03-27,Cornelia,,
2,2,"Mccarty, Brittany","Manager, Inbound Sales",1976-04-12,Narshe,,
3,3,"Durham, John","Manager, Inbound Sales",1963-01-29,Zanarkand,,
4,4,"Saunders, Joshua","Manager, Inbound Sales",2006-10-26,Albrook,,


In [3]:
#generate realistic supervisors data (using the managers list)

#create a list of supervisors. Each supervisor reports to 1 manager.
supervisors_count = managers_count * 5

#rows are collected in a plain list and turned into a dataframe once at the end;
#growing a dataframe one .loc row at a time re-allocates the table on every insert.
supervisor_rows = []
for i in range(supervisors_count):
    employee_id = i + managers_count + 1 #offset past the manager IDs so IDs stay unique
    employee_name = create_random_name()
    job_title = 'Supervisor, Inbound Sales'
    hire_date = fake.date_time_between(start_date=hire_date_start, end_date=hire_date_end).date()

    manager_row = managers_df.sample(n=1) #pick one random manager to assign to this sup

    call_center_name = manager_row['Call_Center'].item() #use the manager's call center so it matches the hierarchy
    manager_name = manager_row['Employee_Name'].item()
    supervisor_name = '' #a supervisor has no supervisor of their own in this data set

    supervisor_rows.append([employee_id, employee_name, job_title, hire_date, call_center_name, manager_name, supervisor_name])

supervisors_df = pd.DataFrame(supervisor_rows, columns=employee_columns)

supervisors_df.head()


Unnamed: 0,Employee_ID,Employee_Name,Job_Title,Hire_Date,Call_Center,Manager_Name,Supervisor_Name
0,51,"Brown, Sabrina","Supervisor, Inbound Sales",1980-03-15,Costa del Sol,"Simpson, Barbara",
1,52,"Boyd, Andrew","Supervisor, Inbound Sales",2015-03-11,Albrook,"Marshall, Michelle",
2,53,"Ross, Cassie","Supervisor, Inbound Sales",1984-10-04,Costa del Sol,"Gibbs, Ashley",
3,54,"Hickman, Justin","Supervisor, Inbound Sales",1976-09-29,Zozo,"Sosa, Alexandra",
4,55,"Lee, Lauren","Supervisor, Inbound Sales",1997-08-26,Daguerreo,"Rodriguez, Renee",


In [4]:
#generate realistic agent data (using the supervisors list)

def _agent_job_title(tenure_years):
    """Return the agent job title for a tenure given in whole years."""
    if tenure_years <= 1:
        return 'Agent I, Inbound Sales'
    if tenure_years <= 3:
        return 'Agent II, Inbound Sales'
    if tenure_years <= 5:
        return 'Agent III, Inbound Sales'
    return 'Agent IV, Inbound Sales'


#create a list of agents. Each agent reports to 1 supervisor
agents_count = supervisors_count * 10

today = date.today() #read once so every agent's tenure is measured against the same day

#rows are collected in a plain list and turned into a dataframe once at the end;
#growing a dataframe one .loc row at a time re-allocates the table on every insert.
agent_rows = []
for i in range(agents_count):
    employee_id = i + managers_count + supervisors_count + 10 #offset past the manager and supervisor IDs so IDs stay unique
    employee_name = create_random_name()

    hire_date = fake.date_time_between(start_date=hire_date_start, end_date=hire_date_end).date()

    #job title steps up with tenure (whole years between hire date and today)
    years = relativedelta(today, hire_date).years
    job_title = _agent_job_title(years)

    supervisor_row = supervisors_df.sample(n=1) #pick one random supervisor to assign to this agent

    call_center_name = supervisor_row['Call_Center'].item() #use the supervisor's call center so it matches the hierarchy
    manager_name = supervisor_row['Manager_Name'].item()
    supervisor_name = supervisor_row['Employee_Name'].item()

    agent_rows.append([employee_id, employee_name, job_title, hire_date, call_center_name, manager_name, supervisor_name, years])

#same columns as the other employee tables plus the tenure used for the job title
agents_df = pd.DataFrame(agent_rows, columns=employee_columns + ['Tenure_Years'])

agents_df.head()



Unnamed: 0,Employee_ID,Employee_Name,Job_Title,Hire_Date,Call_Center,Manager_Name,Supervisor_Name,Tenure_Years
0,310,"Moran, Kevin","Agent IV, Inbound Sales",1968-04-16,Dali,"Sharp, Ashley","Castro, Taylor",57
1,311,"Thomas, William","Agent IV, Inbound Sales",1982-11-14,Narshe,"Austin, Maria","Moore, Jason",42
2,312,"Scott, Craig","Agent IV, Inbound Sales",1971-10-09,Nibelheim,"Contreras, Clinton","Smith, Krista",53
3,313,"Johnson, Gregory","Agent IV, Inbound Sales",1964-02-05,Daguerreo,"Taylor, Kevin","Cooke, John",61
4,314,"Stanley, Matthew","Agent I, Inbound Sales",2024-11-13,Nibelheim,"Harvey, Marvin","Watkins, Alyssa",0


In [5]:
#spot check: keep only the agents whose job title is Agent I
is_agent_one = agents_df['Job_Title'].eq('Agent I, Inbound Sales')
agent_title_test_df = agents_df.loc[is_agent_one]
agent_title_test_df.head()

Unnamed: 0,Employee_ID,Employee_Name,Job_Title,Hire_Date,Call_Center,Manager_Name,Supervisor_Name,Tenure_Years
4,314,"Stanley, Matthew","Agent I, Inbound Sales",2024-11-13,Nibelheim,"Harvey, Marvin","Watkins, Alyssa",0
98,408,"Houston, Mason","Agent I, Inbound Sales",2024-02-28,Midgar,"Bridges, Glenn","Gomez, Manuel",1
142,452,"Clark, Riley","Agent I, Inbound Sales",2023-09-13,Nibelheim,"Contreras, Clinton","Nunez, Vernon",1
160,470,"Brown, Stacy","Agent I, Inbound Sales",2024-06-28,Albrook,"Saunders, Joshua","Sanchez, Christina",1
205,515,"Williams, Lisa","Agent I, Inbound Sales",2024-10-12,Daguerreo,"Perez, Scott","Johnson, Michael",0


In [6]:
#export the agents table to CSV for use in the dashboard
agents_csv_path = 'inbound sales dashboard - dim agents.csv'
agents_df.to_csv(agents_csv_path, index=False) #index=False keeps the pandas row index out of the file

In [7]:
#generate fake handled phone call data based on the agent row & their start date.  (calls can only be handled by agents)

def _clipped_normal_ints(mean, low, high, size):
    """Draw `size` normal samples around `mean`, floor negatives at 0 and truncate to whole numbers.

    The spread uses the rule of thumb Standard Deviation = (max - min) / 6.
    """
    std_dev = (high - low) / 6
    return np.random.normal(mean, std_dev, size=size).clip(0).astype(int)


SECONDS_PER_HOUR = 60 * 60 #phone status times below are stored in seconds

#build every (date, agent) combination - a "cross join" of dates and employees
first_date = date(2020,1,1) #make sure we don't pull 70 years of call data!
calendar_key = pd.date_range(start=first_date, end=date.today(), name='Call_Date')
agent_ids = agents_df['Employee_ID'].unique() #only the ID goes into the multi index; other fields are merged in afterwards

calendar_agents_multi_index = pd.MultiIndex.from_product([calendar_key, agent_ids], names=['Call_Date','Employee_ID'])
calendar_agents_df = pd.DataFrame(index=calendar_agents_multi_index).reset_index() #reset_index turns the two index levels into ordinary columns

#left join the hire date (and tenure) onto every row so days before the hire date can be removed
agent_hire_info = agents_df[['Employee_ID','Hire_Date','Tenure_Years']]
calendar_agents_df = calendar_agents_df.merge(agent_hire_info, on='Employee_ID', how='left')

#keep only the days on or after the hire date as potential call dates
#NOTE(review): if Hire_Date holds plain date objects rather than datetime64 values, this comparison relies on pandas coercing them - confirm on the pandas version in use
on_or_after_hire = calendar_agents_df['Call_Date'] >= calendar_agents_df['Hire_Date']
calendar_agents_df = calendar_agents_df[on_or_after_hire].copy() #copy guarantees a new dataframe and not a "view" reference

#remove a random share of days as days off.  5 out of 7 days + some PTO time.
worked_fraction = 4.9 / 7
calendar_agents_df = calendar_agents_df.sample(frac=worked_fraction).reset_index(drop=True) #drop=True discards the old index and creates a new sequential one

#add randomized call data with a set-based (vectorized) approach to prevent for loop performance issues with dataframes
rows_count = len(calendar_agents_df)
calendar_agents_df['Sales_Calls'] = _clipped_normal_ints(20, 1, 40, rows_count)
calendar_agents_df['New_Customer_Calls'] = _clipped_normal_ints(15, 1, 30, rows_count)
calendar_agents_df['Upgrade_Calls'] = _clipped_normal_ints(5, 0, 10, rows_count)

#total is added right after its components so the column order stays calls -> total -> times -> total
call_count_columns = ['Sales_Calls','New_Customer_Calls','Upgrade_Calls']
calendar_agents_df['Total_Calls'] = calendar_agents_df[call_count_columns].sum(axis=1) #axis 1 = sum across the columns of each row

#phone status time
calendar_agents_df['ACW_Time'] = _clipped_normal_ints(SECONDS_PER_HOUR*2, SECONDS_PER_HOUR*0, SECONDS_PER_HOUR*3, rows_count) #account notes & call avoidance
calendar_agents_df['Hold_Time'] = _clipped_normal_ints(SECONDS_PER_HOUR*1, SECONDS_PER_HOUR*0, SECONDS_PER_HOUR*3, rows_count) #customer on hold
calendar_agents_df['AUX_Time'] = _clipped_normal_ints(SECONDS_PER_HOUR*1, SECONDS_PER_HOUR*1, SECONDS_PER_HOUR*3, rows_count) #meetings, breaks & call avoidance
calendar_agents_df['Call_Time'] = _clipped_normal_ints(SECONDS_PER_HOUR*6, SECONDS_PER_HOUR*2, SECONDS_PER_HOUR*7, rows_count) #calls
calendar_agents_df['Avail_Time'] = _clipped_normal_ints(SECONDS_PER_HOUR*1, SECONDS_PER_HOUR*0, SECONDS_PER_HOUR*3, rows_count) #not on call but available

status_time_columns = ['ACW_Time','Hold_Time','AUX_Time','Call_Time','Avail_Time']
calendar_agents_df['Total_Staffed_Time'] = calendar_agents_df[status_time_columns].sum(axis=1)




calendar_agents_df.head()

Unnamed: 0,Call_Date,Employee_ID,Hire_Date,Tenure_Years,Sales_Calls,New_Customer_Calls,Upgrade_Calls,Total_Calls,ACW_Time,Hold_Time,AUX_Time,Call_Time,Avail_Time,Total_Staffed_Time
0,2025-01-22,2747,2015-05-26,10,18,21,6,45,5924,2390,2295,22295,3160,36064
1,2021-08-19,2771,2018-07-06,7,16,9,5,30,5764,4427,4049,21912,4972,41124
2,2020-08-24,864,2013-05-02,12,24,8,6,38,3864,3555,3500,25595,4316,40830
3,2022-04-30,2356,1988-03-16,37,20,17,4,41,8287,4259,4802,17254,3294,37896
4,2025-05-12,720,1970-07-28,55,23,15,6,44,7245,2046,2460,20124,2601,34476


In [8]:
#go ahead and add sales metrics in also as it's dependent on calls.  We can split into separate tables in power query.
#size the random draws from the dataframe itself so this cell does not depend on a row count left behind by an earlier cell
metric_rows_count = len(calendar_agents_df)
total_calls = calendar_agents_df['Total_Calls']

#use the beta function for percentages (every draw falls between 0 and 1)
calendar_agents_df['RGU_Close_Rate'] = np.random.beta(a=6.65, b=9.98, size=metric_rows_count)

#whole units only: truncate rate * calls
calendar_agents_df['RGU_Total'] = np.trunc(calendar_agents_df['RGU_Close_Rate'] * total_calls).clip(0).astype(int)

calendar_agents_df['Customer_Close_Rate'] = np.random.beta(a=7.57, b=3.24, size=metric_rows_count)
calendar_agents_df['Customer_Total'] = np.trunc(calendar_agents_df['Customer_Close_Rate'] * total_calls).clip(0).astype(int)

#Standard Deviation = (max - min) / 6
#floor the per-call rate at 0 before it is stored; otherwise a rare negative draw would leave a negative Revenue_Per_Call next to a Revenue of 0
calendar_agents_df['Revenue_Per_Call'] = np.random.normal(30, (50-10)/6, size=metric_rows_count).clip(0)
calendar_agents_df['Revenue'] = (calendar_agents_df['Revenue_Per_Call'] * total_calls).clip(0)

calendar_agents_df['Call_Quality'] = np.random.beta(a=4.59, b=1.97, size=metric_rows_count)

calendar_agents_df.head()

Unnamed: 0,Call_Date,Employee_ID,Hire_Date,Tenure_Years,Sales_Calls,New_Customer_Calls,Upgrade_Calls,Total_Calls,ACW_Time,Hold_Time,...,Call_Time,Avail_Time,Total_Staffed_Time,RGU_Close_Rate,RGU_Total,Customer_Close_Rate,Customer_Total,Revenue_Per_Call,Revenue,Call_Quality
0,2025-01-22,2747,2015-05-26,10,18,21,6,45,5924,2390,...,22295,3160,36064,0.344751,15,0.600732,27,34.888374,1569.976842,0.667322
1,2021-08-19,2771,2018-07-06,7,16,9,5,30,5764,4427,...,21912,4972,41124,0.263166,7,0.667993,20,43.010465,1290.313963,0.692549
2,2020-08-24,864,2013-05-02,12,24,8,6,38,3864,3555,...,25595,4316,40830,0.345741,13,0.835463,31,39.386834,1496.699693,0.765803
3,2022-04-30,2356,1988-03-16,37,20,17,4,41,8287,4259,...,17254,3294,37896,0.409825,16,0.966864,39,28.891477,1184.550567,0.715077
4,2025-05-12,720,1970-07-28,55,23,15,6,44,7245,2046,...,20124,2601,34476,0.354309,15,0.586174,25,25.818669,1136.021428,0.591632


In [9]:
#export the combined call + sales table to CSV
#NOTE(review): index=True is the pandas default and writes the row index as an unnamed first column, whereas the agents export passes index=False - confirm the dashboard expects that extra column
call_sales_csv_path = 'Inbound Sales Dashboard Call and Sales Data.csv'
calendar_agents_df.to_csv(call_sales_csv_path, index=True)