In [17]:
import pandas as pd
import numpy as np
import faker

# create some fake data
fake = faker.Faker()

# function to create a dataframe with fake values for our workers
def make_workers(num):
    
    # lists to randomly assign to workers
    status_list = ['Full Time', 'Part Time', 'Contractual']
    team_list = [fake.color_name() for x in range(4)]
    

    fake_workers = [{'Worker ID':x+1000,
                  'Worker Name':fake.name(), 
                  'Hire Date':fake.date_between(start_date='-30y', end_date='today'),
                  'Worker Status':np.random.choice(status_list, p=[0.50, 0.30, 0.20]), # assign items from list with different probabilities
                  'Team':np.random.choice(team_list)} for x in range(num)]
        
    return fake_workers

worker_df = pd.DataFrame(make_workers(num=5000))
worker_df.head()

Unnamed: 0,Worker ID,Worker Name,Hire Date,Worker Status,Team
0,1000,Robin Brandt,2016-09-28,Contractual,SeaShell
1,1001,Rachel Ford,1998-07-29,Part Time,SeaShell
2,1002,Timothy Whitehead,1994-11-24,Full Time,SeaShell
3,1003,John Stark,2005-08-13,Part Time,SeaShell
4,1004,Adam Shea,1996-01-03,Full Time,BlanchedAlmond


In [18]:
# function to create widget data

import random


def make_widget_data(num):
    
    fake_widgets = [{'Item Number':id(y),
                     'Step 1':np.random.gamma(shape=3, scale=1),
                     'Step 2':np.random.normal(5), 
                     'Step 3':np.random.exponential(4)} for y in range(num)]
    
    return fake_widgets

# empty list to store our widget dataframes in    
dfs_list = []

# now lets make some widget data for each worker
# iterate through the worker dataframe
for index, row in worker_df.iterrows():
    
    # not all workers work at the same rate - or the same number of hours
    # randomly select a number of widgets for them to create based on 'worker status'
    if row['Worker Status'] == 'Full Time':
        num_widgets = random.randrange(500, 1000)
    elif row['Worker Status'] == 'Part Time':
        num_widgets = random.randrange(100, 500)
    else:
        num_widgets = random.randrange(1, 1000)
    
    # make widgets for each worker
    tmp_widgets = pd.DataFrame(make_widget_data(num=num_widgets))
    
    # add worker id so we know who made the widget
    tmp_widgets['Worker ID'] = row['Worker ID']
    
    # make sure item number is unique by appending worker id
    tmp_widgets['Item Number'] = tmp_widgets['Item Number'].astype('str')+ '-' + tmp_widgets['Worker ID'].astype('str')
    
    # append to df list
    dfs_list.append(tmp_widgets)
    
# concatenate all the dfs 
widget_df = pd.concat(dfs_list)
print(widget_df.shape)
widget_df.head()

(2835663, 5)


Unnamed: 0,Item Number,Step 1,Step 2,Step 3,Worker ID
0,9801216-1000,1.606306,4.341674,1.258898,1000
1,9801248-1000,1.173527,4.942131,17.236404,1000
2,9801280-1000,3.724143,5.553505,1.784641,1000
3,9801312-1000,1.036998,5.249245,1.157287,1000
4,9801344-1000,4.664264,5.882848,8.10643,1000


In [16]:
worker_df.to_csv('workers.csv', index=False)