In [8]:
import pandas as pd
import random
from faker import Faker
from faker.providers import DynamicProvider

In [9]:
# Seed both random sources used in this notebook so that a fresh
# Restart & Run All regenerates the same data: Faker produces the ids,
# names and categorical picks, the stdlib `random` module produces the
# experience and division draws.
Faker.seed(27)
random.seed(27)

fake = Faker()

# Custom categorical providers. Each key becomes a method on `fake`
# (fake.posn_type(), fake.location(), fake.dept(), fake.grade()) that
# picks one of the listed elements.
# The providers are registered straight from this table instead of through
# module-level variables named posn_type / dept / grade: the data-generation
# cells further down bind lists to those same names, which used to overwrite
# the provider objects.
provider_elements = {
    "posn_type": ["Analyst", "Officer", "Engineer"],
    "location": ["Boston", "Chicago"],
    "dept": ["Logistics", "Research"],
    "grade": [11, 12, 13],
}

for provider_name, elements in provider_elements.items():
    fake.add_provider(
        DynamicProvider(provider_name=provider_name, elements=elements)
    )

In [10]:
# Synthetic positions table: 100 rows, each with a unique 'P'-prefixed
# six-digit id and a randomly picked type, city and department.
posn_id, posn_type, city, dept = [], [], [], []

for row in range(100):
    # fake.unique guarantees the same number is never handed out twice
    id_number = fake.unique.random_int(min=111111, max=999999)
    posn_id.append('P' + str(id_number))
    posn_type.append(fake.posn_type())
    city.append(fake.location())
    dept.append(fake.dept())

# Column name -> list of values, in the column order shown in the output
d = dict(posn_id=posn_id, posn_type=posn_type, city=city, dept=dept)
posn_df = pd.DataFrame(d)
posn_df.head()

Unnamed: 0,posn_id,posn_type,city,dept
0,P595078,Analyst,Chicago,Research
1,P805891,Engineer,Boston,Logistics
2,P323716,Officer,Chicago,Research
3,P583453,Analyst,Chicago,Logistics
4,P794502,Officer,Boston,Logistics


In [11]:
# Synthetic employees table: 90 rows, each with a unique 'E'-prefixed
# six-digit id, a fake name, a grade from the custom provider and
# 1-25 years of experience.
emp_id, emp_name, grade, experience = [], [], [], []

for row in range(90):
    # fake.unique guarantees the same number is never handed out twice
    id_number = fake.unique.random_int(min=111111, max=999999)
    emp_id.append('E' + str(id_number))
    emp_name.append(fake.name())
    grade.append(fake.grade())
    experience.append(random.randint(1, 25))

# Column name -> list of values, in the column order shown in the output
d = dict(emp_id=emp_id, emp_name=emp_name, grade=grade, experience=experience)
emp_df = pd.DataFrame(d)
emp_df.head()

Unnamed: 0,emp_id,emp_name,grade,experience
0,E769769,Amanda Molina,11,23
1,E978248,Samantha Herman,12,17
2,E864288,Gregory Jones,13,17
3,E132078,Alison Williams,11,22
4,E842653,Mr. Chad Gregory,11,21


In [15]:
# Give every position a division inside its department.
# Departments are processed in the order listed here (Logistics, then
# Research) and each department's rows are visited in index order, so the
# sequence of random draws is fixed for a given seed.
division_options = {
    'Logistics': ['L0','L1','L2'],
    'Research': ['R0','R1','R2'],
}

posn_df['div'] = ''  # reset first so re-running the cell starts clean
for dept_name, options in division_options.items():
    dept_rows = posn_df.index[posn_df['dept'] == dept_name]
    for idx in dept_rows:
        posn_df.at[idx, 'div'] = random.choice(options)
posn_df.head()

Unnamed: 0,posn_id,posn_type,city,dept,div
0,P595078,Analyst,Chicago,Research,R0
1,P805891,Engineer,Boston,Logistics,L1
2,P323716,Officer,Chicago,Research,R1
3,P583453,Analyst,Chicago,Logistics,L0
4,P794502,Officer,Boston,Logistics,L2


In [16]:
# add manager positions
# Two supervisors (divisions 1 and 2) and one manager (division 0) for each
# department. Values are in column order: posn_id, posn_type, city, dept, div.
manager_posns = [
    ['P001101','Supervisor','Chicago','Logistics','L1'],
    ['P001102','Supervisor','Boston','Logistics','L2'],
    ['P001100','Manager','Chicago','Logistics','L0'],

    ['P001201','Supervisor','Boston','Research','R1'],
    ['P001202','Supervisor','Chicago','Research','R2'],
    ['P001200','Manager','Boston','Research','R0'],
]

# Appending at label len(index) adds a new row every time it runs, so ids
# that are already present are skipped: re-running this cell must not
# create duplicate positions.
existing_posn_ids = set(posn_df['posn_id'])
for posn in manager_posns:
    if posn[0] in existing_posn_ids:
        continue
    posn_df.loc[len(posn_df.index)] = posn
    existing_posn_ids.add(posn[0])

In [17]:
# add a director position
# Values are in column order: posn_id, posn_type, city, dept, div.
director_posn = ['P001300','Director','Boston','Corporate','Corporate']

# Only append when the id is not there yet, so re-running this cell does
# not add a second director row.
if director_posn[0] not in set(posn_df['posn_id']):
    posn_df.loc[len(posn_df.index)] = director_posn

In [18]:
# add manager and director employees
# (emp_id, grade, minimum experience); experience is drawn between the
# minimum and 25. Names come from Faker.
leader_specs = [
    ('E001101', 14, 10),
    ('E001102', 14, 10),
    ('E001100', 15, 10),

    ('E001201', 14, 10),
    ('E001202', 14, 10),
    ('E001200', 15, 10),

    ('E001300', 15, 20),
]

# Ids that already exist are skipped so that re-running this cell neither
# duplicates these employees nor consumes extra random draws.
existing_emp_ids = set(emp_df['emp_id'])
for leader_id, leader_grade, min_experience in leader_specs:
    if leader_id in existing_emp_ids:
        continue
    # Row values follow the column order emp_id, emp_name, grade, experience
    emp_df.loc[len(emp_df.index)] = [
        leader_id,
        fake.name(),
        leader_grade,
        random.randint(min_experience, 25),
    ]
    existing_emp_ids.add(leader_id)

In [25]:
# add reporting relationships
# Start from an empty edge list. The cells below append one row per position:
# 'source' is the position id, 'target' is the position id it reports to,
# and 'type' names the relationship.
rpt_df = pd.DataFrame(columns=['source','target','type'])
rpt_df.head()

Unnamed: 0,source,target,type


In [26]:
# Logistics reporting lines as (division, head of that division, the head's
# own superior). Every position in a division reports to the division head;
# the head itself reports one level up.
logistics_lines = [
    ('L1', 'P001101', 'P001100'),
    ('L2', 'P001102', 'P001100'),
    ('L0', 'P001100', 'P001300'),
]

# Each position gets exactly one 'reports_to' edge. Sources that already
# have one are skipped, so re-running this cell does not duplicate edges.
linked_sources = set(rpt_df['source'])
for div_code, head_id, superior_id in logistics_lines:
    for source_id in posn_df.loc[posn_df['div'] == div_code, 'posn_id']:
        if source_id in linked_sources:
            continue
        target_id = superior_id if source_id == head_id else head_id
        rpt_df.loc[len(rpt_df.index)] = [source_id, target_id, 'reports_to']
        linked_sources.add(source_id)

In [27]:
# Research reporting lines as (division, head of that division, the head's
# own superior). Every position in a division reports to the division head;
# the head itself reports one level up.
research_lines = [
    ('R1', 'P001201', 'P001200'),
    ('R2', 'P001202', 'P001200'),
    ('R0', 'P001200', 'P001300'),
]

# Each position gets exactly one 'reports_to' edge. Sources that already
# have one are skipped, so re-running this cell does not duplicate edges.
linked_sources = set(rpt_df['source'])
for div_code, head_id, superior_id in research_lines:
    for source_id in posn_df.loc[posn_df['div'] == div_code, 'posn_id']:
        if source_id in linked_sources:
            continue
        target_id = superior_id if source_id == head_id else head_id
        rpt_df.loc[len(rpt_df.index)] = [source_id, target_id, 'reports_to']
        linked_sources.add(source_id)

In [28]:
# Preview the first few reporting edges
rpt_df.head()

Unnamed: 0,source,target,type
0,P805891,P001101,reports_to
1,P724605,P001101,reports_to
2,P762817,P001101,reports_to
3,P216208,P001101,reports_to
4,P485073,P001101,reports_to


In [29]:
# write the csv files
# Positions table; index=False keeps the DataFrame's row index out of the file
posn_df.to_csv('posn_df.csv', index=False)

In [32]:
# Employees table; index=False keeps the DataFrame's row index out of the file
emp_df.to_csv('emp_df.csv', index=False)

In [31]:
# Reporting edges (source, target, type); index=False keeps the row index out of the file
rpt_df.to_csv('rpt_df.csv', index=False)