Generate datasets that would represent the timeline from initially contacting a creator
to completing their campaign. This is represented via "tickets" where each creator-employee pair
would have a ticket with an associated start and end timestamp.

In [85]:
import datetime
import pandas as pd
import random
import names
random.seed(1) # To maintain the same results every time

In [86]:
# (MIN, MAX) number of days each department may take to complete
DEPARTMENT_CONFIG = dict(
    SALES=dict(
        min_max=(2, 30),
        n_emp=10
    ),
    CONTRACT=dict(
        min_max=(1,5),
        n_emp=3
        ),
    DESIGN=dict(
        min_max=(10,50),
        n_emp=15
        ),
    MANUFACTURING=dict(
        min_max=(10,40),
        n_emp=4
        ),
    CAMPAIGN=dict(
        min_max=(21,25),
        n_emp=3
        )
)

N_CREATORS = 700
START_DATE = datetime.datetime.fromisoformat('2019-01-01') # Day company started
END_DATE = datetime.datetime.now() # Last day available

In [87]:
def create_names(n):
    """Create n random names."""
    return [names.get_full_name() for x in range(n)]

def get_random_date():
    """Returns a random datetime between two datetime objects."""
    delta = END_DATE - START_DATE
    int_delta = (delta.days * 24 * 60 * 60) + delta.seconds
    random_second = random.randrange(int_delta)
    return START_DATE + datetime.timedelta(seconds=random_second)

In [92]:
def create_creator_table():
    "Create creators.csv"
    creator_names = create_names(N_CREATORS)
    df = pd.DataFrame()
    df['Creator'] = creator_names
    df.to_csv("./resources/creators.csv")
    return df

def create_employee_table():
    "Create employees.csv"
    dfs = []
    for department, info in DEPARTMENT_CONFIG.items():
        df = pd.DataFrame()
        df["Name"] = create_names(info['n_emp'])
        df["Department"] = [department]*df.shape[0]
        dfs.append(df)
    
    dfs = pd.concat(dfs).reset_index(drop=True)
    dfs.to_csv("./resources/employees.csv")
    return dfs

def create_department_table(previous_department_data, min_max, employees, department_name, creators):
    """Create [DEPARTMENT].csv"""
    if previous_department_data is None:    
        df = pd.DataFrame()
        df['Creator'] = creators['Creator']
        df['start_date'] = df['Creator'].apply(lambda x: get_random_date())
    
    else:
        previous_department_data = previous_department_data[~previous_department_data["end_date"].isnull()]
        df = previous_department_data
        df['start_date'] = df['end_date']

    df = df.sort_values("start_date").reset_index(drop=True)

    e_list = []
    for employee in employees['Name']:
        e_list += [employee]*(int(df.shape[0]/employees.shape[0] + 1))
    random.shuffle(e_list)
    df['Employee'] = e_list[:df.shape[0]]

    df['end_date'] = df['start_date'].apply(lambda x: x+datetime.timedelta(days=random.randrange(*min_max)))
    proportion_incomplete = random.uniform(0.9,0.95) # % of creators who are still at this step
    df['end_date'] = [x if i <= int(proportion_incomplete*df.shape[0]) else None for i, x in enumerate(df['end_date'])]

    df.to_csv(f"./intermediates/{department_name}.csv")
    return df


In [93]:
def main():
    """
    Iterate over departments and create random end timestamps 
    for each creator.
    """
    creators = create_creator_table()
    employees = create_employee_table()

    previous_department_data = None
    for department, info in DEPARTMENT_CONFIG.items():
        department_employees = employees[employees["Department"] == department]
        department_data = create_department_table(previous_department_data, info["min_max"], department_employees, department, creators)
        previous_department_data = department_data
main()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['start_date'] = df['end_date']
