#Packages

In [None]:
import pandas as pd
import numpy as np
import math
import random
import multiprocessing as mp

#Create Google's culture

-- no need to rerun

In [None]:
# Initial culture scores for "Google", which the simulation represents as
# firms 0-5 (len_google firms of `employee` people each, 180 people in total).
# The other per-employee variables of Google are kept as they are in the
# input file for Model 1.
mean, sd = 2, 0.1
len_google, employee = 6, 30
num_repeats = 10

# Draw one sample of 180 scores, then repeat that same sample once per
# initial condition (10 of them) so every condition starts from an identical
# Google culture.
single_sample = np.random.normal(loc=mean, scale=sd, size=len_google * employee)
data = [*np.tile(single_sample, num_repeats)]

#Simulation Model

In [None]:
# Number of runs you want to parallelize per group. main() uses
# range(n_processes) as the run_id values, and simulation() seeds both
# random generators with run_id.
n_processes = 5
# Number of groups the whole input is split into. simulation() assigns
# num_units // n_groups consecutive initial conditions to each group, so this
# should be one of 10/5/2 because we have 10 input conditions.
n_groups = 2

def _split_by_firm(values, fm_no, employee):
    """Cut a flat per-employee list into one list per firm.

    Firm i receives the slice [i * employee, (i + 1) * employee) of `values`.
    Every firm gets its own new list, so the result can be mutated freely.
    """
    return [list(values[i * employee:(i + 1) * employee]) for i in range(fm_no)]


def _firm_cultures(c_all, google_ids):
    """Return one culture score per firm: the median of its employees' scores.

    Firms whose index is in `google_ids` are treated as a single merged firm
    ("Google"): each of them receives the median over their pooled employees.
    """
    pooled = [score
              for i, firm in enumerate(c_all) if i in google_ids
              for score in firm]
    google_culture = np.median(pooled) if pooled else float('nan')
    return [google_culture if i in google_ids else np.median(firm)
            for i, firm in enumerate(c_all)]


def simulation(args):
    """Run the whole parameter sweep for one (group, run_id) task.

    Args:
        args: tuple ``(group, run_id)``. ``group`` selects which slice of the
            initial conditions is simulated (the conditions are split into
            the module-level ``n_groups`` equal slices); ``run_id`` seeds both
            ``random`` and ``numpy.random`` and is part of the output name.

    Returns:
        A status string ``"Group {group}, Run {run_id} completed."``.

    Side effects:
        Reads 'initial_conditions_strong_culture_big_google.csv' (columns
        used: var_win, culture, tenure, employments) and writes one row per
        (initial condition, parameter combination) to
        'output_data_group_{group}_run_{run_id}.csv'.

    Each simulated month consists of: refreshing firm cultures, departure,
    recruitment (random entrant or a leaver of another firm), socialization.
    Both culture-change outputs are measured on the state after the last
    month.
    """
    import itertools  # function-scope import: only this function needs it

    group, run_id = args
    random.seed(run_id)
    np.random.seed(run_id)

    ### parameters that will be varied------------------------------------------
    r1list = (0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1)  # varying r1 of other firms
    s1list = (0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1,
              1.1, 1.2, 1.3, 1.4, 1.5)  # varying s1 of other firms
    b1list = (0, 0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1)  # varying b1 of other firms
    s0list = (0.03, )  # base rate of random entry. hiring parameters, s0=1 corresponds to simulating isolated firms
    r0list = (0.05, )  # turnover base rate; corresponding to high/low industry turnover conditions
    r1_google_list = (.3, 1)  # google's r1
    b1_google_list = (.1, .7)  # google's b1
    s1_google_list = (.3, 1)  # google's s1

    ### Set global parameters for simulations
    employee = 30    # default number of employees per firm
    fm_no = 30       # default number of firms (the first six are merged as Google)
    n_periods = 120  # number of time periods (months)
    var_win = 0.1    # which within variance in the input data?

    # departure parameters (the base rate r0 is swept through r0list above)
    r2 = 0.05  # max increase in turnover probability

    # socialization parameters
    b0 = 0
    b2 = .3  # speed of socialization susceptibility decline by tenure
    b3 = .1  # speed of socialization susceptibility decline by prior employments

    # firms merged into the big "Google" (180 people)
    google_set = [0, 1, 2, 3, 4, 5]
    google_ids = frozenset(google_set)
    other_ids = [i for i in range(fm_no) if i not in google_ids]

    input_path = 'initial_conditions_strong_culture_big_google.csv'
    output_path = f'output_data_group_{group}_run_{run_id}.csv'
    # TODO: add date automatically

    data = []  # a big table for results

    # Load the entire CSV file and keep only the requested within-variance rows
    df_all = pd.read_csv(input_path)
    filtered_df = df_all[df_all['var_win'] == var_win]

    # One unit (= one initial condition) is a block of employee * fm_no rows
    # (900 people); the units are split evenly between the groups.
    unit_rows = int(employee * fm_no)
    num_units = len(filtered_df) // unit_rows
    units_per_group = num_units // n_groups
    group_units = range(group * units_per_group, (group + 1) * units_per_group)

    # Same iteration order as nesting the loops in this order, outermost first.
    param_grid = list(itertools.product(
        s0list, r0list, r1list, b1list, s1list,
        r1_google_list, b1_google_list, s1_google_list))

    # Loop over different initial conditions------------------------------------
    for unit in group_units:
        df_unit = filtered_df.iloc[unit * unit_rows:(unit + 1) * unit_rows]

        # Extract updating variables from selected initial condition
        culture = list(df_unit['culture'])
        tenure = list(df_unit['tenure'])
        employments = list(df_unit['employments'])

        # Loop over different parameters ---------------------------------------
        for (s0, r0, r1_default, b1_default, s1_default,
             r1, b1, s1) in param_grid:

            # c_all, t_all, e_all: one row per firm, one entry per employee
            # (culture score, tenure, number of employments). Rebuilt from
            # the initial condition for every parameter combination.
            c_all = _split_by_firm(culture, fm_no, employee)
            t_all = _split_by_firm(tenure, fm_no, employee)
            e_all = _split_by_firm(employments, fm_no, employee)

            # Reference values for the culture-change outputs: merged Google
            # culture and the mean culture of the other firms at the start.
            firm_culture = _firm_cultures(c_all, google_ids)
            initial_firm0 = firm_culture[google_set[0]]
            initial_other = np.mean([firm_culture[i] for i in other_ids])

            ave_hire_list = []            # share of all positions vacated
            carrier_list = []             # share of vacancies filled from the pool
            frac_googlers_list = []       # rehire from google for other firms
            frac_g_hire_others_list = []  # rehire from other firms for google

            #------------------loop over months---------------------------------
            for _ in range(n_periods):
                # Firm culture is recomputed (not appended to) every month so
                # that departure, recruitment and socialization all use the
                # current culture.
                firm_culture = _firm_cultures(c_all, google_ids)

                #-----------departure-------------------------------------------
                # pool: one (source firm, culture score, employments) tuple
                # per leaver, in firm order.
                pool = []
                vacancies = []
                for i in range(fm_no):
                    firm_r1 = r1 if i in google_ids else r1_default
                    stays = []
                    for score in c_all[i]:
                        misfit_sq = (firm_culture[i] - score) ** 2
                        # the equation 4: leaving probability is r0 for a
                        # perfect fit and approaches r0 + r2 as misfit grows
                        p_leave = (r0 + r2) - r2 * math.exp(
                            -misfit_sq / (2 * firm_r1 * firm_r1))
                        stays.append(random.random() > p_leave)

                    for j, stay in enumerate(stays):
                        if not stay:
                            pool.append((i, c_all[i][j], e_all[i][j]))

                    # Stayers gain one month of tenure; leavers are removed.
                    c_all[i] = [v for v, stay in zip(c_all[i], stays) if stay]
                    t_all[i] = [v + 1 for v, stay in zip(t_all[i], stays) if stay]
                    e_all[i] = [v for v, stay in zip(e_all[i], stays) if stay]
                    vacancies.append(len(stays) - len(c_all[i]))

                #-----------recruitment-----------------------------------------
                carriers = 0       # vacancies filled from the pool
                googler = 0        # other firms hiring a Google leaver
                g_hire_others = 0  # Google hiring a leaver of another firm

                total_turnover = sum(vacancies)
                google_turnover = sum(vacancies[g] for g in google_set)
                others_turnover = total_turnover - google_turnover
                ave_hire = total_turnover / (employee * fm_no)

                # One slot per vacancy, filled in random order
                # (slot-based randomization).
                slots = [i for i in range(fm_no) for _ in range(vacancies[i])]
                random.shuffle(slots)

                for i in slots:
                    hiring_is_google = i in google_ids
                    firm_s1 = s1 if hiring_is_google else s1_default

                    # Only leavers of other firms are considered; Google (the
                    # merged firm) never rehires from any of its own firms.
                    if hiring_is_google:
                        candidates = [p for p in pool if p[0] not in google_ids]
                    else:
                        candidates = [p for p in pool if p[0] != i]
                    eligible = [p for p in candidates
                                if abs(firm_culture[i] - p[1]) < 2 * firm_s1]

                    #----recruitment from external sources----------------------
                    if random.random() < s0 or not eligible:
                        new_score = float(
                            np.random.normal(firm_culture[i], firm_s1))
                        c_all[i].append(new_score)
                        t_all[i].append(0)
                        e_all[i].append(1)
                        continue

                    #----recruitment from the available pool--------------------
                    # NOTE(review): a hired leaver stays in `pool`, so the
                    # same person can be hired for several slots - confirm
                    # that sampling with replacement is intended.
                    source_firm, score, prior_employments = random.choice(eligible)
                    carriers += 1
                    if hiring_is_google:
                        g_hire_others += 1
                    elif source_firm in google_ids:
                        googler += 1

                    c_all[i].append(score)
                    t_all[i].append(0)
                    e_all[i].append(prior_employments + 1)

                #----socialization----------------------------------------------
                for i in range(fm_no):
                    firm_b1 = b1 if i in google_ids else b1_default
                    for j in range(len(c_all[i])):
                        # the equation 5: susceptibility declines with tenure
                        # and with the number of employments
                        susceptibility = firm_b1 * math.exp(
                            -b2 * (t_all[i][j] - 1)
                            - b3 * (e_all[i][j] - 1)) + b0
                        c_all[i][j] = c_all[i][j] + (
                            firm_culture[i] - c_all[i][j]) * susceptibility

                # store average hires and carries per time period; a month
                # without any vacancy in a category contributes 0
                ave_hire_list.append(ave_hire)
                carrier_list.append(
                    carriers / total_turnover if total_turnover != 0 else 0)
                frac_googlers_list.append(
                    googler / others_turnover if others_turnover != 0 else 0)
                frac_g_hire_others_list.append(
                    g_hire_others / google_turnover if google_turnover != 0 else 0)

            turnover_average = np.mean(ave_hire_list)
            carriers_average = np.mean(carrier_list)
            frac_googlers_average = np.mean(frac_googlers_list)
            frac_g_hire_others_average = np.mean(frac_g_hire_others_list)

            # Cultures after the last month, used for both change measures.
            final_culture = _firm_cultures(c_all, google_ids)

            #----keep data -----------------------------------------------------
            data.append({
                'initcond': unit,
                's0': s0,
                'r0': r0,
                'r1_other': r1_default,
                'b1_other': b1_default,
                's1_other': s1_default,
                'r1_google': r1,
                'b1_google': b1,
                's1_google': s1,
                'tenure_google': np.mean(
                    [v for g in google_set for v in t_all[g]]),
                'tenure_others': sum(
                    np.mean(t_all[i]) for i in other_ids) / len(other_ids),
                'employment_google': np.mean(
                    [v for g in google_set for v in e_all[g]]),
                'employment_others': sum(
                    np.mean(e_all[i]) for i in other_ids) / len(other_ids),
                'turnover': turnover_average,
                'carriers': carriers_average,
                'google_culture_change':
                    final_culture[google_set[0]] - initial_firm0,
                'other_culture_change':
                    np.mean([final_culture[i] for i in other_ids]) - initial_other,
                'googlers_to_other_firms': frac_googlers_average,
                'others_to_google': frac_g_hire_others_average,
            })

    df = pd.DataFrame(data)
    df.to_csv(output_path, index=False)
    return f"Group {group}, Run {run_id} completed."


#----parallelize----------------------------------------------------------------
def main():
    """Run simulation() once per (group, run_id) pair in a process pool.

    One worker process is created per task (n_processes * n_groups in total);
    the status strings returned by the workers are printed when all are done.
    """
    # Group-major task order: all runs of group 0, then all runs of group 1, ...
    jobs = []
    for group in range(n_groups):
        for run_id in range(n_processes):
            jobs.append((group, run_id))

    worker_count = n_processes * n_groups
    with mp.Pool(worker_count) as workers:
        messages = workers.map(simulation, jobs)
    print(messages)


if __name__ == '__main__':
    main()