In [None]:
import pandas as pd
import numpy as np
import math
import random
import multiprocessing as mp

# Number of independent simulation runs. main() uses this single value both as
# the size of the worker pool and as the count of run ids (0 .. n_processes - 1)
# handed to simulation().
n_processes = 5  # Number of runs you want to parallelize

def _split_by_firm(values, fm_no, employee):
    """Cut a flat, firm-ordered sequence into ``fm_no`` lists of ``employee`` items."""
    return [list(values[i * employee:(i + 1) * employee]) for i in range(fm_no)]


def _run_scenario(culture, tenure, employments, *, s0, s1, r0, r1, r2,
                  b0, b1, b2, b3, google_s1, google_culture_mean,
                  fm_no, employee, time):
    """Simulate ``time`` periods for one parameter combination.

    ``culture``, ``tenure`` and ``employments`` are flat lists ordered firm by
    firm (``employee`` consecutive entries per firm). They are not modified.

    Returns a tuple ``(initial, final)`` of the mean over firms of the firm
    culture, where a firm's culture is the median of its employees' scores.
    Firm culture is refreshed at the start of every period, so ``final`` is
    the value that was in force during the last simulated period. With
    ``time == 0`` the final value equals the initial one.
    """
    # One row per firm, one column per employee.
    c_all = _split_by_firm(culture, fm_no, employee)
    t_all = _split_by_firm(tenure, fm_no, employee)
    e_all = _split_by_firm(employments, fm_no, employee)

    firm_culture = [np.median(scores) for scores in c_all]
    initial_culture_overall = np.mean(firm_culture)
    final_culture_overall = initial_culture_overall

    for _ in range(time):
        # Firm culture is fixed for the whole period; departure, hiring and
        # socialization below all refer to this snapshot.
        firm_culture = [np.median(scores) for scores in c_all]

        # ---- departure --------------------------------------------------
        # All stay/leave draws are made (firm by firm, employee by employee)
        # before any hiring draw, so the random stream is consumed in a
        # fixed, reproducible order.
        vacancies = []
        for i in range(fm_no):
            fc = firm_culture[i]
            kept_c, kept_t, kept_e = [], [], []
            for c, t, e in zip(c_all[i], t_all[i], e_all[i]):
                # Threshold equals r0 for a perfect cultural fit and
                # approaches r0 + r2 as the distance to firm culture grows.
                leave_threshold = (r0 + r2) - r2 * math.exp(
                    -(((fc - c) ** 2) / (2 * r1 * r1)))
                if random.random() > leave_threshold:
                    kept_c.append(c)
                    kept_t.append(t + 1)  # retained: one more period of tenure
                    kept_e.append(e)
            vacancies.append(len(c_all[i]) - len(kept_c))
            c_all[i], t_all[i], e_all[i] = kept_c, kept_t, kept_e

        # ---- hiring -----------------------------------------------------
        # Every vacated slot is refilled, in firm order.
        for i, n_open in enumerate(vacancies):
            for _slot in range(n_open):
                if random.random() < s0:
                    # Entry centred on the hiring firm's own culture.
                    loc, scale = firm_culture[i], s1
                else:
                    # Entry centred on the fixed "google" culture.
                    loc, scale = google_culture_mean, google_s1
                c_all[i].append(np.random.normal(loc, scale, 1)[0])
                t_all[i].append(0)
                e_all[i].append(1)

        # ---- socialization ----------------------------------------------
        # Each employee moves toward the firm culture; the step shrinks
        # exponentially with tenure (b2) and number of employments (b3).
        for i in range(fm_no):
            fc = firm_culture[i]
            c_all[i] = [
                c + (fc - c) * (b1 * math.exp(-b2 * (t - 1) - b3 * (e - 1)) + b0)
                for c, t, e in zip(c_all[i], t_all[i], e_all[i])
            ]

        final_culture_overall = np.mean(firm_culture)

    return initial_culture_overall, final_culture_overall


def simulation(
    run_id,
    *,
    input_path='initial_conditions_201810.csv',
    output_path=None,
    r1list=(0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1),
    s1list=(0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1, 1.1, 1.2, 1.3, 1.4, 1.5),
    b1list=(0, 0.1, .2, .3, .4, .5, .6, .7, .8, .9, 1),
    s0list=(0.8,),
    r0list=(0.05,),
    google_s1_list=(0.3, 1),
    google_culture_mean=2,
    employee=30,
    fm_no=30,
    time=120,
    var_win=0.1,
    r2=0.05,
    b0=0,
    b2=.3,
    b3=.1,
):
    """Run the full parameter sweep for one seed and write the results to CSV.

    Both ``random`` and ``numpy.random`` are seeded with ``run_id``. The input
    CSV is filtered to rows whose ``var_win`` column equals ``var_win`` and is
    then cut into consecutive blocks of ``employee * fm_no`` rows; each block
    is one initial condition. Every initial condition is simulated for every
    combination of ``s0list x r0list x r1list x b1list x s1list x
    google_s1_list``.

    Args:
        run_id: Seed for the random generators; also used in the default
            output file name and in the returned message.
        input_path: CSV with columns ``var_win``, ``culture``, ``tenure`` and
            ``employments``.
        output_path: Destination CSV. Defaults to
            ``output_data_run_{run_id}.csv``.
        r1list: Values of r1, the width of the cultural-fit term in the
            departure rule.
        s1list: Values of s1, the spread of hires drawn around firm culture.
        b1list: Values of b1, the socialization strength.
        s0list: Values of s0, the probability that a hire is drawn around the
            hiring firm's culture rather than around ``google_culture_mean``.
        r0list: Values of r0, the turnover base rate.
        google_s1_list: Values of the spread of hires drawn around
            ``google_culture_mean``.
        google_culture_mean: Centre of the alternative hiring distribution.
        employee: Number of employees per firm.
        fm_no: Number of firms.
        time: Number of simulated periods.
        var_win: Value of the ``var_win`` column selecting the input rows.
        r2: Maximum increase in turnover probability.
        b0: Base rate of socialization.
        b2: Speed of socialization decline with tenure.
        b3: Speed of socialization decline with number of employments.

    Returns:
        The string ``"Run {run_id} completed."``.

    Raises:
        ValueError: If ``employee * fm_no`` is not positive.
    """
    import itertools

    random.seed(run_id)
    np.random.seed(run_id)

    if output_path is None:
        output_path = f'output_data_run_{run_id}.csv'

    unit_size = int(employee * fm_no)
    if unit_size <= 0:
        raise ValueError("employee * fm_no must be positive")

    df_all = pd.read_csv(input_path)
    filtered_df = df_all[df_all['var_win'] == var_win]
    num_units = len(filtered_df) // unit_size  # number of initial conditions

    data = []  # one result row per (initial condition, parameter combination)
    for unit in range(num_units):
        df_unit = filtered_df.iloc[unit * unit_size:(unit + 1) * unit_size]
        culture = list(df_unit['culture'])
        tenure = list(df_unit['tenure'])
        employments = list(df_unit['employments'])

        grid = itertools.product(
            s0list, r0list, r1list, b1list, s1list, google_s1_list)
        for s0, r0, r1, b1, s1, google_s1 in grid:
            initial, final = _run_scenario(
                culture, tenure, employments,
                s0=s0, s1=s1, r0=r0, r1=r1, r2=r2,
                b0=b0, b1=b1, b2=b2, b3=b3,
                google_s1=google_s1,
                google_culture_mean=google_culture_mean,
                fm_no=fm_no, employee=employee, time=time,
            )
            data.append({
                'initcond': unit,
                's0': s0,
                'r0': r0,
                'r1': r1,
                'b1': b1,
                'other_s1': s1,
                'google_s1': google_s1,
                'culture_change': final - initial,
            })

    # Written once, after every initial condition has been simulated. The
    # explicit column list keeps the header even when there are no rows.
    columns = ['initcond', 's0', 'r0', 'r1', 'b1',
               'other_s1', 'google_s1', 'culture_change']
    pd.DataFrame(data, columns=columns).to_csv(output_path, index=False)
    return f"Run {run_id} completed."

### parallelize-----------------------------------------------------------------
def main():
    """Run simulation() once per run id in a worker pool and print the results.

    The run ids are 0 .. n_processes - 1 and the pool has n_processes workers;
    the list of messages returned by simulation() is printed when all finish.
    """
    run_ids = range(n_processes)
    with mp.Pool(processes=n_processes) as workers:
        messages = workers.map(simulation, run_ids)
    print(messages)

# Entry-point guard: start the pool only when this file is executed as a
# script. multiprocessing worker processes may re-import this module (e.g.
# under the "spawn" start method); without the guard each import would try to
# launch another pool.
if __name__ == '__main__':
    main()