In [6]:
import pandas as pd
import numpy as np
from datetime import datetime as dt

In [7]:
# Build a mission-number -> date lookup from the per-mission performance file.
date_mission_df = pd.read_csv('hera_ind_perf.csv').loc[:, ['mission', 'date']].drop_duplicates()
date_mission_dict = dict(date_mission_df.values)

# Attach the mission date to the normalized individual performance, IoI, ToI and MoI scores.
# The first column of the file is dropped by position (presumably a saved index -- TODO confirm).
hera_perf = pd.read_csv('Individual Performance All.csv').iloc[:, 1:]
hera_perf['date'] = hera_perf['Mission'].map(date_mission_dict)

# Only use hera performance for this study: missions that are absent from the
# lookup get a missing date from .map() and are dropped here.
# .notna() is used rather than negating .isna() with unary minus, which NumPy
# does not support for boolean arrays.
hera_perf = (
    hera_perf.loc[hera_perf['date'].notna()]
    .rename(columns={'role_id': 'role', 'Mission': 'mission', 'RED_Role': 'red_role', 'Team': 'team'})
    .loc[:, ['date', 'mission', 'role', 'red_role', 'team', 'normalized_perf', 'IoI', 'ToI', 'MoI']]
)

In [8]:
# Columns removed from, and renamed in, both SMM survey files.
_smm_dropped = ['Campaign', 'Mission', 'Session']
_smm_renamed = {'StartDate': 'date', 'Q2.1': 'role'}

# Dates that have performance scores.
hera_perf_date = list(hera_perf['date'].unique())

# First SMM survey and the dates it covers.
hera_smm1 = pd.read_csv('hera_smm1_survey.csv').drop(columns=_smm_dropped).rename(columns=_smm_renamed)
hera_smm1_date = list(hera_smm1['date'].unique())

# Second SMM survey and the dates it covers.
hera_smm2 = pd.read_csv('hera_smm2_survey.csv').drop(columns=_smm_dropped).rename(columns=_smm_renamed)
hera_smm2_date = list(hera_smm2['date'].unique())

# Keep only the dates present in the performance data AND in both SMM surveys.
mutual_date = list(set(hera_perf_date) & set(hera_smm1_date) & set(hera_smm2_date))

hera_perf = hera_perf[hera_perf['date'].isin(mutual_date)]

# One frame per performance measure, each without rows that lack a value.
hera_norm_perf = hera_perf[['date', 'role', 'normalized_perf']].dropna()
hera_IoI_perf = hera_perf[['date', 'role', 'IoI']].dropna()
hera_ToI_perf = hera_perf[['date', 'role', 'ToI']].dropna()
hera_MoI_perf = hera_perf[['date', 'role', 'MoI']].dropna()

In [9]:
# Load each individual's relation to the team, discard incomplete rows and
# store the relation and role codes as integers.
relation = (
    pd.read_csv('hera_relation.csv')
    .dropna()
    .astype({'relation': 'int', 'role': 'int'})
)

### Performance CSV

In [20]:
# compute performance csv for ALAAM
    # since the response variable in ALAAM is binary, I define high performer (1) and low performer (0)
    # based on the average normalized scores of the multi-team systems
    
def perf_csv(smm_df, perf_df, smm_name, perf_name):
    """Write the binary high/low-performer response variable for ALAAM.

    Rows of ``smm_df`` whose date is in the notebook-level ``mutual_date`` are
    matched to their score in ``perf_df`` on ``date`` and ``role``.  A matched
    row is coded 0 when its score is below the mean score of the matched rows
    sharing its date, and 1 otherwise.  The codes are written to
    ``f'{smm_name}_{perf_name}.csv'`` as one column named ``perf_name``.

    Parameters
    ----------
    smm_df : pandas.DataFrame
        Survey data; must contain ``date`` and ``role`` columns.
    perf_df : pandas.DataFrame
        Performance data; must contain ``date``, ``role`` and ``perf_name``.
    smm_name : str
        Prefix of the output file name.
    perf_name : str
        Performance column to binarize; also the output column name.

    Returns
    -------
    None
        Failures are reported with ``print`` rather than raised, so a loop
        over several surveys and measures keeps going.
    """
    try:
        smm = smm_df.loc[smm_df['date'].isin(mutual_date), ['date', 'role']]
        smm_perf = smm.merge(perf_df, on=['date', 'role'])

        # transform() returns the per-date mean aligned to every row, so the
        # output keeps the row order of the merge above. Merging a grouped
        # frame back in would re-sort the rows by date and could misalign this
        # file with the covariate files, which are written in merge order.
        # Selecting perf_name first also avoids averaging unrelated columns.
        avg_perf = smm_perf.groupby('date')[perf_name].transform('mean')

        # 0 only when strictly below the date average; everything else is 1.
        binary = (~(smm_perf[perf_name] < avg_perf)).astype('int')

        binary.rename(perf_name).to_frame().to_csv(f'{smm_name}_{perf_name}.csv')
    except Exception as e:
        print(f'perf_csv failed for {smm_name}/{perf_name}: {e}')

In [21]:
# Each entry pairs a performance frame with the name of its score column.
perf_list = [
    [hera_norm_perf, 'normalized_perf'],
    [hera_IoI_perf, 'IoI'],
    [hera_ToI_perf, 'ToI'],
    [hera_MoI_perf, 'MoI'],
]
# Each entry pairs an SMM survey frame with its output-file prefix
# (only the first survey is exported here).
smm_list = [[hera_smm1, 'smm1']]

# Write one response-variable csv per (performance measure, survey) combination.
for perf_frame, perf_label in perf_list:
    for smm_frame, smm_label in smm_list:
        perf_csv(smm_df=smm_frame, perf_df=perf_frame, smm_name=smm_label, perf_name=perf_label)

### Covariates CSV

In [22]:
# compute covariate csvs for ALAAM
    # each call writes one single-column csv for the requested covariate
    # (team, mcontrol, goal or relation), built from the SMM rows that have a matching performance score
    
# Covariates that are a pure function of the role number:
# covariate name -> (role -> value lookup, suffix of the output file name).
_ROLE_COVARIATES = {
    'team': ({1: 'Rob', 2: 'Rob', 3: 'Rob', 4: 'Eng', 5: 'Eng', 6: 'Eng',
              7: 'Geo', 8: 'Geo', 9: 'Geo', 10: 'HF', 11: 'HF', 12: 'HF'}, 'teamcov'),
    'mcontrol': ({1: 0, 2: 1, 3: 1, 4: 0, 5: 1, 6: 1,
                  7: 1, 8: 0, 9: 1, 10: 1, 11: 1, 12: 0}, 'mcontrol'),
    'goal': ({1: 0, 2: 0, 3: 0, 4: 1, 5: 1, 6: 1,
              7: 1, 8: 1, 9: 1, 10: 0, 11: 0, 12: 0}, 'goalcov'),
}


def cov_csv(smm_df, perf_df, smm_name, perf_name, cov_name):
    """Write one covariate column for ALAAM as a csv file.

    Rows of ``smm_df`` whose date is in the notebook-level ``mutual_date`` are
    matched to ``perf_df`` on ``date`` and ``role``; the covariate is then
    derived for the matched rows and written as a single-column csv.

    Parameters
    ----------
    smm_df : pandas.DataFrame
        Survey data; must contain ``date`` and ``role`` columns.
    perf_df : pandas.DataFrame
        Performance data; must contain ``date`` and ``role`` columns.
    smm_name, perf_name : str
        Used only to build the output file name.
    cov_name : {'team', 'mcontrol', 'goal', 'relation'}
        ``team``, ``mcontrol`` and ``goal`` are looked up from the role number;
        ``relation`` is merged in from the notebook-level ``relation`` frame.

    Returns
    -------
    None
        Failures, including an unrecognized ``cov_name``, are reported with
        ``print`` rather than raised, so a loop over several combinations
        keeps going.
    """
    try:
        smm = smm_df.loc[smm_df['date'].isin(mutual_date), ['date', 'role']]
        smm_perf = smm.merge(perf_df, on=['date', 'role'])

        if cov_name in _ROLE_COVARIATES:
            role_lookup, file_suffix = _ROLE_COVARIATES[cov_name]
            smm_perf[cov_name] = smm_perf['role'].map(role_lookup)
            smm_perf.loc[:, [cov_name]].to_csv(f'{smm_name}_{perf_name}_{file_suffix}.csv')
        elif cov_name == 'relation':
            # NOTE(review): this is an inner merge after de-duplication, so rows
            # without a relation entry are dropped and this file can have fewer
            # rows than the role-based covariate files -- confirm that is intended.
            smm_perf = smm_perf.drop_duplicates().merge(relation.drop_duplicates(), on=['date', 'role'])
            smm_perf.loc[:, ['relation']].to_csv(f'{smm_name}_{perf_name}_relation.csv')
        else:
            # Previously an unknown name fell through silently and wrote nothing.
            expected = sorted(list(_ROLE_COVARIATES) + ['relation'])
            raise ValueError(f'unknown cov_name {cov_name!r}; expected one of {expected}')

    except Exception as e:
        print(f'cov_csv failed for {smm_name}/{perf_name}/{cov_name}: {e}')

In [25]:
# Covariates exported for every (performance measure, survey) combination.
cov_list = ['team', 'mcontrol', 'relation', 'goal']

for perf_frame, perf_label in perf_list:
    for smm_frame, smm_label in smm_list:
        for covariate in cov_list:
            cov_csv(smm_df=smm_frame, perf_df=perf_frame, smm_name=smm_label,
                    perf_name=perf_label, cov_name=covariate)

### Block Diagonal Matrix CSV

In [12]:
# compute SMM block diagonal matrix csv for ALAAM

def _similarity_adjacency(responses):
    """Return the symmetric 0/1 adjacency matrix for one mission.

    ``responses`` is a 2-D array with one row per respondent.  Every pair of
    rows gets a scaled Euclidean distance, and a pair is linked when its
    distance is strictly below the mean pairwise distance of the mission.
    A mission with fewer than two respondents has no pairs and therefore
    yields an all-zero matrix instead of dividing by zero.
    """
    n_rows = responses.shape[0]
    adjacency = np.zeros((n_rows, n_rows), dtype=int)

    pairs = []
    for j in range(n_rows - 1):
        for k in range(j + 1, n_rows):
            diff = responses[j] - responses[k]
            # The constant factor sqrt(10) / 10 rescales every distance alike,
            # so it does not change which pairs fall below the mean.
            # NOTE(review): 10 is presumably the number of survey items -- confirm.
            pairs.append((j, k, np.sqrt(np.dot(diff.T, diff)) / 10 * np.sqrt(10)))

    if not pairs:
        return adjacency

    mean_distance = sum(dist for _, _, dist in pairs) / len(pairs)
    for j, k, dist in pairs:
        if dist < mean_distance:
            adjacency[j, k] = adjacency[k, j] = 1
    return adjacency


def _block_diagonal(blocks):
    """Place square matrices along the diagonal of one zero-filled matrix."""
    total = sum(block.shape[0] for block in blocks)
    combined = np.zeros((total, total), dtype=int)
    offset = 0
    for block in blocks:
        end = offset + block.shape[0]
        combined[offset:end, offset:end] = block
        offset = end
    return combined


def bd_matrix(smm_df, perf_df, perf_name, smm_name):
    """Write the SMM similarity network for ALAAM as a block-diagonal csv.

    Rows of ``smm_df`` whose date is in the notebook-level ``mutual_date`` are
    matched to ``perf_df`` on ``date`` and ``role``.  For each date, in order
    of first appearance, respondents are linked when their survey answers are
    closer than the mean pairwise distance of that date.  The per-date
    adjacency matrices are stacked along the diagonal and written to
    ``f'{smm_name}_{perf_name}_bd_matrix.csv'``.

    Parameters
    ----------
    smm_df : pandas.DataFrame
        Survey data.  Columns from the third onward are used as the answer
        vector (assumes ``date`` and ``role`` are the first two columns --
        TODO confirm against the survey files).
    perf_df : pandas.DataFrame
        Performance data with ``date``, ``role`` and ``perf_name`` columns;
        used only to select rows that have a score.
    perf_name : str
        Performance column, dropped after the merge; part of the file name.
    smm_name : str
        Prefix of the output file name.

    Returns
    -------
    None
        Failures are reported with ``print`` rather than raised, so a loop
        over several surveys and measures keeps going.
    """
    try:
        hera = smm_df.loc[smm_df['date'].isin(mutual_date), :].merge(
            perf_df, on=['date', 'role']).drop(columns=perf_name)

        blocks = [
            _similarity_adjacency(hera[hera['date'] == date].iloc[:, 2:].to_numpy())
            for date in hera['date'].unique()
        ]
        if not blocks:
            raise ValueError('no rows are shared by the SMM survey and the performance data')

        pd.DataFrame(_block_diagonal(blocks)).astype('int').to_csv(
            f'{smm_name}_{perf_name}_bd_matrix.csv')

    except Exception as e:
        print(f'bd_matrix failed for {smm_name}/{perf_name}: {e}')

In [28]:
# Write one block-diagonal network csv per (performance measure, survey) combination.
for perf_frame, perf_label in perf_list:
    for smm_frame, smm_label in smm_list:
        bd_matrix(smm_df=smm_frame, perf_df=perf_frame, smm_name=smm_label, perf_name=perf_label)