In [1]:
import tarfile

tar = tarfile.open('comm-f2f-Resistance-network.tar.gz', "r:gz")
tar.extractall()
tar.close()

Code taken from the README file

In [45]:
import numpy as np
import pandas as pd

src = './network' # root dir of data
meta = pd.read_csv('network_list.csv')
roles = pd.read_csv('node_role.csv')

def loadGame(game):
    # N is the number of players in the game
    # load csv data
    # unweighted and weighted networks can be loaded exactly the same way
    # below shows the loader for weighted networks
    
    df_network = pd.read_csv(f"{src}/network{game}.csv", index_col=0)
    # for dense repr
    #df_network = pd.read_csv(f"{src}/network{game}_weighted.csv", index_col=0)
    
    # get meta NUMBER_OF_PARTICIPANTS by game NETWORK id
    N = meta.loc[game, "NUMBER_OF_PARTICIPANTS"]

    # T is number of timestamps (10 frames)
    T = len(df_network)
    # load VFOA network to T x N x (N+1) array
    # vfoa[t, n, i] is the probability of player n+1 looking at object i at time t
    # i: 0 - laptop, 1 - player 1, 2 - player 2, ..., N - player N
    vfoa = np.reshape(df_network.values, (T, N, N + 1))

    # print information
    print(f"network id:{game}\t length(x 1/3 second): {T}\t num of players: {N}")
    return vfoa

Flatten the adjacency matrix so that each row represents the gaze vector of the user.  
Convert this into the wikipedia format and save as CSV 

In [7]:
import os

# create `network_processed` directory if it doesn't exist
if not os.path.exists("network_processed"):
    os.makedirs("network_processed")


In [101]:
for _, meta_row in meta.iterrows():
    game_id = meta_row['NETWORK']
    if len(roles[roles['ID'] == game_id]) == 0:
        print(f"No roles for game {game_id}")
        continue
        
    game = loadGame(game_id)
    
    records = []

    t, n1, n2 = game.shape
    #game = game.reshape(t*n1, n2)
    
    records = []
    # this is 0-indexed while nodes are 1-indexed
    

    game_roles = {i: roles[
                (roles["node"] == f"P{i+1}") & (roles["ID"] == game_id)
            ]["role"].values[0] for i in range(n1)}

    for timestep, frame in enumerate(game):
        #print(f"timestep {timestep} {frame}")

        # get max index of each row
        max_index = np.argmax(frame, axis=1)
        for i, item_id in enumerate(max_index):
            # get the role of user i in game game_id
            role = game_roles[i] 

            # record: user_id, item_id, timestamp, state_label, (empty) feature
            records.append([i + 1, item_id, timestep, role, 0])

    
    game_df = pd.DataFrame(records, columns=['user_id', 'item_id', 'timestamp', 'state_label', 'comma_separated_list_of_features'])
    game_df.to_csv(f"./network_processed/network_{meta_row['NETWORK']}.csv", index=False)

network id:0	 length(x 1/3 second): 7323	 num of players: 7
No roles for game 1
network id:2	 length(x 1/3 second): 7533	 num of players: 7
network id:3	 length(x 1/3 second): 6264	 num of players: 7
network id:4	 length(x 1/3 second): 7323	 num of players: 8
network id:5	 length(x 1/3 second): 9651	 num of players: 8
network id:6	 length(x 1/3 second): 7260	 num of players: 7
network id:7	 length(x 1/3 second): 7245	 num of players: 6
network id:8	 length(x 1/3 second): 6243	 num of players: 7
No roles for game 9
network id:10	 length(x 1/3 second): 6681	 num of players: 7
network id:11	 length(x 1/3 second): 5415	 num of players: 7
network id:12	 length(x 1/3 second): 5571	 num of players: 8
network id:13	 length(x 1/3 second): 7173	 num of players: 7
network id:14	 length(x 1/3 second): 9429	 num of players: 8
network id:15	 length(x 1/3 second): 5466	 num of players: 6
network id:16	 length(x 1/3 second): 6678	 num of players: 6
network id:17	 length(x 1/3 second): 3054	 num of pla

In [90]:
game_id = 0

records = []
# this is 0-indexed while nodes are 1-indexed
game_roles = {i: roles[
            (roles["node"] == f"P{i+1}") & (roles["ID"] == game_id)
        ]["role"].values[0] for i in range(n1)}

for timestep, frame in enumerate(game):
    #print(f"timestep {timestep} {frame}")
    
    # get max index of each row
    max_index = np.argmax(frame, axis=1)
    for i, item_id in enumerate(max_index):
        # get the role of user i in game game_id
        role = game_roles[i] 
        
        # record: user_id, item_id, timestamp, state_label, (empty) feature
        records.append([i + 1, item_id, timestep, role, 0])


{0: 1, 1: 1, 2: 2, 3: 1, 4: 2, 5: 1, 6: 2}


In [93]:
records[2]

[3, 7, 0, 2, 0]

In [61]:
print(i+1, meta_row['NETWORK'])

8 0


In [69]:
game = loadGame(meta_row['NETWORK'])
for timestep, frame in enumerate(game):
    print (f"timestep {timestep} {frame}")


network id:0	 length(x 1/3 second): 7323	 num of players: 7
timestep 0 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 1 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0 0]]
timestep 2 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]]
timestep 3 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0]]
timestep 4 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 5 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 6 [[0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [

timestep 3241 [[0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 3242 [[0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 1 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 3243 [[0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 3244 [[0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0 0]]
timestep 3245 [[0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0]]
timestep 3246 [[0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]]
timestep 3247 [[0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0

timestep 6874 [[0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 0]]
timestep 6875 [[0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]]
timestep 6876 [[0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]]
timestep 6877 [[0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 1 0 0 0 0]
 [0 1 0 0 0 0 0 0]]
timestep 6878 [[0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 1 0 0]
 [1 0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0 0]
 [1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0]]
timestep 6879 [[0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1]
 [0 1 0 0 0 0 0 0]]
timestep 6880 [[1 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 1 0]
 [0 0 0 1 0 0 0 0]
 [0

In [68]:
t, n1, n2 = game.shape
game = game.reshape(t*n1, n2)
game.shape

(51261, 8)

In [63]:
roles[roles['ID'] == 8]

Unnamed: 0,ID,node,role
50,8,P1,2
51,8,P2,1
52,8,P3,2
53,8,P4,2
54,8,P5,1
55,8,P6,1
56,8,P7,1


In [38]:
# open wikipedia.csv as a dataframe
df = pd.read_csv(
    "tgn/data/wikipedia.csv",
    skiprows=1,
    header=None,
    #names=["user_id", "item_id", "timestamp", "state_label", "features"],
)



In [40]:
df.shape
df.dtypes
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,166,167,168,169,170,171,172,173,174,175
0,0,0,0.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
1,1,1,36.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
2,1,1,77.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
3,2,2,131.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
4,1,1,150.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157469,2003,632,2678155.0,0,0.090092,0.114777,0.810485,0.439210,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
157470,3762,798,2678158.0,0,-0.155317,-0.154974,1.263928,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
157471,2399,495,2678293.0,0,0.273444,0.300811,1.122434,-0.044391,0.0,-0.019095,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
157472,7479,920,2678333.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775


In [43]:
# name the first columns user_id, item_id, timestamp, state_label, features
df.columns = ["user_id", "item_id", "timestamp", "state_label"] + [f"feature {i}" for i in range(len(df.columns) - 4)]

df.head()

Unnamed: 0,user_id,item_id,timestamp,state_label,feature 0,feature 1,feature 2,feature 3,feature 4,feature 5,...,feature 162,feature 163,feature 164,feature 165,feature 166,feature 167,feature 168,feature 169,feature 170,feature 171
0,0,0,0.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
1,1,1,36.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
2,1,1,77.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
3,2,2,131.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
4,1,1,150.0,0,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775


In [44]:
df[df['state_label']!=0]

Unnamed: 0,user_id,item_id,timestamp,state_label,feature 0,feature 1,feature 2,feature 3,feature 4,feature 5,...,feature 162,feature 163,feature 164,feature 165,feature 166,feature 167,feature 168,feature 169,feature 170,feature 171
87,32,32,2053.0,1,-0.050948,-0.055755,1.187067,-0.381926,0.0,1.558807,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
701,137,114,17601.0,1,-0.141213,-0.142571,1.191367,-0.381926,0.0,0.883317,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
2571,515,196,68384.0,1,0.166254,0.158185,1.186609,-0.234370,0.0,1.468834,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
3773,688,364,95206.0,1,-0.163779,-0.164275,1.075269,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
3801,689,364,96027.0,1,-0.110184,-0.114666,1.054307,-0.381926,0.0,1.833225,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153100,8060,379,2576440.0,1,0.578090,0.489946,1.024585,-0.314755,0.0,3.197122,...,30.711725,22.931742,14.952294,30.578668,41.007183,3.636669,-0.044674,13.150104,16.955887,-0.038775
153442,7538,277,2583015.0,1,-0.115826,-0.117767,1.235343,-0.381926,0.0,1.159654,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
153459,8076,941,2583445.0,1,0.016751,0.015558,0.666964,0.748624,0.0,1.081559,...,17.794824,14.557993,44.993154,13.556722,6.259387,14.732911,-0.044674,13.150104,-0.041448,-0.038775
154390,8108,442,2606859.0,1,-0.175063,-0.176678,-0.937091,-0.381926,0.0,-0.636535,...,-0.090115,-0.096068,-0.068136,-0.060835,-0.058394,-0.062079,-0.044674,-0.050464,-0.041448,-0.038775
