In [1]:
import pandas as pd
import numpy as np
import warnings
import pickle
import argparse
import os
import copy

import torch
from gym import Env
from gym import spaces

# Notebook-wide display configuration.
# Suppress every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')
# Print numpy floats in fixed-point notation with five decimals instead of scientific notation.
np.set_printoptions(suppress=True, formatter={'float': lambda x: "{0:0.5f}".format(x)})
# `IPython.display` is the public import path; importing these names from
# `IPython.core.display` is deprecated.
from IPython.display import display, HTML
# Inject CSS that sets the notebook's `.container` element to 75% width.
display(HTML("<style>.container { width:75% !important; }</style>"))

# Arguments

In [3]:
# @title Arguments
# Central configuration for the notebook: every tunable value is declared once as an
# argparse option so the same code can run from the command line or inside Jupyter.
parser = argparse.ArgumentParser(description='Actor Critic')

# Training
parser.add_argument('--epochs', default=300, type=int, metavar='N', help='Number of epochs for training agent.')
parser.add_argument('--episodes', default=100, type=int, metavar='N', help='Number of episodes for training agent.')
parser.add_argument('--lr', '--learning-rate', default=0.005, type=float, metavar='LR', help='initial learning rate', dest='lr')
parser.add_argument('--wd', default=0.0001, type=float, help='Weight decay for training optimizer')
parser.add_argument('--seed', default=3, type=int, help='Seed for reproducibility')
parser.add_argument('--model-name', default="PPO", type=str, help='Model name for saving model.')
parser.add_argument('--gamma', default=0.99, type=float, metavar='N', help='The discount factor as mentioned in the previous section')

# Model
# type=int so that values supplied on the command line are parsed as integers
# instead of being left as strings.
parser.add_argument("--latent1", default=256, type=int, required=False, help="Latent Space Size for first layer of network.")
parser.add_argument("--latent2", default=256, type=int, required=False, help="Latent Space Size for second layer of network.")

# Env Properties
parser.add_argument('--control_size', default=20, type=int, help='Beacon and Attacker Control group size')
parser.add_argument('--gene_size', default=100, type=int, help='States gene size')
parser.add_argument('--beacon_size', default=60, type=int, help='Beacon population size')
parser.add_argument('--victim_prob', default=0.8, type=float, help='Victim inside beacon or not!')
parser.add_argument('--pop_reset_freq', default=10, type=int, help='Reset Population Frequency (Epochs)')
parser.add_argument('--max_queries', default=10, type=int, help='Maximum queries per episode')


# NOTE(review): --state_dim has a tuple default but no `type` converter, so a value
# passed on the command line would arrive as a raw string; only the default is usable as-is.
parser.add_argument("--state_dim", default=(4,), required=False, help="State Dimension")
parser.add_argument("--n-actions", default=1, type=int, required=False, help="Actions Count for each state")


# utils
parser.add_argument('--resume', default="", type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
parser.add_argument('--save-dir', default='', type=str, metavar='PATH', help='path to cache (default: none)')

# args = parser.parse_args()  # running in command line
# An explicit empty argv keeps argparse from reading the Jupyter kernel's own sys.argv.
args = parser.parse_args([])  # running in ipynb

# set command line arguments here when running in ipynb
if args.save_dir == '':
    args.save_dir = "./"

# Results are written to the same directory as the cache.
args.results_dir = args.save_dir

# Use the GPU when one is available, otherwise fall back to the CPU.
args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(args)

Namespace(epochs=300, episodes=100, lr=0.005, wd=0.0001, seed=3, model_name='PPO', gamma=0.99, latent1=256, latent2=256, control_size=20, gene_size=100, beacon_size=60, victim_prob=0.8, pop_reset_freq=10, max_queries=10, state_dim=(4,), n_actions=1, resume='', save_dir='./', results_dir='./', device=device(type='cuda'))


# Read Data

In [4]:
# Directory that holds the CEU data files read below. It can be overridden with the
# BEACON_DATA_DIR environment variable so the notebook is not tied to a single
# machine's filesystem layout; the previous hard-coded location remains the default.
mainPath = os.environ.get("BEACON_DATA_DIR", "/mnt/kerem/CEU")

In [5]:
# CEU Beacon - it contains 164 people in total which we will divide into groups to experiment
# sep=r"\s+" is the documented equivalent of the deprecated delim_whitespace=True.
beacon = pd.read_csv(os.path.join(mainPath, "Beacon_164.txt"), index_col=0, sep=r"\s+")
# Reference genome, i.e. the genome that has no SNPs, all major allele pairs for each position
# NOTE: unpickling can execute arbitrary code; only load a reference.pickle from a trusted source.
# The context manager makes sure the file handle is closed once loading finishes.
with open(os.path.join(mainPath, "reference.pickle"), "rb") as reference_file:
    reference = pickle.load(reference_file)
# Binary representation of the beacon; 0: no SNP (i.e. no mutation) 1: SNP (i.e. mutation)
# An entry is 1 when it differs from the reference and is not the "NN" marker.
binary = np.logical_and(beacon.values != reference, beacon.values != "NN").astype(int)

In [6]:
# Table that contains MAF (minor allele frequency) values for each position.
# The frame is built in one chain: read, shorten the allele column names, then round
# the "maf" column to three decimals. sep=r"\s+" is the documented equivalent of the
# deprecated delim_whitespace=True.
maf = (
    pd.read_csv(os.path.join(mainPath, "MAF.txt"), index_col=0, sep=r"\s+")
    .rename(columns={'referenceAllele': 'major', 'referenceAlleleFrequency': 'major_freq',
                     'otherAllele': 'minor', 'otherAlleleFrequency': 'minor_freq'})
    .assign(maf=lambda frame: np.round(frame["maf"].values, 3))
)
# Same variable with sorted maf values
sorted_maf = maf.sort_values(by='maf')
# Extracting column to an array for future use
maf_values = maf["maf"].values

In [7]:
# Sanity check: show the shapes of the loaded objects side by side so that their
# leading (position) dimensions can be compared at a glance.
tuple(loaded.shape for loaded in (beacon, reference, binary, maf_values))

((4029840, 164), (4029840, 1), (4029840, 164), (4029840,))

# Environment

In [9]:
class BeaconEnv(Env):
    """Gym environment for a beacon-vs-attacker query game (work in progress).

    On construction, and periodically on ``reset``, the environment randomly draws a
    subset of gene positions and splits the individuals into a victim, an attacker
    control group, a beacon-side control group and the beacon population. With
    probability ``args.victim_prob`` the victim is also placed inside the beacon.

    Two state tensors are maintained:

    * ``attacker_state`` -- shape ``(gene_size, 3)``, columns
      ``[victim SNP, 0 (initialised to zero), MAF]``.
    * ``beacon_state`` -- shape ``(beacon_size, gene_size, 3)``, last axis
      ``[SNP, MAF, RES]``; ``beacon_size`` is one larger than configured when the
      victim was added to the beacon.

    ``step`` is still a stub: it returns placeholder observation/reward values and
    only tracks the query budget.

    Args:
        args: Namespace with at least ``beacon_size``, ``gene_size``, ``control_size``,
            ``victim_prob``, ``pop_reset_freq`` and ``max_queries``. A shallow copy is
            stored, because ``beacon_size`` is adjusted internally.
        beacon: Object whose ``shape`` is ``(n_positions, n_individuals)``; only its
            shape is used, to size the random draws.
        maf: Array-like indexable by an integer index array, giving the MAF per position.
    """

    # Offsets into the shuffled individual indices at which the beacon-side control
    # group and the beacon population start. Index 0 is the victim and the attacker
    # control group starts at index 1.
    _S_CONTROL_START = 41
    _BEACON_START = 80

    def __init__(self, args, beacon, maf):
        super(BeaconEnv, self).__init__()

        self.beacon = beacon
        self.maf = maf
        self.args = copy.copy(args)
        self.init_beacon_size = args.beacon_size

        # Randomly set populations and genes
        self.s_beacon, self.s_control, self.a_control, self.victim, self.mafs = self.get_populations()
        self.reset_counter = 0

        # Initialize the agents
        self._init_attacker()
        self._init_beacon()

        # Define action and observation space
        self.action_space = spaces.Box(low=0, high=1, shape=(1,))  # Beacon lies or not
        # State: [Beacon Size, Gene size, 3(SNP, MAF, RES)]
        self.observation_space = self._make_observation_space()
        self.max_steps = args.max_queries  # Maximum number of steps per episode
        self.current_step = 0

    # Reset the environment after an episode
    def reset(self) -> tuple:
        """Start a new episode.

        Every ``args.pop_reset_freq`` calls the populations and genes are redrawn;
        the agent states are rebuilt on every call.

        Returns:
            ``(attacker_state, beacon_state)`` as two ``torch.Tensor`` objects.
        """
        self.reset_counter += 1
        reset_every = self.args.pop_reset_freq
        if reset_every > 0 and self.reset_counter >= reset_every:
            self._reset_populations()
            # Restart the count so that the populations are redrawn periodically
            # rather than only once.
            self.reset_counter = 0

        # Reset the states of our agents
        self._init_attacker()
        self._init_beacon()
        # The beacon may have gained or lost the extra victim row after a redraw.
        self.observation_space = self._make_observation_space()

        self.current_step = 0
        return self.attacker_state, self.beacon_state

    def step(self, action):
        """Advance the episode by one query (stub).

        Args:
            action: Currently unused; the state transition is not implemented yet.

        Returns:
            ``(observation, reward, done, info)``. ``observation`` and ``reward`` are
            placeholder strings; ``done`` becomes True once ``max_steps`` steps were taken.
        """
        # TODO: Change the states of agent according to the action
        observation = "NEXT STATE"

        # Change the res of the asked gene to 1 in the state of beacon

        # TODO: Calculate rewards for agent (Beacon)
        reward = "REWARD"

        # Calculate the lrt for individuals in the beacon and find the min
        # reward = min(lrt)

        # TODO: Also add the identification : if attacker can identify the member then game will be done
        self.current_step += 1
        # Always bind `done`, so that steps before the last one do not fail on an
        # unassigned local variable.
        done = self.current_step >= self.max_steps

        return observation, reward, done, {}

    def _reset_populations(self) -> None:
        """Redraw genes and population groups and store them on the instance."""
        self.s_beacon, self.s_control, self.a_control, self.victim, self.mafs = self.get_populations()

    def _make_observation_space(self):
        """Build the observation space matching the current ``beacon_state`` shape."""
        # Box requires explicit bounds; 0..1 is used as the range for all three channels.
        return spaces.Box(low=0.0, high=1.0, shape=tuple(self.beacon_state.shape), dtype=np.float32)

    # Initialize the attacker states
    def _init_attacker(self) -> None:
        """Build ``attacker_state`` with one row per gene: [victim SNP, 0, MAF]."""
        victim_snps = np.asarray(self.victim, dtype=np.float32)
        gene_mafs = np.asarray(self.mafs, dtype=np.float32)
        # The middle column starts at zero for every gene.
        zeros_column = np.zeros_like(victim_snps)
        self.attacker_state = torch.as_tensor(np.stack([victim_snps, zeros_column, gene_mafs], axis=1))
        total_snps = self.attacker_state[:, 0].sum().item()
        print("There are {} SNPs".format(total_snps))

    # Initialize the beacon states
    def _init_beacon(self) -> None:
        """Build ``beacon_state`` of shape (beacon_size, gene_size, 3): [SNP, MAF, RES]."""
        n_members = self.args.beacon_size
        # s_beacon is (gene_size, n_members); transpose so that members come first.
        member_snps = torch.as_tensor(self.s_beacon, dtype=torch.float32).T
        # The same per-gene MAF row is shared by every beacon member.
        temp_maf = torch.as_tensor(np.asarray(self.mafs, dtype=np.float32)).unsqueeze(0).expand(n_members, -1)
        responses = torch.zeros(size=(n_members, self.args.gene_size))
        self.beacon_state = torch.stack([member_snps, temp_maf, responses], dim=-1)

    # Defining the populations and genes randomly
    def get_populations(self):
        """Randomly pick genes and split the individuals into the experiment groups.

        Side effect: ``self.args.beacon_size`` is reset to the configured size and
        incremented by one when the victim is placed inside the beacon.

        Returns:
            ``(s_beacon, s_control, a_control, victim, mafs)`` where the first three
            are ``(gene_size, group_size)`` arrays, ``victim`` is ``(gene_size,)`` and
            ``mafs`` is ``self.maf`` indexed by the selected genes.

        Raises:
            ValueError: If the configured sizes do not fit into the available data or
                would make the groups overlap.
        """
        self.args.beacon_size = self.init_beacon_size  # Handling the one extra victim in the beacon

        n_positions, n_people = self.beacon.shape[0], self.beacon.shape[1]
        gene_size = self.args.gene_size
        control_size = self.args.control_size
        beacon_size = self.init_beacon_size

        # Fail early instead of silently producing truncated or overlapping groups.
        if gene_size > n_positions:
            raise ValueError(
                "gene_size ({}) exceeds the number of available positions ({})".format(gene_size, n_positions))
        if 1 + control_size > self._S_CONTROL_START or self._S_CONTROL_START + control_size > self._BEACON_START:
            raise ValueError(
                "control_size ({}) is too large: control groups would overlap".format(control_size))
        if self._BEACON_START + beacon_size > n_people:
            raise ValueError(
                "beacon_size ({}) does not fit into a population of {} individuals".format(beacon_size, n_people))

        # Prepare index arrays for future use
        genes = np.random.permutation(n_positions)[:gene_size]  # Randomly select gene indexes
        shuffled = np.random.permutation(n_people)  # Randomly select population indexes

        # Define different groups of people
        victim_ind = shuffled[0]
        a_cind = shuffled[1:1 + control_size]
        s_cind = shuffled[self._S_CONTROL_START:self._S_CONTROL_START + control_size]
        s_ind = shuffled[self._BEACON_START:self._BEACON_START + beacon_size]

        if np.random.random() < self.args.victim_prob:
            print("Victim is inside the Beacon!")
            s_ind = np.append(s_ind, victim_ind)  # Victim inside beacon
            self.args.beacon_size += 1  # Handling the one extra victim in the beacon
        else:
            print("Victim is NOT inside the Beacon!")

        # NOTE(review): `binary` is the notebook-level 0/1 SNP matrix, not a constructor
        # argument; it must exist in the global namespace before this method runs.
        # np.ix_ selects the (genes x individuals) sub-matrix directly, without first
        # copying all positions for the chosen individuals.
        s_beacon = binary[np.ix_(genes, s_ind)]
        a_control = binary[np.ix_(genes, a_cind)]
        s_control = binary[np.ix_(genes, s_cind)]
        victim = binary[genes, victim_ind]
        return s_beacon, s_control, a_control, victim, self.maf[genes]


SyntaxError: unmatched ')' (710542715.py, line 8)