In [1]:
import pandas as pd
import numpy as np
import pickle
import argparse
import os
import random

import torch

from utils import *

In [2]:
def reproducibility(seed: int):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and,
    when available, all CUDA devices), and switches cuDNN to deterministic
    mode with autotuning (``benchmark``) disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only affects child processes - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # CPU-side generators.
    for seed_fn in (torch.manual_seed, random.seed, np.random.seed):
        seed_fn(seed)

    # Trade cuDNN autotuning speed for run-to-run determinism.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    if not torch.cuda.is_available():
        return

    # GPU-side generators (current device and all devices).
    for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all, torch.cuda.manual_seed):
        seed_fn(seed)


reproducibility(3)

In [3]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if not torch.cuda.is_available():
    device = torch.device('cpu')
    print("Device set to : cpu")
else:
    device = torch.device('cuda:0')
    # Release cached GPU memory left over from earlier cells before starting.
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))

Device set to : NVIDIA TITAN RTX


In [4]:
def args_create(argv=()):
    """Build the run configuration and create a fresh results directory.

    Args:
        argv: Sequence of command-line style tokens to parse, e.g.
            ``["--episodes", "3"]``. The default (empty) makes every option
            take its default value, which is what a notebook needs because the
            kernel's own ``sys.argv`` must not be parsed. Pass ``None`` to
            parse the real command line instead.

    Returns:
        argparse.Namespace holding every option defined below, with
        ``results_dir`` rewritten to the newly created ``run<N>`` folder and
        ``device`` set to the notebook-level ``device`` variable.

    Side effects:
        Creates ``<results-dir>/run<N>`` with the sub-folders ``logs``,
        ``rewards``, ``indrewards``, ``actions`` and ``pvalues``, and prints
        the resulting namespace.
    """
    # @title Arguments
    parser = argparse.ArgumentParser(description='Actor Critic')

    parser.add_argument('--data', default="/mnt/kerem/CEU", type=str, help='Dataset Path')
    parser.add_argument('--epochs', default=64, type=int, metavar='N', help='Number of epochs for training agent.')
    parser.add_argument('--episodes', default=9, type=int, metavar='N', help='Number of episodes for training agent.')
    parser.add_argument('--lr', '--learning-rate', default=0.005, type=float, metavar='LR', help='initial learning rate', dest='lr')
    parser.add_argument('--wd', default=0.0001, type=float, help='Weight decay for training optimizer')
    parser.add_argument('--seed', default=3, type=int, help='Seed for reproducibility')
    parser.add_argument('--model-name', default="PPO", type=str, help='Model name for saving model.')
    parser.add_argument('--gamma', default=0.99, type=float, metavar='N', help='The discount factor as mentioned in the previous section')

    # Model
    # type=int so that values supplied as tokens are not left as strings.
    parser.add_argument("--latent1", default=256, type=int, required=False, help="Latent Space Size for first layer of network.")
    parser.add_argument("--latent2", default=256, type=int, required=False, help="Latent Space Size for second layer of network.")

    # Env Properties
    parser.add_argument('--a_control_size', default=50, type=int, help='Attack Control group size')
    parser.add_argument('--b_control_size', default=50, type=int, help='Beacon Control group size')
    parser.add_argument('--gene_size', default=100, type=int, help='States gene size')
    parser.add_argument('--beacon_size', default=10, type=int, help='Beacon population size')
    parser.add_argument('--victim_prob', default=1, type=float, help='Victim inside beacon or not!')
    parser.add_argument('--max_queries', default=5, type=int, help='Maximum queries per episode')

    parser.add_argument('--attacker_type', default="agent", choices=["random", "optimal", "agent"], type=str, help='Type of the attacker')
    parser.add_argument('--beacon_type', default="truth", choices=["random", "agent", "truth"], type=str, help='Type of the beacon')

    parser.add_argument('--pop_reset_freq', default=100000000, type=int, help='Reset Population Frequency (Epochs)')
    parser.add_argument('--plot-freq', default=1, type=int, metavar='N', help='Plot Frequencies')
    parser.add_argument('--val-freq', default=20, type=int, metavar='N', help='Validation frequencies')
    parser.add_argument('--control-lrts', default=None, type=str, help='Control groups LRTS path')

    # utils
    parser.add_argument('--resume', default="", type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    parser.add_argument('--results-dir', default='./results/simulation', type=str, metavar='PATH', help='path to cache (default: none)')

    # argv=None falls through to argparse's own sys.argv handling (command line);
    # anything else is parsed as an explicit token list (notebook usage).
    args = parser.parse_args(None if argv is None else list(argv))

    # The base folder may not exist on a fresh checkout; listing it would fail.
    base_dir = args.results_dir
    os.makedirs(base_dir, exist_ok=True)

    # Start from the entry count (original numbering scheme) and skip forward
    # past any name that is already taken, e.g. after an older run was deleted.
    run_index = len(os.listdir(base_dir))
    while os.path.exists(os.path.join(base_dir, "run" + str(run_index))):
        run_index += 1

    args.results_dir = os.path.join(base_dir, "run" + str(run_index))
    os.makedirs(args.results_dir)
    for subfolder in ("logs", "rewards", "indrewards", "actions", "pvalues"):
        os.makedirs(args.results_dir + "/" + subfolder)

    # `device` is the notebook-level variable set by the device-selection cell.
    args.device = device

    print(args)
    return args

# args = args_create()

In [5]:
# CEU Beacon - it contains 164 people in total which we will divide into groups to experiment
# (sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True).
beacon = pd.read_csv(os.path.join("/mnt/kerem/CEU", "Beacon_164.txt"), index_col=0, sep=r"\s+")
# Reference genome, i.e. the genome that has no SNPs, all major allele pairs for each position
# NOTE: pickle can execute arbitrary code while loading - only use with trusted files.
with open(os.path.join("/mnt/kerem/CEU", "reference.pickle"), "rb") as reference_file:
    # The context manager closes the handle, which the bare open() call never did.
    reference = pickle.load(reference_file)
# Binary representation of the beacon; 0: no SNP (i.e. no mutation) 1: SNP (i.e. mutation)
# Entries equal to "NN" are also encoded as 0.
binary = np.logical_and(beacon.values != reference, beacon.values != "NN").astype(int)

In [6]:
# Table that contains MAF (minor allele frequency) values for each position.
# (sep=r"\s+" is the non-deprecated spelling of delim_whitespace=True).
maf = pd.read_csv(os.path.join("/mnt/kerem/CEU", "MAF.txt"), index_col=0, sep=r"\s+")
# Shorter column names; assigned back instead of mutating in place.
maf = maf.rename(columns={'referenceAllele': 'major', 'referenceAlleleFrequency': 'major_freq',
                          'otherAllele': 'minor', 'otherAlleleFrequency': 'minor_freq'})
maf["maf"] = np.round(maf["maf"].values, 3)
# Same variable with sorted maf values
sorted_maf = maf.sort_values(by='maf')
# Extracting column to an array for future use
maf_values = maf["maf"].values

# Put individuals on axis 0. The guard makes the cell safe to re-run: the
# transpose happens only while axis 0 still has one entry per MAF position,
# so a second execution no longer flips the matrix back.
if binary.shape[0] == maf_values.shape[0]:
    binary = binary.T
binary.shape  # (164, 4029840)

(164, 4029840)

In [7]:
# Sanity check: display the shapes of the loaded tables side by side.
tuple(table.shape for table in (beacon, reference, binary, maf_values))

((4029840, 164), (4029840, 1), (164, 4029840), (4029840,))

In [11]:
from ppo import PPO
%load_ext autoreload
%autoreload 2

from env import Env

args=args_create()
env = Env(args, beacon, maf_values, binary)
 
################ PPO hyperparameters ################
K_epochs = 300         # update policy for K epochs
eps_clip = 0.1           # clip parameter for PPO
gamma = 0.99                # discount factor

lr_actor = 0.0001      # learning rate for actor network
lr_critic = 0.0001        # learning rate for critic network

i_episode = 0

if args.attacker_type == "agent":
    attacker_state_dim = 400
    attacker_action_dim = 10

    attacker_agent = PPO(attacker_state_dim, attacker_action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, False, None)
    attacker_agent.load("/mnt/sobhan/Beacons/agents/attacker1/PPO_0.pth")

if args.attacker_type == "agent":
    while i_episode <= args.episodes:
        for t in range(1, args.max_queries+1):
            _, rewards, done, _  = env.step(attacker_agent=attacker_agent)
            if done:
                break
        print("Victim: {} \t Current Episode Reward : {}".format(env.victim_id, rewards[1]))
        env.reset()

else:
    for t in range(1, args.max_queries+1):
        _, rewards, done, _  = env.step()
        if done:
            break

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Namespace(data='/mnt/kerem/CEU', epochs=64, episodes=9, lr=0.005, wd=0.0001, seed=3, model_name='PPO', gamma=0.99, latent1=256, latent2=256, a_control_size=50, b_control_size=50, gene_size=100, beacon_size=10, victim_prob=1, max_queries=5, attacker_type='agent', beacon_type='truth', pop_reset_freq=100000000, plot_freq=1, val_freq=20, control_lrts=None, resume='', results_dir='./results/simulation/run4', device=device(type='cuda', index=0))
./results/simulation/run4/logs/beacon_log.csv
./results/simulation/run4/logs/beacon_control_log.csv
./results/simulation/run4/logs/attacker_log.csv
./results/simulation/run4/logs/attacker_control_log.csv
-------------------------
Attacker Action:  7
Attacker Action:  0
⛔⛔⛔ Attacker Identified the VICTIM ⛔⛔⛔
Victim: 0 	 Current Episode Reward : 39
-------------------------
Attacker Action:  0
Attacker Action:  3
⛔⛔⛔ Attacker Identified the VICTIM ⛔⛔⛔
Victim: 1 	 Cu

IndexError: index 11 is out of bounds for axis 0 with size 11

In [None]:
    has_continuous_action_space = True                

action_std = 0.4             # starting std for action distribution (Multivariate Normal)
action_std_decay_rate = 0.0025       # linearly decay action_std (action_std = action_std - action_std_decay_rate)
min_action_std = 0.05                # minimum action_std (stop decay after action_std <= min_action_std)
action_std_decay_freq = int(2.5e5)

################ PPO hyperparameters ################
K_epochs = 200          # update policy for K epochs
eps_clip = 0.2              # clip parameter for PPO
gamma = 0.99                # discount factor

lr_actor = 0.0003       # learning rate for actor network
lr_critic = 0.001       # learning rate for critic network

random_seed = 0      

In [None]:
%load_ext autoreload
%autoreload 2

from ppo import PPO
from environment import BeaconEnv

args = args_create()

print("\n=============================================\n")
print("Start Simulation Using Optimal Attacker")

state_dim = 7
action_dim = env.action_space.shape[0]
ppo_agent = PPO(state_dim, action_dim, lr_actor, lr_critic, gamma, K_epochs, eps_clip, has_continuous_action_space, action_std)
ppo_agent.load("/mnt/sobhan/Beaconsv2/results/train/run14/weights/PPO_1.pth")


env = BeaconEnv(args, beacon, maf_values, binary)
state = env.reset()[1]
total_reward = 0
current_ureward = 0
current_preward = 0
privacy_rewards = []
utility_rewards = []
total_rewards = []
lrt_values_list = []


done = False
while not done:
    # print("\n=============================================\n")

    # state = torch.flatten(state)
    with torch.no_grad():
        action, _, _ = ppo_agent.policy_old.act(torch.as_tensor(state).float().to(args.device))

    # print("Beacon Action: ", action)

    action = action.squeeze().item()
    state, reward, done, rewards = env.step([action])

    total_reward += reward
    current_preward += rewards[0]
    current_ureward += rewards[1]

    # print("Current Privacy Reward: {}\nCurrent Utility Reward: {}\nThis Episode Reward: {}\nTotal Reward: {}".format(rewards[0], rewards[1], reward, total_reward))
    total_rewards.append(total_reward)
    utility_rewards.append(current_ureward)
    privacy_rewards.append(current_preward)
    # lrt_values_list.append(lrt_values)

# plot_lists(utility_rewards, args.results_dir, 'utilities', 0)
print(f"Validation completed, total reward")
# print("\n=============================================\n")