In [4]:
import json
import os
from typing import Optional, List, Any, Dict
import numpy as np
from copy import deepcopy

In [5]:
def read_json(fname: str) -> Any:
    """
    Given a filename, reads a json file and returns the data stored inside.

    The file is always decoded as UTF-8 instead of the platform's default
    encoding, so files containing unescaped non-ASCII characters (which
    write_json produces, since it dumps with ensure_ascii=False) load the
    same way on every machine.

    Input:
        fname (str):
            Name of the file to be read. Must point to an existing file
            whose name ends with ".json".

    Output:
        data (Any):
            The data loaded from the json file.

    Raises:
        AssertionError:
            If fname is not an existing file or does not end with ".json".
    """

    assert os.path.isfile(fname), f"File not found: {fname}"
    assert fname.endswith(".json"), f"Expected a .json file, got: {fname}"

    # Explicit encoding: the built-in default is locale dependent.
    with open(fname, "r", encoding="utf-8") as file:
        data = json.load(file)

    return data


def write_json(
    data: Any,
    fname: str,
) -> None:
    """
    Given a data and the filename, writes the data to the specified
    fname as UTF-8 encoded, 4-space indented json.
    If the directory that the specified filename should be in
    does not exist, then it creates the directory first.
    A bare filename (no directory component) is written to the
    current working directory.

    Input:
        data (Any):
            the data that needs to stored in a json format.

        fname (str):
            path to the file where the data needs to be saved.
            Must end with ".json".

    Output:
        None

    Raises:
        AssertionError:
            If fname is not a string ending with ".json".
    """

    assert isinstance(fname, str) and fname.endswith(".json")

    # os.path.dirname understands the platform's path separators, unlike a
    # manual split on "/".
    root_dir = os.path.dirname(fname)

    # A bare filename has an empty directory component; os.makedirs("")
    # would raise FileNotFoundError, so only create a directory when there
    # is one to create.
    if root_dir:
        os.makedirs(root_dir, exist_ok=True)

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding must be fixed rather than left to the locale default.
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

In [10]:
def generate_probability_list(
    randomize_arm_probabilities: bool,
    num_arms: int,
    difficulty: str,
) -> List[float]:
    """
    Builds the list of mean rewards for a bandit instance in which exactly
    one uniformly chosen arm is the best arm.

    All randomness is drawn from the global np.random state.

    Input:
        randomize_arm_probabilities (bool):
            If True, a gap is sampled uniformly from [0.2, 0.4); the best
            arm gets 0.5 + gap / 2 and every other arm is sampled
            uniformly from [0, 0.5 - gap / 2).
            If False, every arm gets the same base value and the best
            arm gets a fixed higher value (see difficulty).

        num_arms (int):
            Number of arms in the bandit instance.

        difficulty (str):
            Only used when randomize_arm_probabilities is False.
            "easy" uses 0.25 for the base arms and 0.75 for the best arm;
            any other value uses 0.4 and 0.6.

    Output:
        probabilities (List[float]):
            Mean reward of each arm, of length num_arms.
    """
    if not randomize_arm_probabilities:
        # Fixed-gap instance: pick the best arm first, then fill in values.
        best_arm = np.random.randint(low=0, high=num_arms)

        if difficulty == "easy":
            base_reward, best_reward = 0.25, 0.75
        else:
            base_reward, best_reward = 0.4, 0.6

        rewards = [base_reward] * num_arms
        rewards[best_arm] = best_reward
        return rewards

    # Random-gap instance: the sampled gap separates the best arm from the
    # upper bound of all the other arms.
    gap = np.random.uniform(low=0.2, high=0.4, size=None)
    best_reward = 0.5 + (gap / 2)
    other_upper_bound = 0.5 - (gap / 2)

    rewards = np.random.uniform(
        low=0.0,
        high=other_upper_bound,
        size=num_arms,
    ).tolist()

    best_arm = np.random.randint(low=0, high=num_arms)
    rewards[best_arm] = best_reward

    return rewards
    

def generate_new_data(
    data: Dict[str, Any],
    num_new_scenarios: Dict[str, int],
) -> Dict[str, Any]:
    """
    Expands every task scenario of the "train" and "eval" splits into
    several bandit task configs, each with freshly generated mean arm
    rewards.

    The input is deep-copied first, so the caller's data is not modified.

    For a split with n = num_new_scenarios[split], each scenario yields
    n configs indexed by task_index in range(n):
        - task_index <= n // 2 requests randomized arm probabilities;
        - otherwise fixed probabilities are requested, with difficulty
          "easy" when task_index >= 0.75 * n and "hard" otherwise.

    Input:
        data (Dict[str, Any]):
            Must contain the keys "train" and "eval", each an iterable of
            scenarios. Every scenario must have an "agent" entry (copied
            into each generated config) and an "env" entry holding a
            comma separated string of arm names.

        num_new_scenarios (Dict[str, int]):
            Number of configs to generate per scenario, keyed by split.

    Output:
        data (Dict[str, Any]):
            Copy of the input in which each of the two splits is replaced
            by the list of generated configs. Each config has the form
            {"agent": ..., "env": {"arm_names_string", "arm_names",
            "mean_arm_rewards"}}.
    """
    data = deepcopy(data)

    for split in ["train", "eval"]:
        new_data_split = []

        for task_scenario in data[split]:
            task_description = task_scenario["agent"]
            arm_names_string = task_scenario["env"]

            # Normalised arm names: lower-cased, surrounding spaces removed.
            arm_names = [
                arm_name.strip()
                for arm_name in arm_names_string.lower().split(",")
            ]

            num_scenarios = num_new_scenarios[split]

            for task_index in range(num_scenarios):
                # NOTE(review): "<=" makes n // 2 + 1 configs randomized
                # (e.g. 51 of 100). If an exact half was intended this
                # should be "<" -- confirm before changing.
                probs = generate_probability_list(
                    randomize_arm_probabilities=(
                        task_index <= num_scenarios // 2
                    ),
                    num_arms=len(arm_names),
                    difficulty=(
                        "easy"
                        if task_index >= num_scenarios * 0.75
                        else "hard"
                    ),
                )

                new_task_config = {
                    "agent": deepcopy(task_description),
                    "env": {
                        "arm_names_string": arm_names_string,
                        # Each config gets its own list; sharing a single
                        # list object would let a mutation of one config
                        # leak into every config of the same scenario.
                        "arm_names": list(arm_names),
                        "mean_arm_rewards": probs,
                    },
                }

                new_data_split.append(new_task_config)

        data[split] = new_data_split

    return data


def generate_and_save_bandit_bai_data(
    old_data_path: str,
    new_data_path: str,
    num_new_scenarios: Dict[str, int],
) -> None:
    """
    Loads the scenarios stored at old_data_path, generates the new bandit
    task configs from them and saves the result at new_data_path.

    Input:
        old_data_path (str):
            Path of the json file holding the original scenarios.

        new_data_path (str):
            Path of the json file the generated data is written to.

        num_new_scenarios (Dict[str, int]):
            Number of configs to generate per scenario, keyed by split;
            forwarded unchanged to generate_new_data.

    Output:
        None
    """
    source_data = read_json(fname=old_data_path)

    # Generate and persist in one step; nothing else needs the result.
    write_json(
        data=generate_new_data(
            data=source_data,
            num_new_scenarios=num_new_scenarios,
        ),
        fname=new_data_path,
    )

In [11]:
# Seed the global NumPy RNG so that re-running this cell regenerates the
# same dataset (generate_probability_list draws from the global np.random
# state).
np.random.seed(0)

# Directory holding both the input config and the generated output.
# Defaults to the original local checkout; set PAPRIKA_ENV_CONFIG_DIR to
# run the notebook on another machine without editing this cell.
ENV_CONFIG_DIR = os.environ.get(
    "PAPRIKA_ENV_CONFIG_DIR",
    "/Users/fahimtajwar/academics/exploration/verl/verl/paprika/environments/env_configs",
)

generate_and_save_bandit_bai_data(
    old_data_path=f"{ENV_CONFIG_DIR}/bandit_best_arm_identification_fixed_sampling_budget.json",
    new_data_path=f"{ENV_CONFIG_DIR}/bandit_best_arm_identification_fixed_sampling_budget_new.json",
    num_new_scenarios={
        "train": 25,
        "eval": 100,
    },
)

In [None]:
def _can_place_ship(self, row, col, size, orientation) -> bool:
    """
    Checks whether a ship of the given size fits on the board starting at
    (row, col) without leaving the grid or overlapping an occupied cell.

    Input:
        row (int):
            Row index of the ship's first cell.

        col (int):
            Column index of the ship's first cell.

        size (int):
            Number of cells the ship occupies.

        orientation (str):
            "horizontal" extends the ship along increasing columns; any
            other value extends it along increasing rows.

    Output:
        can_place (bool):
            True if every cell the ship would cover lies inside the grid
            and currently holds ".", False otherwise.
    """
    # Reject start cells outside the grid. Negative indices would otherwise
    # silently wrap around to the opposite edge of the board.
    # Assumes the board is square with side self.grid_size, as the two
    # extent checks below imply.
    if not (0 <= row < self.grid_size and 0 <= col < self.grid_size):
        return False

    if orientation == "horizontal":
        if col + size > self.grid_size:
            return False
        for c in range(col, col + size):
            if self.board[row][c] != ".":
                return False
    else:
        if row + size > self.grid_size:
            return False
        for r in range(row, row + size):
            if self.board[r][col] != ".":
                return False
    return True