In [None]:
# This notebook shows how to run the storage experiment.

In [None]:
import os
# Connection settings exported as environment variables (presumably consumed by
# the Ansible-based provisioning tooling — confirm against yardstick_benchmark).
# The password values are placeholders: fill them in locally and never commit
# real credentials together with the notebook.
os.environ.update(
    {
        "ANSIBLE_USER": "user",
        "ANSIBLE_PORT": "8887",
        "ANSIBLE_SSH_PASS": "yourpassword",
        "ANSIBLE_BECOME_PASS": "yourpassword",
        "ANSIBLE_USE_SUDO": "true",
    }
)

In [None]:
# Important: you need sudo access on your machine

In [None]:
import glob
import subprocess
import pandas as pd

In [None]:
# Storage device profiles iterated over by the experiment loop.
# Each tuple is (name, latency, throughput). Units are not stated anywhere in
# this notebook — TODO confirm against VirtualStorage before changing values.
devices = [
    dict(name=name, latency=latency, throughput=throughput)
    for name, latency, throughput in (
        ("mem", 0, 19000),
        ("sclass", 7000, 19000),
        ("nvme", 50000, 5000),
        ("ssd", 250000, 550),
        ("io2", 1000000, 4000),
        ("hdd", 6000000, 140),
    )
]

## Routines needed

In [None]:
from tqdm import tqdm

def attach_path(df):
    """Annotate read/write syscalls with the path of the file they operate on.

    Rows are walked in order while a descriptor -> path table is maintained:

    * ``openat`` registers ``int(ret_int)`` against the second comma-separated
      field of ``args`` (stored verbatim, including any surrounding whitespace
      or quotes);
    * ``close`` drops the descriptor given in ``args`` from the table;
    * ``pwrite64`` / ``pread64`` / ``read`` / ``write`` rows whose descriptor
      (first comma-separated field of ``args``) is in the table get that path
      written to the ``filepath`` column, but only when the path contains
      ``./`` and does not contain ``..``.

    Args:
        df: DataFrame with ``syscall``, ``ret_int`` and ``args`` columns.
            It is modified in place.

    Returns:
        The same DataFrame. It always carries a ``filepath`` column (NaN where
        no path was attached), even when the frame is empty or no row matched.
    """
    # Create the column up front so the output schema does not depend on
    # whether any row happened to match. Object dtype keeps later string
    # assignments from triggering dtype-upcast warnings.
    if 'filepath' not in df.columns:
        df['filepath'] = pd.Series(float('nan'), index=df.index, dtype=object)

    fd_to_path = {}
    # iterrows() is a generator with no length; pass total so the bar can
    # actually show progress.
    for index, row in tqdm(df.iterrows(), total=len(df)):
        syscall = row['syscall']

        if syscall == 'openat':
            fd_to_path[int(row['ret_int'])] = row['args'].split(',')[1]

        elif syscall == 'close':
            # Forget the descriptor so a later reuse of the same fd number is
            # not attributed to the old file.
            fd_to_path.pop(int(row['args']), None)

        elif syscall in ['pwrite64', 'pread64', 'read', 'write']:
            fd = int(row['args'].split(',')[0])
            path = fd_to_path.get(fd)
            if path is not None and './' in path and '..' not in path:
                df.loc[index, 'filepath'] = path

    return df

def strace_df_from_txt(filename, strace2csv_bin=None):
    """Convert a raw strace log to CSV with ``strace2csv`` and load it.

    Runs ``<strace2csv> --out <filename>.csv --verbose 2 <filename>`` and reads
    the resulting ``<filename>.csv`` with pandas.

    Args:
        filename: Path of the raw strace log. The CSV is written next to it
            as ``<filename>.csv``.
        strace2csv_bin: Path of the ``strace2csv`` executable. When omitted,
            the historical install location is used if it is executable,
            otherwise ``strace2csv`` is looked up on ``PATH``.

    Returns:
        pandas.DataFrame loaded from the generated CSV.

    Raises:
        subprocess.CalledProcessError: If the converter exits non-zero (its
            stderr is printed first).
        FileNotFoundError: If no converter executable can be found.
    """
    import shutil  # local import: the notebook only imports shutil in a later cell

    default_bin = "/mnt/sdc/gleb/miniconda3/bin/strace2csv"
    if strace2csv_bin is None:
        # Keep the original location as first choice so existing setups behave
        # the same; fall back to PATH so the notebook runs on other machines.
        # If neither exists, use the default so subprocess raises as before.
        strace2csv_bin = shutil.which(default_bin) or shutil.which("strace2csv") or default_bin

    csv_path = f'{filename}.csv'
    arguments = [strace2csv_bin, "--out", csv_path, "--verbose", "2", f'{filename}']

    try:
        result = subprocess.run(
            arguments,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        print(result.stdout.decode())  # converter output, useful when debugging
    except subprocess.CalledProcessError as e:
        # Surface the converter's own diagnostics before propagating.
        print(f"Command failed with return code {e.returncode}")
        print(f"Error message:\n{e.stderr.decode()}")
        raise

    return pd.read_csv(csv_path)

## Join Experiment

In [None]:
from yardstick_benchmark.provisioning import SSH, VirtualStorage 
from yardstick_benchmark.monitoring import Telegraf, DiskStat
from yardstick_benchmark.games.minecraft.server import PaperMC
from yardstick_benchmark.games.minecraft.workload.trace_generator import generate_join
from yardstick_benchmark.games.minecraft.workload.world_setup import WorldSetup
from yardstick_benchmark.games.minecraft.workload import TraceBased

import yardstick_benchmark
from time import sleep
from datetime import datetime
from pathlib import Path
import os
import shutil
import time
from datetime import timedelta

def players_join(output_dir, player_count, joins_per_second, sample_count, latency, throughput, trace='', world_dir=''):
    """Run the player-join experiment on localhost and collect its output.

    Sequence, as driven by this function:

    1. Provision ``localhost`` over SSH (with sudo) using ``/home/user/wd`` as
       the working directory.
    2. Deploy and start ``VirtualStorage(nodes, latency, throughput)``.
    3. Deploy PaperMC with strace enabled, start it, run ``WorldSetup``, then
       stop the server again.
    4. Deploy Telegraf and the trace-based workload.
    5. For each sample: start the server, start Telegraf, run the workload,
       stop the server, then stop and clean up Telegraf.
    6. Fetch and unarchive the results into ``output_dir``.
    7. Clean up the virtual storage — always, even if an earlier step failed.

    Args:
        output_dir: Directory that receives the trace file (when generated)
            and the fetched results. Created if missing.
        player_count: Number of players passed to the trace generator and to
            ``WorldSetup``.
        joins_per_second: Join rate passed to the trace generator.
        sample_count: Number of times the measured server run is repeated.
        latency: Latency value handed to ``VirtualStorage`` (units not stated
            here — TODO confirm).
        throughput: Throughput value handed to ``VirtualStorage`` (units not
            stated here — TODO confirm).
        trace: Path of an existing trace file. When empty, a trace is generated
            at ``<output_dir>/trace.txt``.
        world_dir: Currently unused; kept for interface compatibility.

    Returns:
        dict with wall-clock timestamps (``time.time()``). The flat keys
        ``start_start``/``start_end`` (around server start),
        ``start_workload``/``end_workload`` (around the workload) and
        ``end_start``/``end_end`` (around server stop) hold the values of the
        last sample; ``samples`` holds one such dict per sample, in order.
    """
    # Flat keys keep the original return shape; 'samples' keeps the per-sample
    # timings that would otherwise be overwritten on every iteration.
    experiment_metadata = {'samples': []}

    wd = Path("/home/user/wd")
    ssh = SSH()

    dest = Path(output_dir)

    os.makedirs(output_dir, exist_ok=True)
    if not trace:
        trace = os.path.join(dest, 'trace.txt')
        generate_join(player_count, joins_per_second, 1, trace)

    nodes = ssh.provision(["localhost"], wd, use_sudo=True)

    vs = None
    try:
        vs = VirtualStorage(nodes, latency, throughput)
        vs.deploy()
        vs.start()

        papermc = PaperMC(nodes, use_strace=True)
        papermc.deploy()

        # The server is started once for WorldSetup and stopped again before
        # any measured sample begins.
        papermc.start()

        ws = WorldSetup(nodes, player_count, 10000)
        ws.deploy()
        ws.start()

        papermc.stop()

        # Workload duration in seconds: time for all joins plus 3 s per player.
        duration = (player_count / joins_per_second) + player_count * 3

        telegraf = Telegraf(nodes)
        telegraf.add_input_jolokia_agent(nodes[0])
        telegraf.add_input_execd_minecraft_ticks(nodes[0])
        telegraf.deploy()

        trace_based = TraceBased(nodes, 'localhost', trace, timedelta(seconds=duration))
        trace_based.deploy()

        for sample in range(sample_count):
            os.makedirs(os.path.join(output_dir, str(sample)), exist_ok=True)

            timings = {}

            timings['start_start'] = time.time()
            papermc.start()
            timings['start_end'] = time.time()

            telegraf.start()

            timings['start_workload'] = time.time()
            trace_based.start()
            timings['end_workload'] = time.time()

            timings['end_start'] = time.time()
            papermc.stop()
            timings['end_end'] = time.time()

            telegraf.stop()
            telegraf.cleanup()

            experiment_metadata['samples'].append(timings)
            experiment_metadata.update(timings)

        yardstick_benchmark.fetch(dest, nodes)
        yardstick_benchmark.unarchive(output_dir, output_dir)
    finally:
        # Tear the emulated storage down even when a step above raised, so a
        # failed run does not leave it in place for the next one.
        if vs is not None:
            vs.cleanup()

    return experiment_metadata

In [1]:
# Root directory for experiment output; results are written to
# <dest_folder>/<player_count>/<device name>/. Replace the placeholder with a
# real path before running the experiment loop.
dest_folder = "put in where you want the data to be saved"

In [None]:
# Run the join experiment for every (player count, device) pair, then convert
# the captured strace log to CSV and annotate read/write calls with file paths.
#
# NOTE(review): `player_counts` is not defined in any cell visible in this
# notebook; define it (an iterable of ints) before running this cell.
for player_count in player_counts:
    for device in devices:
        exec_folder = os.path.join(dest_folder, str(player_count), device['name'])
        players_join(exec_folder, player_count, player_count, 1, device['latency'], device['throughput'])

        # The annotated CSV is written to <exec_folder>/fsysstrace.log, which
        # the recursive pattern below also matches. Prefer any other match so a
        # re-run does not parse the previous run's output as a raw strace log.
        annotated_path = os.path.join(exec_folder, "fsysstrace.log")
        # `glob` is imported as a module, so the function is glob.glob.
        matches = glob.glob(f"{exec_folder}/**/fsysstrace.log", recursive=True)
        nested_matches = [
            path for path in matches
            if os.path.abspath(path) != os.path.abspath(annotated_path)
        ]
        candidates = nested_matches or matches
        if not candidates:
            raise FileNotFoundError(f"No fsysstrace.log found under {exec_folder}")
        raw_data_file = candidates[0]

        df = strace_df_from_txt(raw_data_file)
        df = attach_path(df)
        df.to_csv(annotated_path)