# Functions to create CSV files of different treatments:

In [4]:
import csv
import pandas as pd
import random
import numpy as np
from itertools import product
from itertools import combinations
from pyDOE2 import *
from collections import defaultdict

### Creating a CSV combinatorially (without packet loss):

In [2]:
def create_csv(up_low, up_high, up_step, down_low, down_high, down_step, rtt_low, rtt_high, rtt_step):
    
    # creating values from range and stepsize
    up_vals = []
    for i in range(up_low, up_high+1, up_step):
        up_vals.append(i * 1000) #converting Mbps into Kbps
    
    down_vals = []
    for j in range(down_low, down_high+1, down_step):
        down_vals.append(j * 1000) #converting Mbps into Kbps
    
    rtt_vals = []
    for k in range(rtt_low, rtt_high+1, rtt_step):
        rtt_vals.append(k)
    
    # creating combinations
    combos = []
    for i in up_vals:
        for j in down_vals:
            for k in rtt_vals:
                combo = [i, j, k]
                combos.append(combo)
     
    # adding a treatment number column
    for treatment in range(len(combos)):
        line = [treatment + 1] + combos[treatment]
        combos[treatment] = line
    
    # writing out to a CSV file
    with open("test_combos.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["treatment no.", "upload (kbps)", "download (kbps)", "latency (ms)"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos.csv' ----- "

In [None]:
up_low = 0 # Enter in Mbps
up_high = 200 # Enter in Mbps
up_step = 100 # Enter in Mbps
down_low = 0 # Enter in Mbps
down_high = 200 # Enter in Mbps
down_step = 100 # Enter in Mbps
rtt_low = 20 # Enter in ms
rtt_high = 120 # Enter in ms
rtt_step = 20 # Enter in ms

create_csv(up_low, up_high, up_step, down_low, down_high, down_step, rtt_low, rtt_high, rtt_step)

### Creating a CSV combinatorially (with packet loss):

In [None]:
def create_csv_with_packet_loss(up_low, up_high, up_step, down_low, down_high, down_step, rtt_low, rtt_high, rtt_step, pack_low, pack_high, pack_step):
    
    # creating values from range and stepsize
    up_vals = []
    for w in range(up_low, up_high+1, up_step):
        up_vals.append(w * 1000) #converting Mbps into Kbps
    
    down_vals = []
    for x in range(down_low, down_high+1, down_step):
        down_vals.append(x * 1000) #converting Mbps into Kbps
    
    rtt_vals = []
    for y in range(rtt_low, rtt_high+1, rtt_step):
        rtt_vals.append(y)
        
    pack_vals = []
    for z in range(pack_low, pack_high+1, pack_step):
        pack_vals.append(z)
    
    # creating combinations
    combos = []
    for w in up_vals:
        for x in down_vals:
            for y in rtt_vals:
                for z in pack_vals:
                    combo = [w, x, y, z]
                    combos.append(combo)
    
    # adding a treatment number column
    for treatment in range(len(combos)):
        line = [treatment + 1] + combos[treatment]
        combos[treatment] = line
    
    # writing out to a CSV file
    with open("test_combos.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["treatment no.", "upload (kbps)", "download (kbps)", "latency (ms)", "packet loss (%)"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos.csv' ----- "

In [None]:
up_low = 0 # Enter in Mbps
up_high = 200 # Enter in Mbps
up_step = 100 # Enter in Mbps
down_low = 0 # Enter in Mbps
down_high = 200 # Enter in Mbps
down_step = 100 # Enter in Mbps
rtt_low = 20 # Enter in ms
rtt_high = 120 # Enter in ms
rtt_step = 20 # Enter in ms
pack_low = 0 # Enter as percentage
pack_high = 20 # Enter as percentage
pack_step = 5 # Enter as percentage

create_csv_with_packet_loss(up_low, up_high, up_step, down_low, down_high, down_step, rtt_low, rtt_high, rtt_step, pack_low, pack_high, pack_step)

### Same thing except defining values for each varible instead of range and stepsize (without packet loss):

In [2]:
def create_csv_specific(up_vals, down_vals, rtt_vals):
    
    # converting Mbps -> Kbps
    for i in range(len(up_vals)):
        up_vals[i] = up_vals[i] * 1000 
        
    for j in range(len(down_vals)):
        down_vals[j] = down_vals[j] * 1000 
    
    # creating combinations
    combos = []
    for i1 in up_vals:
        for j1 in down_vals:
            for k1 in rtt_vals:
                for i2 in up_vals:
                    for j2 in down_vals:
                        for k2 in rtt_vals:
                            combo = [i1, j1, k1, i2, j2, k2]
                            combos.append(combo)
    
    # adding a treatment number column
    for treatment in range(len(combos)):
        line = [treatment + 1] + combos[treatment]
        combos[treatment] = line
    
    # writing out to a CSV file
    with open("test_combos.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["treatment no", "e - upload (kbps)", "e - download (kbps)", "e - latency (ms)", "a - upload (kbps)", "a - download (kbps)", "a - latency (ms)"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos.csv' ----- "

In [3]:
up_vals = [1, 20, 50] # Enter in Mbps
down_vals = [10, 50, 100] # Enter in Mbps
rtt_vals = [20, 350, 700] # Enter in ms


create_csv_specific(up_vals, down_vals, rtt_vals)

" ----- 729 treatments successfully created in file 'test_combos.csv' ----- "

### Same thing except defining values for each varible instead of range and stepsize (with packet loss):

In [28]:
def create_csv_specific_with_packet_loss(up_vals, down_vals, rtt_vals, pack_vals):
    
    # converting Mbps -> Kbps
    for i in range(len(up_vals)):
        up_vals[i] = up_vals[i] * 1000 
        
    for j in range(len(down_vals)):
        down_vals[j] = down_vals[j] * 1000 
    
    # creating combinations
    combos = []
    for w in up_vals:
        for x in down_vals:
            for y in rtt_vals:
                for z in pack_vals:
                    combo = [w, x, y, z]
                    combos.append(combo)
    
    # adding a treatment number column
    for treatment in range(len(combos)):
        line = [treatment + 1] + combos[treatment]
        combos[treatment] = line
    
    # writing out to a CSV file
    with open("test_combos_aadya.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["treatment no.", "upload (kbps)", "download (kbps)", "latency (ms)", "packet loss (%)"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos.csv' ----- "

In [None]:
up_vals = [12, 25, 50, 100, 250, 1000] # Enter in Mbps
down_vals = [12, 25, 50, 100, 250, 1000] # Enter in Mbps
rtt_vals = [20, 50, 100, 150, 200, 300, 400] # Enter in ms
pack_vals = [0, 5, 10, 15, 20, 30] # Enter as percentage

create_csv_specific(up_vals, down_vals, rtt_vals, pack_vals)

# Function to shuffle the rows in a CSV file

In [25]:
def shuffle_rows(filepath, new_filepath):
    
    # importing the CSV file rows
    rows = []
    with open(filepath, 'r') as file1:
        csvreader = csv.reader(file1)
        header = next(csvreader)
        for row in csvreader:
            rows.append(row)
    
    # shuffling the rows
    random.shuffle(rows)
    
    # re-numbering the treatment no. column
    for treatment in range(len(rows)):
        rows[treatment][0] = treatment+1

    # creating separate files (one for Ellen, one for Aadya)
    new_rows = []
    for row in rows:
        new_combo = [row[0], row[1], row[2], row[3]]
        aadya_combo = [row[0], row[4], row[5], row[6]]
        new_rows.append(new_combo)
    
    # writing out shuffled rows to new CSV files 
    with open(new_filepath, "w", newline='') as file2:
        writer = csv.writer(file2)
        writer.writerow(['treatment no.', 'upload (kbps)', 'download (kbps)', 'latency (ms)'])
        for row in new_rows:
            writer.writerow(row)
    
    return f"{len(rows)} rows successfully shuffled into new file: {new_filepath}" 

In [5]:
filepath = "subjective_combos_ellen.csv"
new_filepath = "subjective_combos_shuffled_ellen.csv"
shuffle_rows(filepath, new_filepath)

"6 rows successfully shuffled into new files: 'test_combos_shuffled_ellen.csv' and 'test_combos_shuffled_aadya.csv'"

# FIRST REAL RUN CODE

In [6]:
def create_csv_specific_onesided(up_vals, down_vals, rtt_vals, repeats=3):
    
    # converting Mbps -> Kbps
    for i in range(len(up_vals)):
        up_vals[i] = up_vals[i] * 1000 
        
    for j in range(len(down_vals)):
        down_vals[j] = down_vals[j] * 1000 
    
    # creating combinations
    combos = []
    initial_treatment = 1.0
    for i1 in up_vals:
        for j1 in down_vals:
            for k1 in rtt_vals:
                treatment = initial_treatment
                for i in range(repeats):
                    combo = [i1, j1, k1, round(treatment, 1)]
                    combos.append(combo)
                    treatment +=0.1
                initial_treatment += 1
                

    # writing out to a CSV file
    with open("test_combos_shuffled_aadya_test.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["upload (kbps)", "download (kbps)", "latency (ms)", "condition"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos_shuffled_aadya.csv' ----- "

In [9]:
up =  [20, 5, 1, 0.5]  
down = [25, 12, 5, 2]
rtt = [250, 400, 550, 750]

create_csv_specific_onesided(up, down, rtt, repeats = 3)

" ----- 192 treatments successfully created in file 'test_combos_shuffled_aadya.csv' ----- "

## Shuffle rows 
and add a treatment number sequence

In [10]:
df = pd.read_csv('test_combos_shuffled_aadya.csv')
df = df.sample(frac=1).reset_index(drop=True)
df.insert(0, 'Treatment no.', range(1, len(df) + 1))
df = df.dropna(axis=1, how='all')
df.columns = ['Treatment no.', 'Upload', 'Download', 'Latency', 'Condition']
df.to_csv("test_combos_shuffled_aadya.csv", index=False)

### Create a csv file with the same length as another one but they're all the same combination

In [52]:
combos_df = pd.read_csv('test_combos_shuffled_aadya.csv')
num_rows = len(combos_df)
treatment_numbers = list(range(1, num_rows + 1))
new_data = {
    'treatment no.': treatment_numbers, 
    'upload (kbps)': ['50000'] * num_rows, 
    'download (kbps)': ['100000']* num_rows,
    'latency (ms)': ['50']* num_rows,}  
new_df = pd.DataFrame(new_data)

new_df.to_csv('test_combos_shuffled_ellen.csv', index=False)

# STAGE 1.1
adding in a better download and rtt val

In [2]:
def create_csv_stage1_1(up_vals, down_vals, rtt_vals, repeats=3, file_name = "test_combos_shuffled_aadya_1.csv"):
    
    # converting Mbps -> Kbps
    for i in range(len(up_vals)):
        up_vals[i] = up_vals[i] * 1000 
        
    for j in range(len(down_vals)):
        down_vals[j] = down_vals[j] * 1000 
    
    # creating combinations
    combos = []
    initial_treatment = 1.0
    for i1 in up_vals:
        for j1 in down_vals:
            for k1 in rtt_vals:
                treatment = initial_treatment
                for i in range(repeats):
                    combo = [i1, j1, k1, round(treatment, 1)]
                    combos.append(combo)
                    treatment +=0.1
                initial_treatment += 1
                

    # writing out to a CSV file
    with open(file_name, "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["upload (kbps)", "download (kbps)", "latency (ms)", "condition"])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'test_combos_shuffled_aadya1.csv' ----- "

In [5]:
up =  [20, 5, 1, 0.5]  
down = [50]
rtt = [100, 250, 400, 550, 750]

file1 = create_csv_stage1_1(up, down, rtt, repeats = 3, file_name = "test_combos_shuffled_aadya_1.csv")

up =  [20, 5, 1, 0.5]  
down = [25, 12, 5, 2]
rtt = [100]

file2 = create_csv_stage1_1(up, down, rtt, repeats = 3, file_name = "test_combos_shuffled_aadya_12.csv")

df1 = pd.read_csv("test_combos_shuffled_aadya_1.csv")  # Replace with the path to your first CSV file
df2 = pd.read_csv("test_combos_shuffled_aadya_12.csv")  # Replace with the path to your second CSV file

combined_df = pd.concat([df1, df2], ignore_index=True)  # Stacks the files vertically, one below the other

combined_df.to_csv('combined_file.csv', index=False)  # Replace with your desired output file path

### shuffling the rows 

In [51]:
df = pd.read_csv('combined_file.csv')
df = df.sample(frac=1).reset_index(drop=True)
df.insert(0, 'Treatment no.', range(193, len(df) + 193))
df = df.dropna(axis=1, how='all')
df.columns = ['Treatment no.', 'Upload', 'Download', 'Latency', 'Condition']
df.to_csv("combined_test_combos_shuffled_aadya.csv", index=False)

# STAGE 2
### both varying but always the same

In [55]:
up =  [20, 5, 1, 0.5]  
down = [50, 25, 12, 5, 2]
rtt = [100, 250, 400, 550, 750]
create_csv_stage1_1(up, down, rtt, repeats = 3, file_name = "STAGE2_test_combos_aadya.csv")

In [56]:
df = pd.read_csv('STAGE2_test_combos_aadya.csv')
df = df.sample(frac=1).reset_index(drop=True)
df.insert(0, 'Treatment no.', range(1, len(df) + 1))
df = df.dropna(axis=1, how='all')
df.columns = ['Treatment no.', 'Upload', 'Download', 'Latency', 'Condition']
df.to_csv("STAGE2_test_combos_shuffled_aadya.csv", index=False)

# STAGE 3
### both varying and random sample from the combinatorial space

full factorial number of combos script

In [3]:
upload_speeds = [20, 5, 1, 0.5] 
download_speeds = [50, 25, 12, 5, 2]
rtt_values = [100, 250, 400, 550, 750]

# Enumerate all individual sets
party_a_combinations = list(product(upload_speeds, download_speeds, rtt_values))
party_b_combinations = list(product(upload_speeds, download_speeds, rtt_values))

# enumerate all combination sets
all_tests = list(product(party_a_combinations, party_b_combinations))

# Print the number of tests
print(f"Total tests: {len(all_tests)}")

Total tests: 10000


## Sampling for not full factorial

#### using stratified sampling

In [14]:
random.seed(42)
# Define function to categorize based on strata (upload, download, rtt ranges)
def categorize(upload_val, download_val, rtt_val):
    # Define categories (you can adjust ranges as needed)
    upload_category = 'High' if upload_val >= 5 else 'Low'
    download_category = 'High' if download_val >= 25 else 'Low'
    rtt_category = 'High' if rtt_val >= 400 else 'Low'
    return (upload_category, download_category, rtt_category)

# Group combinations into strata
strata = defaultdict(list)
for (a, b) in all_tests:
    a_category = categorize(*a)
    b_category = categorize(*b)
    strata[(a_category, b_category)].append((a, b))

# Sample proportionally from each stratum
sample_size = 8  # adjust sample size as desired
samples = []
for category, group in strata.items():
    # Calculate number of samples to take from this stratum
    proportion = sample_size / len(all_tests)
    num_samples = max(1, int(proportion * len(group)))  # ensure at least one sample from each stratum
    samples.extend(random.sample(group, min(num_samples, len(group))))

# Print the number of samples
print(f"Number of sampled tests: {len(samples)}")
print(f"Sampled tests: {samples[:5]}")  # print a few samples to check

Number of sampled tests: 64
Sampled tests: [((20, 50, 250), (5, 25, 100)), ((20, 50, 100), (20, 25, 400)), ((5, 25, 250), (5, 2, 100)), ((20, 25, 250), (5, 2, 550)), ((20, 25, 250), (0.5, 25, 250))]


In [4]:
# Put all combos in an array
combinations_array = np.array(party_a_combinations)


def stratified_sampling(combinations, num_samples):
    # initialise a disctionary that groups combinations by their levels (eg. all combinations with upload 20 can accessed from one place)
    strata = {
        'upload': {level: [] for level in set(combinations[:, 0])},
        'download': {level: [] for level in set(combinations[:, 1])},
        'rtt': {level: [] for level in set(combinations[:, 2])}
    }
    
    for comb in combinations:
        strata['upload'][comb[0]].append(tuple(comb))
        strata['download'][comb[1]].append(tuple(comb))
        strata['rtt'][comb[2]].append(tuple(comb))
    
    # Number of samples to select from each stratum
    
    samples_per_stratum = max(num_samples // (len(strata['upload']) * len(strata['download']) * len(strata['rtt'])), 1)
    
    selected_samples = set()
    
    # Randomly sample from each stratum
    for upload_level, upload_stratum in strata['upload'].items():
        for download_level, download_stratum in strata['download'].items():
            for rtt_level, rtt_stratum in strata['rtt'].items():
                # Find common elements in all three strata
                stratum_comb = [c for c in upload_stratum if c in download_stratum and c in rtt_stratum]
                if len(stratum_comb) > 0:
                    selected_samples.update(random.sample(stratum_comb, min(samples_per_stratum, len(stratum_comb))))
    
    # Ensure we have exactly `num_samples` samples
    if len(selected_samples) < num_samples:
        remaining_samples = list(set(map(tuple, combinations)) - selected_samples)
        additional_samples = random.sample(remaining_samples, num_samples - len(selected_samples))
        selected_samples.update(additional_samples)
    
    return list(selected_samples)[:num_samples]

In [6]:
num_samples = 20
representative_samples = stratified_sampling(combinations_array, num_samples)

# Display the representative samples
print("Representative Samples:")
for i, sample in enumerate(representative_samples, start=1):
    print(f"Sample {i}: {sample}")

Representative Samples:
Sample 1: (20.0, 50.0, 400.0)
Sample 2: (1.0, 2.0, 100.0)
Sample 3: (1.0, 50.0, 750.0)
Sample 4: (20.0, 2.0, 750.0)
Sample 5: (0.5, 12.0, 400.0)
Sample 6: (5.0, 2.0, 550.0)
Sample 7: (5.0, 5.0, 250.0)
Sample 8: (1.0, 5.0, 400.0)
Sample 9: (20.0, 12.0, 400.0)
Sample 10: (1.0, 12.0, 750.0)
Sample 11: (20.0, 50.0, 100.0)
Sample 12: (0.5, 25.0, 750.0)
Sample 13: (20.0, 25.0, 400.0)
Sample 14: (1.0, 25.0, 750.0)
Sample 15: (1.0, 2.0, 550.0)
Sample 16: (20.0, 12.0, 100.0)
Sample 17: (0.5, 5.0, 100.0)
Sample 18: (5.0, 50.0, 250.0)
Sample 19: (1.0, 50.0, 400.0)
Sample 20: (20.0, 2.0, 400.0)


# stage 3 specific combos

In [7]:
up_vals =  [20, 5, 1, 0.5]  
down_vals = [50, 25, 12, 5, 2]
rtt = [100, 250, 400, 550, 750]

for i in range(len(up_vals)):
    up_vals[i] = up_vals[i] * 1000 
        
for j in range(len(down_vals)):
    down_vals[j] = down_vals[j] * 1000 

print(up_vals, down_vals)

[20000, 5000, 1000, 500.0] [50000, 25000, 12000, 5000, 2000]


In [36]:
triples = [
    (20000, 25000, 250), #444
    (5000,12000,400), #333
    (1000,5000, 550), #222
    (500, 2000, 750), #111
    (20000, 25000, 400), #443
    (20000, 25000, 550), #442
    (20000, 25000, 750), #441
    (5000,12000,250), #334
    (5000,12000,550),#224
    (5000,12000,250), #114
    (500, 2000, 400), #113
    (500, 2000, 550), #112
    (1000,5000, 750), #221
    (5000,12000,750),#331
]

# Generate all combinations of two triples
combinations_of_triples = list(combinations(triples, 2))

with open("combinations_stage3.csv", "w", newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        "e - upload (kbps)", "e - download (kbps)", "e - latency (ms)", "condition no.",
        "a - upload (kbps)", "a - download (kbps)", "a - latency (ms)", "condition no."
    ])
    for index, combo in enumerate(combinations_of_triples, start=1):
        for repeat in range(3):
            condition_number = f"{index}.{repeat}"
            row = [
                combo[0][0], combo[0][1], combo[0][2], condition_number,
                combo[1][0], combo[1][1], combo[1][2], condition_number
            ]
            writer.writerow(row)

In [29]:
def shuffle_rows_stage3(filepath):
    
    # importing the CSV file rows
    rows = []
    with open(filepath, 'r') as file1:
        csvreader = csv.reader(file1)
        header = next(csvreader)
        for row in csvreader:
            rows.append(row)
    
    # shuffling the rows
    random.shuffle(rows)

    # creating separate files (one for Ellen, one for Aadya)
    ellens_rows = []
    aadyas_rows = []
    for row in rows:
        ellen_combo = [row[0], row[1], row[2], row[3]]
        aadya_combo = [row[4], row[5], row[6], row[7]]
        ellens_rows.append(ellen_combo)
        aadyas_rows.append(aadya_combo)
    
    # writing out shuffled rows to new CSV files 
    with open("STAGE3_test_combos_shuffled_ellen.csv", "w", newline='') as file2:
        writer = csv.writer(file2)
        writer.writerow([ 'upload (kbps)', 'download (kbps)', 'latency (ms)', 'condition no.'])
        for row in ellens_rows:
            writer.writerow(row)
    with open("STAGE3_test_combos_shuffled_aadya.csv", "w", newline='') as file3:
        writer = csv.writer(file3)
        writer.writerow(['upload (kbps)', 'download (kbps)', 'latency (ms)', 'condition no.'])
        for row in aadyas_rows:
            writer.writerow(row)

    df1 = pd.read_csv("STAGE3_test_combos_shuffled_ellen.csv")
    df2 = pd.read_csv("STAGE3_test_combos_shuffled_aadya.csv")

    # Add a column with sequential numbers starting from 1
    df1['treatment no.'] = range(1, len(df1) + 1)
    df1 = df1[['treatment no.'] + [col for col in df1.columns if col != 'treatment no.']]
    df2['treatment no.'] = range(1, len(df2) + 1)
    df2 = df2[['treatment no.'] + [col for col in df2.columns if col != 'treatment no.']]

    #Save the updated DataFrames back to CSV files
    df1.to_csv("STAGE3_test_combos_shuffled_ellen.csv", index=False)
    df2.to_csv("STAGE3_test_combos_shuffled_aadya.csv", index=False)

    print("Treatments added")
    
    return f"{len(rows)} rows successfully shuffled into new files: 'STAGE3_test_combos_shuffled_ellen.csv' and 'STAGE3_test_combos_shuffled_ellen.csv'" 

In [37]:
shuffle_rows_stage3('combinations_stage3.csv')

Treatments added


"273 rows successfully shuffled into new files: 'STAGE3_test_combos_shuffled_ellen.csv' and 'STAGE3_test_combos_shuffled_ellen.csv'"

# STAGE 4 full factorial

In [33]:
def STAGE4_create_csv_specific(up_vals, down_vals, rtt_vals, repeats = 3):
    
    # converting Mbps -> Kbps
    for i in range(len(up_vals)):
        up_vals[i] = up_vals[i] * 1000 
        
    for j in range(len(down_vals)):
        down_vals[j] = down_vals[j] * 1000 
    
    # creating combinations
    combos = []
    initial_treatment = 1.0
    for i1 in up_vals:
        for j1 in down_vals:
            for k1 in rtt_vals:
                for i2 in up_vals:
                    for j2 in down_vals:
                        for k2 in rtt_vals:
                            if not ([i1, j1, k1] == [i2, j2, k2]): #removing any where they are the same as that would have been done in stage 2
                                treatment = initial_treatment
                                for i in range(repeats):
                                    combo = [i1, j1, k1, i2, j2, k2, round(treatment,1)]
                                    combos.append(combo)
                                    treatment +=0.1
                                initial_treatment += 1
    
    # writing out to a CSV file
    with open("STAGE4_test_combos.csv", "w", newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["e - upload (kbps)", "e - download (kbps)", "e - latency (ms)", "a - upload (kbps)", "a - download (kbps)", "a - latency (ms)", "condition no."])
        for combo in combos:
            writer.writerow(combo)
            
    return f" ----- {len(combos)} treatments successfully created in file 'STAGE4_test_combos.csv' ----- "

In [34]:
def shuffle_rows_stage4(filepath):
    
    # importing the CSV file rows
    rows = []
    with open(filepath, 'r') as file1:
        csvreader = csv.reader(file1)
        header = next(csvreader)
        for row in csvreader:
            rows.append(row)
    
    # shuffling the rows
    random.shuffle(rows)

    # creating separate files (one for Ellen, one for Aadya)
    ellens_rows = []
    aadyas_rows = []
    for row in rows:
        ellen_combo = [row[0], row[1], row[2], row[6]]
        aadya_combo = [row[3], row[4], row[5], row[6]]
        ellens_rows.append(ellen_combo)
        aadyas_rows.append(aadya_combo)
    
    # writing out shuffled rows to new CSV files 
    with open("STAGE4_test_combos_shuffled_ellen.csv", "w", newline='') as file2:
        writer = csv.writer(file2)
        writer.writerow([ 'upload (kbps)', 'download (kbps)', 'latency (ms)', 'condition no.'])
        for row in ellens_rows:
            writer.writerow(row)
    with open("STAGE4_test_combos_shuffled_aadya.csv", "w", newline='') as file3:
        writer = csv.writer(file3)
        writer.writerow(['upload (kbps)', 'download (kbps)', 'latency (ms)', 'condition no.'])
        for row in aadyas_rows:
            writer.writerow(row)

    df1 = pd.read_csv("STAGE4_test_combos_shuffled_ellen.csv")
    df2 = pd.read_csv("STAGE4_test_combos_shuffled_aadya.csv")

    # Add a column with sequential numbers starting from 1
    df1['treatment no.'] = range(1, len(df1) + 1)
    df1 = df1[['treatment no.'] + [col for col in df1.columns if col != 'treatment no.']]
    df2['treatment no.'] = range(1, len(df2) + 1)
    df2 = df2[['treatment no.'] + [col for col in df2.columns if col != 'treatment no.']]

    #Save the updated DataFrames back to CSV files
    df1.to_csv("STAGE4_test_combos_shuffled_ellen.csv", index=False)
    df2.to_csv("STAGE4_test_combos_shuffled_aadya.csv", index=False)

    print("Treatments added")
    
    return f"{len(rows)} rows successfully shuffled into new files: 'STAGE4_test_combos_shuffled_ellen.csv' and 'STAGE4_test_combos_shuffled_ellen.csv'" 

In [35]:
up_vals = [20, 5, 1, 0.5] 
down_vals = [25, 12, 5, 2]
rtt_vals = [250, 400, 550, 750]

STAGE4_create_csv_specific(up_vals, down_vals, rtt_vals)
shuffle_rows_stage4('STAGE3_test_combos.csv')


Treatments added


"12096 rows successfully shuffled into new files: 'STAGE4_test_combos_shuffled_ellen.csv' and 'STAGE4_test_combos_shuffled_ellen.csv'"

Ok potentially the easiest way is going to be just picking a random list of numbers between 1 and 12096 and testing - not sure about stratified sampling etc

In [49]:
levels = {
    'e-up': [1, 2, 3, 4],
    'e-down': [1, 2, 3, 4], 
    'e-rtt': [1, 2, 3, 4],  
    'a-up': [1, 2, 3, 4],  
    'a-down': [1, 2, 3, 4],  
    'a-rtt': [1, 2, 3, 4]   
}

# Generate the full factorial design
full_design = list(product(*levels.values()))
print( f" ----- full factorial design of {len(full_design)} treatments' ----- ")

filtered_design = [comb for comb in full_design if not (comb[0] == comb[3] and comb[1] == comb[4] and comb[2] == comb[5])]
print( f" ----- filtered factorial design of {len(filtered_design)} treatments' ----- ")

# Take a random sample
np.random.seed(2) 
sampled_indices = np.random.choice(len(full_design), size=60, replace=False)
fractional_design = [full_design[i] for i in sampled_indices]

# Convert to DataFrame for better readability
df = pd.DataFrame(fractional_design, columns=levels.keys())
print( f" ----- random sample factorial design of {len(df)} treatments' ----- ")
print(df.head())


 ----- full factorial design of 4096 treatments' ----- 
 ----- filtered factorial design of 4032 treatments' ----- 
 ----- random sample factorial design of 60 treatments' ----- 
   e-up  e-down  e-rtt  a-up  a-down  a-rtt
0     4       1      3     3       2      1
1     2       4      2     2       1      2
2     2       1      2     3       2      2
3     1       1      4     2       1      1
4     1       3      3     4       1      2
