In [11]:
import numpy as np
import pandas as pd
from pyDOE2 import ff2n

factors_levels = { # Renamed to clarify it's about levels
    "Claim Complexity": ["Simple", "Medium", "Complex"],
    "Debate Topic": ["Politics", "Science", "General Knowledge"],
    "Prefetching Sources": [0, 5, 10],
    "Unique Sources": [2, 7, 10],
    "Compression Ratio": [0.25, 0.5, 0.75],
    "Token Ratio": [0.25, 0.75]
}

factor_names = list(factors_levels.keys())
n_factors = len(factor_names)
arr = ff2n(n_factors) # Generate 2-level full factorial design in 0/1 encoding
design_coded = pd.DataFrame(2 * arr - 1, columns=factor_names) # Convert to -1/+1 encoding, call it design_coded

print("Shape of design before fractionalization:", design_coded.shape)

# Define generators for a 1/4 fractional design (Resolution IV for 6 factors): I = ABC = DEF
# A=Claim Complexity, B=Debate Topic, C=Prefetching Sources, D=Unique Sources, E=Compression Ratio, F=Token Ratio
gen1 = design_coded["Claim Complexity"] * design_coded["Debate Topic"] * design_coded["Prefetching Sources"]
gen2 = design_coded["Unique Sources"] * design_coded["Compression Ratio"] * design_coded["Token Ratio"]


print("\nFirst few values of gen1:\n", gen1.head()) # DEBUG - should be -1 and 1 now
print("\nFirst few values of gen2:\n", gen2.head()) # DEBUG - should be -1 and 1 now

design_fractional = design_coded.loc[(gen1 == 1) & (gen2 == 1)].copy() # Create fractional design

print("\nShape of design after fractionalization:", design_fractional.shape)
print("\nDesign after fractionalization (first few rows):\n", design_fractional.head())


# Re-encode factors to desired levels *from the -1/+1 coded design*
design = design_fractional.copy() # Work on a copy to keep coded and decoded separate if needed

design['Claim Complexity'] = np.select(
    [design["Claim Complexity"] == -1, design["Claim Complexity"] == 1],
    [0, 2], default=1
)
design['Debate Topic'] = np.select(
    [design["Debate Topic"] == -1, design["Debate Topic"] == 1],
    [0, 2], default=1
)
design['Prefetching Sources'] = np.select(
    [design["Prefetching Sources"] == -1, design["Prefetching Sources"] == 1],
    [0, 2], default=1
)
design['Unique Sources'] = np.select(
    [design["Unique Sources"] == -1, design["Unique Sources"] == 1],
    [0, 2], default=1
)
design['Compression Ratio'] = np.select(
    [design["Compression Ratio"] == -1, design["Compression Ratio"] == 1],
    [0, 2], default=1
)
design['Token Ratio'] = np.select(
    [design["Token Ratio"] == -1, design["Token Ratio"] == 1],
    [0, 1]
)


decode = {
    "Claim Complexity": {0:"Simple", 1:"Medium", 2:"Complex"},
    "Debate Topic": {0:"Politics", 1:"Science", 2:"General Knowledge"},
    "Prefetching Sources": {0: 0, 1:5, 2:10},
    "Unique Sources": {0:2, 1:7, 2:10},
    "Compression Ratio": {0: 0.25, 1:0.5, 2:0.75},
    "Token Ratio": {0: 0.25, 1: 0.75}
}


design['Claim Complexity (Decoded)'] = ""
design['Debate Topic (Decoded)'] = ""
design['Prefetching Sources (Decoded)'] = ""
design['Unique Sources (Decoded)'] = ""
design['Compression Ratio (Decoded)'] = ""
design['Token Ratio (Decoded)'] = ""


for index in design.index:
    design.loc[index, 'Claim Complexity (Decoded)'] = decode['Claim Complexity'][design['Claim Complexity'][index]]
    design.loc[index, 'Debate Topic (Decoded)'] = decode['Debate Topic'][design['Debate Topic'][index]]
    design.loc[index, 'Prefetching Sources (Decoded)'] = decode['Prefetching Sources'][design['Prefetching Sources'][index]]
    design.loc[index, 'Unique Sources (Decoded)'] = decode['Unique Sources'][design['Unique Sources'][index]]
    design.loc[index, 'Compression Ratio (Decoded)'] = decode['Compression Ratio'][design['Compression Ratio'][index]]
    design.loc[index, 'Token Ratio (Decoded)'] = decode['Token Ratio'][design['Token Ratio'][index]]


design_output = design[[
    'Claim Complexity (Decoded)',
    'Debate Topic (Decoded)',
    'Prefetching Sources (Decoded)',
    'Unique Sources (Decoded)',
    'Compression Ratio (Decoded)',
    'Token Ratio (Decoded)'
    ]]

print("\nFinal design output:\n")
print(design_output)
design_output.to_csv('fractional_factorial_design.csv', index=False)

Shape of design before fractionalization: (64, 6)

First few values of gen1:
 0   -27.0
1     9.0
2     9.0
3    -3.0
4     9.0
dtype: float64

First few values of gen2:
 0   -27.0
1   -27.0
2   -27.0
3   -27.0
4   -27.0
dtype: float64

Shape of design after fractionalization: (1, 6)

Design after fractionalization (first few rows):
     Claim Complexity  Debate Topic  Prefetching Sources  Unique Sources  \
63               1.0           1.0                  1.0             1.0   

    Compression Ratio  Token Ratio  
63                1.0          1.0  

Final design output:

   Claim Complexity (Decoded) Debate Topic (Decoded)  \
63                    Complex      General Knowledge   

   Prefetching Sources (Decoded) Unique Sources (Decoded)  \
63                            10                       10   

   Compression Ratio (Decoded) Token Ratio (Decoded)  
63                        0.75                  0.75  
