In [1]:
from src.estimation import estimate_text_distribution
from src.MLE import MLE

In [2]:
# Run the distribution estimation once per subject, pairing that subject's
# human-written and AI-generated training data.
for name in ("CS", "EESS", "Math", "Phys", "Stat"):
    human_path = f"data/training_data/{name}/human_data.parquet"
    ai_path = f"data/training_data/{name}/ai_data.parquet"
    # presumably the file the estimated distribution is written to; a later
    # cell passes this same path to MLE
    distribution_path = f"distribution/{name}.parquet"
    estimate_text_distribution(human_path, ai_path, distribution_path)

In [3]:
# For each subject, estimate the alpha value of the mixed validation text and
# report the absolute error against the ground-truth alpha named in the file.
column_width = 12  # wide enough for the longest header label, "Ground Truth"
header = ",".join(
    f"{label:>{column_width}}"
    for label in ("Ground Truth", "Prediction", "CI", "Error")
)
for name in ["CS","EESS","Math","Phys","Stat"]:
    # load the framework with this subject's distribution file
    model = MLE(f"distribution/{name}.parquet")
    # label the table and print the header once per subject rather than
    # repeating it before every data row
    print(f"{name} Results:")
    print(header)
    for alpha in [0,0.025,0.05,0.075,0.1,0.125,0.15,0.175,0.2,0.225,0.25]:
        estimated, ci = model.inference(f"data/validation_data/{name}/ground_truth_alpha_{alpha}.parquet")
        error = abs(estimated - alpha)
        # same width as the header so the columns line up
        print(",".join(f"{value:{column_width}.3f}" for value in (alpha, estimated, ci, error)))
    print("=====================================")

Ground Truth,Prediction,        CI,     Error
     0.000,     0.025,     0.003,     0.025
Ground Truth,Prediction,        CI,     Error
     0.025,     0.054,     0.003,     0.029
Ground Truth,Prediction,        CI,     Error
     0.050,     0.080,     0.004,     0.030
Ground Truth,Prediction,        CI,     Error
     0.075,     0.102,     0.004,     0.027
Ground Truth,Prediction,        CI,     Error
     0.100,     0.128,     0.005,     0.028
Ground Truth,Prediction,        CI,     Error
     0.125,     0.151,     0.005,     0.026
Ground Truth,Prediction,        CI,     Error
     0.150,     0.174,     0.005,     0.024
Ground Truth,Prediction,        CI,     Error
     0.175,     0.194,     0.005,     0.019
Ground Truth,Prediction,        CI,     Error
     0.200,     0.223,     0.005,     0.023
Ground Truth,Prediction,        CI,     Error
     0.225,     0.244,     0.006,     0.019
Ground Truth,Prediction,        CI,     Error
     0.250,     0.270,     0.006,     0.020
Ground Tru

In [3]:
# For each subject, estimate the alpha value of mixed text and calculate the
# error, using only the vocabulary of one part of speech at a time.
# Every result line is shown in the notebook and also written to
# results_<part_of_speech>.txt.

def _emit(line, out):
    """Print `line` to the notebook and write it, newline-terminated, to `out`."""
    print(line)
    out.write(line + "\n")


column_width = 12  # wide enough for the longest header label, "Ground Truth"
header = ",".join(
    f"{label:>{column_width}}"
    for label in ("Ground Truth", "Prediction", "CI", "Error")
)
for part_of_speech in ['adj', 'verb', 'adv']:
    # "w" mode starts a fresh results file for this part of speech on every run
    with open(f"results_{part_of_speech}.txt", "w", encoding="utf-8") as file:
        print(f"Results for vocabulary with just {part_of_speech}\n\n")
        file.write(f"Results for vocabulary with just {part_of_speech}\n\n")
        for name in ["CS", "EESS", "Math", "Phys", "Stat"]:
            _emit(f"{name} Results:", file)
            # Load the framework; `pos` presumably restricts the vocabulary to
            # this part of speech (confirm against src.MLE)
            model = MLE(f"distribution/{name}.parquet", pos=part_of_speech)
            # header once per subject rather than before every data row
            _emit(header, file)
            for alpha in [0, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25]:
                estimated, ci = model.inference(f"data/validation_data/{name}/ground_truth_alpha_{alpha}.parquet")
                error = abs(estimated - alpha)
                # same width as the header so the columns line up
                _emit(",".join(f"{value:{column_width}.3f}" for value in (alpha, estimated, ci, error)), file)
            _emit("=====================================", file)

Results for vocabulary with just adj


CS Results:
Ground Truth,Prediction,        CI,     Error
     0.000,     0.002,     0.002,     0.002
Ground Truth,Prediction,        CI,     Error
     0.025,     0.034,     0.007,     0.009
Ground Truth,Prediction,        CI,     Error
     0.050,     0.053,     0.008,     0.003
Ground Truth,Prediction,        CI,     Error
     0.075,     0.079,     0.009,     0.004
Ground Truth,Prediction,        CI,     Error
     0.100,     0.113,     0.009,     0.013
Ground Truth,Prediction,        CI,     Error
     0.125,     0.136,     0.010,     0.011
Ground Truth,Prediction,        CI,     Error
     0.150,     0.171,     0.010,     0.021
Ground Truth,Prediction,        CI,     Error
     0.175,     0.181,     0.010,     0.006
Ground Truth,Prediction,        CI,     Error
     0.200,     0.214,     0.010,     0.014
Ground Truth,Prediction,        CI,     Error
     0.225,     0.241,     0.011,     0.016
Ground Truth,Prediction,        CI,     Error
   