### Line plots for accuracy of binary classification

In [None]:
import numpy as np    
import matplotlib.pyplot as plt    
  
# Model sizes, listed in the order they are plotted along the x-axis.
model_sizes = ["8M", "35M", "150M", "350M", "650M"]

# Amounts of data; one line is drawn per entry.
data_sizes = ["Full", "Half", "Quarter"]

# Color-blind friendly palette, one color per entry of data_sizes.
colors = ["#0072B2", "#E69F00", "#009E73"]

# Test accuracy and its standard error, stored as (accuracy, standard_error)
# pairs keyed first by data amount and then by model size. The numbers were
# copied from the output of HD_vs_CoV.py (saved in the results folder).
data = {
    "Full": {
        "8M": (0.6762, 0.0034),
        "35M": (0.6877, 0.0026),
        "150M": (0.7, 0.0014),
        "350M": (0.71522, 0.00257),
        "650M": (0.7089, 0.0013),
    },
    "Half": {
        "8M": (0.6732, 0.0035),
        "35M": (0.6869, 0.0028),
        "150M": (0.6974, 0.0036),
        "350M": (0.7042, 0.0023),
        "650M": (0.699, 0.00343),
    },
    "Quarter": {
        "8M": (0.674, 0.0034),
        "35M": (0.682, 0.0023),
        "150M": (0.6884, 0.0042),
        "350M": (0.69445, 0.00208),
        "650M": (0.6909, 0.0029),
    },
}
  
# Plot test accuracy against model size: one line per data amount, with
# error bars taken from the stored standard errors. The figure is written to
# "accuracy_plot.png" and then displayed.
fig, ax = plt.subplots(figsize=(12, 6))  # wide figure for better visibility
for data_size, color in zip(data_sizes, colors):
    per_model = data[data_size]  # look the data amount up once per line
    # Pull the two halves of each (accuracy, standard_error) pair in
    # model_sizes order so the values line up with the x positions.
    accuracies = [per_model[size][0] for size in model_sizes]
    errors = [per_model[size][1] for size in model_sizes]
    ax.errorbar(model_sizes, accuracies, yerr=errors, fmt='-o', capsize=4, label=data_size, color=color)

ax.set_xlabel("Model Size (Parameters)")
ax.set_ylabel("Accuracy")
ax.legend(title="Data Amount")
ax.grid(True, linestyle='--', alpha=0.6)
fig.tight_layout()  # adjust layout to prevent clipping
fig.savefig("accuracy_plot.png", dpi=300, bbox_inches='tight')  # save with tight bounding box
plt.show()