In [1]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages

# Folder containing JSON files
data_folder = "parsed"

# Columns every dataset must provide; all three charts below read them.
required_columns = {"questionNumber", "score"}


def _save_page(pdf, draw, figsize, ylabel, title):
    """Render one chart and append it to ``pdf`` as a new page.

    ``draw`` is called with the freshly created axes and is expected to put
    the seaborn plot on it. Labels, title and tick rotation are applied
    afterwards so they override anything seaborn sets by default. The figure
    is always closed, even if drawing or saving raises, so figures do not
    accumulate while looping over many files.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        draw(ax)
        ax.set_xlabel("Question Number")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.tick_params(axis="x", labelrotation=45)
        pdf.savefig(fig)  # Save the plot to the PDF
    finally:
        plt.close(fig)


with PdfPages('datasets_metrics.pdf') as pdf:
    # os.listdir() returns entries in arbitrary order; sort them so the page
    # order of the report is the same on every run and platform.
    for file in sorted(os.listdir(data_folder)):
        if not file.endswith(".json"):
            continue

        with open(os.path.join(data_folder, file), "r", encoding="utf-8") as f:
            data = json.load(f)

        df = pd.DataFrame(data)

        # An empty dataset, or one lacking the expected fields, would raise
        # KeyError below and abort the whole report; skip it visibly instead.
        missing = required_columns.difference(df.columns)
        if missing:
            print(f"Skipping {file}: missing column(s) {sorted(missing)}")
            continue

        question_counts = df["questionNumber"].value_counts().sort_index()
        average_scores = df.groupby("questionNumber")["score"].mean()

        # NOTE: the lambdas below are invoked immediately inside _save_page,
        # within the same loop iteration, so late binding is not a concern.

        # Plot response count
        _save_page(
            pdf,
            lambda ax: sns.barplot(
                x=question_counts.index, y=question_counts.values, ax=ax
            ),
            figsize=(10, 5),
            ylabel="Number of Responses",
            title=f"Number of Responses per Question - {file}",
        )

        # Plot score distribution per question
        _save_page(
            pdf,
            lambda ax: sns.boxplot(x=df["questionNumber"], y=df["score"], ax=ax),
            figsize=(12, 6),
            ylabel="Score",
            title=f"Score Distribution per Question - {file}",
        )

        # Plot average score per question
        _save_page(
            pdf,
            lambda ax: sns.barplot(
                x=average_scores.index, y=average_scores.values, ax=ax
            ),
            figsize=(10, 5),
            ylabel="Average Score",
            title=f"Average Score per Question - {file}",
        )
