In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the preprocessed dataset from CSV. The path is relative, so it is
# resolved against the notebook's current working directory.
# Later cells read this module-level DataFrame.
preprocessed_data = pd.read_csv('data/preprocessed_data.csv')



In [None]:
# Compute and chart the headline metrics for a single test
def analyze_performance(data, test_id):
    """Summarise one test's results, draw them as a bar chart, and return them.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to summarise. The 'is_correct' and 'status' columns are read.
    test_id
        Identifier used in the chart title and echoed back in the result.

    Returns
    -------
    dict
        Keys 'test_id', 'total_correct_answers', 'total_questions_attempted'
        and 'accuracy', in that order.
    """
    # 'is_correct' is summed over every row passed in, not only answered ones.
    correct = data['is_correct'].sum()

    # Only rows whose status is exactly 'answered' count as attempted.
    attempted = len(data.loc[data['status'] == 'answered'])

    # Guard the division: with nothing attempted the accuracy is reported as 0.
    if attempted > 0:
        ratio = correct / attempted
    else:
        ratio = 0

    metrics = {
        'test_id': test_id,
        'total_correct_answers': correct,
        'total_questions_attempted': attempted,
        'accuracy': ratio,
    }

    # One bar per metric; all three share a single y-axis.
    bar_labels = ['Total Correct Answers', 'Total Questions Attempted', 'Accuracy']
    bar_heights = [correct, attempted, ratio]
    plt.figure(figsize=(10, 12))
    plt.bar(bar_labels, bar_heights)
    plt.title(f'Performance Analysis for Test ID: {test_id}')
    plt.xlabel('Metrics')
    plt.ylabel('Values')
    plt.xticks(rotation=45)
    plt.show()

    return metrics


In [None]:
def analyze_all_tests_performance(data=None):
    """Run analyze_performance for every test and print the resulting metrics.

    Parameters
    ----------
    data : pandas.DataFrame, optional
        Rows to analyse; must contain a 'test_id' column in addition to the
        columns analyze_performance reads. When omitted, the module-level
        ``preprocessed_data`` frame is used, so existing zero-argument calls
        keep working.

    Returns
    -------
    None
        Metrics are printed (and charted by analyze_performance), not returned.
    """
    if data is None:
        data = preprocessed_data

    # A single groupby pass replaces re-filtering the whole frame once per id.
    # sort=False keeps tests in order of first appearance, and groupby's
    # default NaN handling skips rows with a missing test_id instead of
    # analysing an empty slice for them.
    for test_id, test_data in data.groupby('test_id', sort=False):
        performance_metrics = analyze_performance(test_data, test_id)
        print(f"Test ID: {test_id}")
        for metric, value in performance_metrics.items():
            print(f"{metric}: {value}")
        print("-" * 30)

# Run the per-test analysis: prints each test's metrics and shows its chart
analyze_all_tests_performance()


In [None]:
# Chart one user's question and correct-answer counts per test
def plot_user_performance(user_data):
    """Draw overlaid bars of questions vs. correct answers for each test.

    Parameters
    ----------
    user_data : pandas.DataFrame
        Rows for one user. The 'user_id', 'test_id', 'question_id' and
        'is_correct' columns are read; the user id shown in the title is taken
        from the first row.
    """
    # The title uses the first row's user id.
    uid = user_data['user_id'].iloc[0]

    # Per test: how many question rows there are and how many were correct.
    per_test = user_data.groupby('test_id').agg(
        total_questions=('question_id', 'count'),
        total_correct_answers=('is_correct', 'sum'),
    )

    # (column, bar colour, legend label) — drawn in order, so the second
    # series is painted over the first at the same x positions.
    bar_specs = (
        ('total_questions', 'lightblue', 'Total Questions'),
        ('total_correct_answers', 'green', 'Correct Answers'),
    )

    plt.figure(figsize=(12, 8))
    for column, colour, legend_label in bar_specs:
        plt.bar(per_test.index, per_test[column], color=colour, label=legend_label)
    plt.xlabel('Test ID')
    plt.ylabel('Number of Questions')
    plt.title(f'User {uid} Performance Across Tests')
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
# Print a heading and draw the performance chart for every user
def print_user_performance(data):
    """Print each user's id and chart that user's rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows for any number of users; split on the 'user_id' column and handed
        to plot_user_performance one user at a time.
    """
    for uid, rows in data.groupby('user_id'):
        print(f"User ID: {uid}")
        plot_user_performance(rows)
        
        
 # Print each user's id and show that user's per-test performance chart
print_user_performance(preprocessed_data)
        

In [None]:

import seaborn as sns

# Function to plot quality of time spent for a specific test
def plot_time_spent_quality(test_data):
    """Draw a pie chart of time spent on answered vs. all other questions.

    Parameters
    ----------
    test_data : pandas.DataFrame
        Rows for one test. The 'test_id', 'status' and 'time_taken' columns
        are read; the test id shown in the title is taken from the first row.

    Returns
    -------
    None
        The chart is shown as a side effect. When there are no rows, or no
        positive time to plot, a short message is printed and nothing is
        drawn.
    """
    # Nothing to look up or plot for an empty frame (``.iloc[0]`` would raise).
    if test_data.empty:
        print('No rows supplied; skipping time spent chart.')
        return

    # Get test ID
    test_id = test_data['test_id'].iloc[0]

    # Split the time with complementary masks rather than "total - answered":
    # the two wedges still add up to the total, but float rounding can no
    # longer yield a tiny negative wedge, which pie() rejects.
    answered_mask = test_data['status'] == 'answered'
    answered_time_taken = test_data.loc[answered_mask, 'time_taken'].sum()
    unanswered_time_taken = test_data.loc[~answered_mask, 'time_taken'].sum()

    # Pie chart data
    labels = ['Quality Time (Answered)', 'Other Time (Unanswered)']
    sizes = [answered_time_taken, unanswered_time_taken]

    # pie() normalises wedges by their sum and refuses negative sizes, so an
    # all-zero (or negative) split cannot be rendered; skip it explicitly.
    if min(sizes) < 0 or sum(sizes) <= 0:
        print(f'Test ID: {test_id} - no positive time recorded; skipping time spent chart.')
        return

    colors = sns.color_palette('pastel')  # Using Seaborn color palette
    explode = (0.1, 0)  # explode the 1st slice (Quality Time)

    # Plot pie chart
    plt.figure(figsize=(8, 6))
    plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%',
            shadow=True, startangle=140)
    plt.title(f'Time Spent Quality for Test ID: {test_id}')
    plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
    plt.show()

# Draw the time-spent pie chart for every test in the data
def print_time_spent_quality(data):
    """Chart time-spent quality once per test.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows for any number of tests; split on the 'test_id' column and handed
        to plot_time_spent_quality one test at a time.
    """
    # The group key is not needed here; only each test's rows are passed on.
    for _, rows in data.groupby('test_id'):
        plot_time_spent_quality(rows)

# Example usage
# Load preprocessed data from CSV
# NOTE(review): this re-reads the same path the first cell already loaded into
# preprocessed_data and rebinds that name — confirm the reload is intentional.
preprocessed_data = pd.read_csv('data/preprocessed_data.csv')

# Draw one time-spent pie chart per test found in the data
print_time_spent_quality(preprocessed_data)
