In [21]:
import os

def main():
    try:
        # Get the input filename from the user
        filename = input("Enter a class file to grade (i.e. class1 for class1.txt): ")
        
        # Check if the file exists
        if os.path.exists(f"{filename}.txt"):
            print(f"Successfully opened {filename}.txt")
        else:
            print("File cannot be found.")
        
    except KeyboardInterrupt:
        print("\nOperation aborted by the user.")

if __name__ == "__main__":
    main()

Enter a class file to grade (i.e. class1 for class1.txt): class3
Successfully opened class3.txt


In [22]:
def is_valid_student_id(student_id):
    """
    Check if a student ID is valid.

    Args:
        student_id (str): The student ID to be checked.

    Returns:
        bool: True if the student ID is valid, False otherwise.
    """
    return len(student_id) == 9 and student_id[0] == 'N' and student_id[1:].isdigit()

def is_valid_answer(answer):
    """
    Check if an answer is valid.

    Args:
        answer (str): The answer to be checked.

    Returns:
        bool: True if the answer is valid, False otherwise.
    """
    return len(answer) == 1 and answer.isalpha()

def analyze_file(file_path):
    """
    Analyze a file containing student data and print a report.

    Args:
        file_path (str): The path to the input file.

    Returns:
        None
    """
    total_valid_lines = 0
    total_invalid_lines = 0

    try:
        with open(file_path, 'r') as file:
            print("Successfully opened", file_path)
            print("**** ANALYZING ****")
            for line_number, line in enumerate(file, start=1):
                line = line.strip()
                data = line.split(',')
                if len(data) == 26:
                    student_id = data[0]
                    if is_valid_student_id(student_id):
                        valid_answers = [answer for answer in data[1:] if is_valid_answer(answer)]
                        if len(valid_answers) == 25:
                            total_valid_lines += 1
                        else:
                            total_invalid_lines += 1
                            print("Invalid line of data:", line_number, "- N# is valid but answers are incomplete")
                    else:
                        total_invalid_lines += 1
                        print("Invalid line of data:", line_number, "- N# is invalid")
                else:
                    total_invalid_lines += 1
                    print("Invalid line of data:", line_number, "- does not contain exactly 26 values:", line)
            print("**** REPORT ****")
            print("Total valid lines of data:", total_valid_lines)
            print("Total invalid lines of data:", total_invalid_lines)
    except FileNotFoundError:
        print("File cannot be found.")

if __name__ == "__main__":
    filename = input("Enter a class to grade (i.e. class1 for class1.txt): ")
    file_path = filename + ".txt"
    analyze_file(file_path)

Enter a class to grade (i.e. class1 for class1.txt): class4
Successfully opened class4.txt
**** ANALYZING ****
Invalid line of data: 1 - N# is valid but answers are incomplete
Invalid line of data: 2 - N# is valid but answers are incomplete
Invalid line of data: 4 - N# is valid but answers are incomplete
Invalid line of data: 5 - N# is valid but answers are incomplete
Invalid line of data: 6 - N# is valid but answers are incomplete
Invalid line of data: 7 - N# is valid but answers are incomplete
Invalid line of data: 8 - N# is valid but answers are incomplete
Invalid line of data: 9 - N# is valid but answers are incomplete
Invalid line of data: 10 - N# is valid but answers are incomplete
Invalid line of data: 11 - N# is valid but answers are incomplete
Invalid line of data: 12 - N# is valid but answers are incomplete
Invalid line of data: 13 - N# is valid but answers are incomplete
Invalid line of data: 14 - N# is valid but answers are incomplete
Invalid line of data: 15 - N# is valid 

In [23]:
# Function to calculate the score for a student's answers
def calculate_score(answer_key, student_answers):
    """
    Calculate the score for a student's answers based on an answer key.

    Args:
        answer_key (list): A list of correct answers.
        student_answers (list): A list of student's answers.

    Returns:
        int: The calculated score for the student.
    """
    score = 0
    for key, answer in zip(answer_key, student_answers):
        if answer == key:
            score += 4
        elif answer == '':
            pass
        else:
            score -= 1
    return score

def main():
    answer_key = "B,A,D,D,C,B,D,A,C,C,D,B,A,B,A,C,B,D,A,C,A,A,B,D,D"
    
    # Input the class filename and construct the file path
    filename = input("Enter a class to grade (i.e. class1 for class1.txt): ")
    file_path = filename + ".txt"
    
    valid_scores = []
    question_skip_count = [0] * 25
    question_wrong_count = [0] * 25

    try:
        # Open and analyze the student data file
        with open(file_path, 'r') as file:
            print("Successfully opened", file_path)
            print("**** ANALYZING ****")
            for line in file:
                line = line.strip()
                data = line.split(',')
                if len(data) == 26:
                    student_id = data[0]
                    student_answers = data[1:]
                    if is_valid_student_id(student_id):
                        score = calculate_score(answer_key.split(','), student_answers)
                        valid_scores.append(score)
                        for index, answer in enumerate(student_answers):
                            if answer == '':
                                question_skip_count[index] += 1
                            elif answer != answer_key.split(',')[index]:
                                question_wrong_count[index] += 1

        # Calculate various statistics and print the report
        total_students = len(valid_scores)
        high_scores = sum(score > 80 for score in valid_scores)
        mean_score = round(sum(valid_scores) / total_students, 2)
        highest_score = max(valid_scores)
        lowest_score = min(valid_scores)
        score_range = highest_score - lowest_score
        sorted_scores = sorted(valid_scores)
        median_index = total_students // 2
        median_score = sorted_scores[median_index] if total_students % 2 == 1 else (sorted_scores[median_index - 1] + sorted_scores[median_index]) / 2

        print("**** REPORT ****")
        print("Total valid lines of data:", total_students)
        print("Total student of high scores:", high_scores)
        print("Mean (average) score:", mean_score)
        print("Highest score:", highest_score)
        print("Lowest score:", lowest_score)
        print("Range of scores:", score_range)
        print("Median score:", median_score)

        skip_info = [(index + 1, count, round(count / total_students, 2)) for index, count in enumerate(question_skip_count)]
        skip_info.sort(key=lambda x: x[1], reverse=True)
        print("Question that most people skip:", ', '.join(f"{q[0]} - {q[1]} - {q[2]}" for q in skip_info if q[1] > 0))

        wrong_info = [(index + 1, count, round(count / total_students, 2)) for index, count in enumerate(question_wrong_count)]
        wrong_info.sort(key=lambda x: x[1], reverse=True)
        print("Question that most people answer incorrectly:", ', '.join(f"{q[0]} - {q[1]} - {q[2]}" for q in wrong_info if q[1] > 0))

    except FileNotFoundError:
        print("File cannot be found.")

if __name__ == "__main__":
    main()

Enter a class to grade (i.e. class1 for class1.txt): class7
Successfully opened class7.txt
**** ANALYZING ****
**** REPORT ****
Total valid lines of data: 193
Total student of high scores: 59
Mean (average) score: 74.85
Highest score: 100
Lowest score: 47
Range of scores: 53
Median score: 75
Question that most people skip: 4 - 28 - 0.15, 1 - 27 - 0.14, 9 - 25 - 0.13, 11 - 24 - 0.12, 12 - 24 - 0.12, 18 - 23 - 0.12, 25 - 23 - 0.12, 13 - 22 - 0.11, 19 - 22 - 0.11, 2 - 21 - 0.11, 22 - 21 - 0.11, 5 - 20 - 0.1, 6 - 20 - 0.1, 8 - 20 - 0.1, 16 - 20 - 0.1, 20 - 20 - 0.1, 7 - 18 - 0.09, 14 - 18 - 0.09, 15 - 17 - 0.09, 17 - 17 - 0.09, 21 - 16 - 0.08, 23 - 16 - 0.08, 3 - 14 - 0.07, 10 - 13 - 0.07, 24 - 12 - 0.06
Question that most people answer incorrectly: 7 - 32 - 0.17, 4 - 28 - 0.15, 12 - 28 - 0.15, 17 - 28 - 0.15, 24 - 27 - 0.14, 10 - 26 - 0.13, 21 - 25 - 0.13, 2 - 24 - 0.12, 6 - 24 - 0.12, 9 - 24 - 0.12, 25 - 24 - 0.12, 5 - 23 - 0.12, 15 - 23 - 0.12, 13 - 22 - 0.11, 23 - 22 - 0.11, 3 - 21 - 0

In [17]:
pip install pandas numpy

Note: you may need to restart the kernel to use updated packages.


In [20]:
import pandas as pd
import numpy as np

# Function to calculate the score for a student's answers
def calculate_score(answer_key, student_answers):
    score = 0
    for i in range(len(answer_key)):
        if student_answers[i] == '':
            continue
        elif student_answers[i] == answer_key[i]:
            score += 4
        else:
            score -= 1
    return score

def main():
    answer_key = "B,A,D,D,C,B,D,A,C,C,D,B,A,B,A,C,B,D,A,C,A,A,B,D,D"
    
    # Get the input filename from the user
    filename = input("Enter a class to grade (i.e. class1 for class1.txt): ")
    file_path = filename + ".txt"
    
    try:
        # Read the student data from the input file using pandas
        df = pd.read_csv(file_path, header=None)
        df.columns = ['Student_ID'] + [f'Q{i+1}' for i in range(25)]

        # Initialize variables to store statistics
        valid_scores = {}
        skipped_questions = [0] * 25
        incorrect_questions = [0] * 25
        
        # Process each student's data
        for index, row in df.iterrows():
            student_id = row['Student_ID']
            student_answers = list(row[1:])
            score = calculate_score(answer_key.split(','), student_answers)
            valid_scores[student_id] = score
            for i, answer in enumerate(student_answers):
                if pd.isna(answer):
                    skipped_questions[i] += 1
                elif answer != answer_key[i]:
                    incorrect_questions[i] += 1
        
        valid_lines = len(df)
        
        # Calculate and display statistics
        high_scores = sum(score > 80 for score in valid_scores.values())
        print(f"Total student of high scores: {high_scores}")
        
        scores = list(valid_scores.values())
        mean_score = np.mean(scores)
        print(f"Mean (average) score: {mean_score:.2f}")
        
        highest_score = max(scores)
        print(f"Highest score: {highest_score}")
        
        lowest_score = min(scores)
        print(f"Lowest score: {lowest_score}")
        
        score_range = highest_score - lowest_score
        print(f"Range of scores: {score_range}")
        
        sorted_scores = np.sort(scores)
        mid = len(sorted_scores) // 2
        if len(sorted_scores) % 2 == 1:
            median_score = sorted_scores[mid]
        else:
            median_score = np.mean([sorted_scores[mid - 1], sorted_scores[mid]])
        print(f"Median score: {median_score}")
        
        max_skipped = max(skipped_questions)
        max_skipped_indices = [str(i + 1) for i, count in enumerate(skipped_questions) if count == max_skipped]
        skipped_percent = [count / valid_lines for count in skipped_questions]
        skipped_info = ', '.join([f"{index} - {max_skipped} - {skipped_percent[i]:.2f}" for i, index in enumerate(max_skipped_indices)])
        print(f"Question that most people skip: {skipped_info}")
        
        max_incorrect = max(incorrect_questions)
        max_incorrect_indices = [str(i + 1) for i, count in enumerate(incorrect_questions) if count == max_incorrect]
        incorrect_percent = [count / valid_lines for count in incorrect_questions]
        incorrect_info = ', '.join([f"{index} - {max_incorrect} - {incorrect_percent[i]:.2f}" for i, index in enumerate(max_incorrect_indices)])
        print(f"Question that most people answer incorrectly: {incorrect_info}")

        # Create a DataFrame for results and save it to a file
        result_df = pd.DataFrame(list(valid_scores.items()), columns=['Student_ID', 'Score'])
        result_df.to_csv(f"{filename}_grades.txt", index=False)
        print(f"Result file '{filename}_grades.txt' created successfully.")

    except FileNotFoundError:
        print("File cannot be found.")

if __name__ == "__main__":
    main()

Enter a class to grade (i.e. class1 for class1.txt): class1
Total student of high scores: 4
Mean (average) score: 73.00
Highest score: 90
Lowest score: 55
Range of scores: 35
Median score: 70.0
Question that most people skip: 3 - 4 - 0.15, 5 - 4 - 0.10, 23 - 4 - 0.20
Question that most people answer incorrectly: 8 - 20 - 0.15, 18 - 20 - 0.90
Result file 'class1_grades.txt' created successfully.
