In [2]:
import pandas as pd

# Load the raw workbook. header=None means no row is consumed as a header;
# the column label 'Course' is supplied explicitly instead.
file_path = 'Raw_Data.xlsx'
data = pd.read_excel(
    io=file_path,
    header=None,
    names=['Course'],
)

# Function to determine the level
def determine_level(course):
    """Classify a course label by the level marker it contains.

    Markers are matched as case-sensitive substrings, checked in priority
    order: 'HL', then 'SL', then 'EE' or 'TK'.

    Args:
        course: Course label. Non-string values (for example the float NaN
            pandas produces for a blank spreadsheet cell) are accepted.

    Returns:
        'High Level', 'Standard Level', 'Core', or 'Unknown' when no marker
        is present or the value is not a string.
    """
    # `'HL' in nan` raises TypeError, so a single blank cell would abort the
    # whole .apply(); treat anything that is not text as unclassifiable.
    if not isinstance(course, str):
        return 'Unknown'

    if 'HL' in course:
        return 'High Level'
    if 'SL' in course:
        return 'Standard Level'
    if 'EE' in course or 'TK' in course:
        return 'Core'
    return 'Unknown'

# Derive a 'Level' value for every course label
data['Level'] = data['Course'].apply(determine_level)

# Column order for the export: 'Level' first, then 'Course'
data = data.loc[:, ['Level', 'Course']]

# Write the result as CSV without the row index
output_file_path = 'processed_data.csv'
data.to_csv(path_or_buf=output_file_path, index=False)

print(f"Processed data has been saved to {output_file_path}")


Processed data has been saved to processed_data.csv


In [4]:
import pandas as pd

# Load the previously processed sheet (first row is used as the header).
# NOTE(review): the earlier cell writes 'processed_data.csv', not an .xlsx
# workbook -- confirm where 'processed_data.xlsx' comes from.
file_path = 'processed_data.xlsx'
data = pd.read_excel(io=file_path)

# Function to determine the new classification
def classify_level(level):
    """Map a 'Level' value onto its coarser classification.

    'High Level' and 'Standard Level' collapse to 'Regular'; 'Core' and
    'Total Points' map to themselves; every other value yields 'Unknown'.
    """
    if level in ('High Level', 'Standard Level'):
        return 'Regular'

    # These labels pass through unchanged.
    for passthrough in ('Core', 'Total Points'):
        if level == passthrough:
            return passthrough

    return 'Unknown'

# Classify every row from its 'Level' value
data['Classification'] = data['Level'].apply(classify_level)

# Move 'Classification' to index 2 (third column), keeping the relative
# order of all other columns
cols = list(data.columns)
cols.remove('Classification')
cols.insert(2, 'Classification')
data = data.loc[:, cols]

# Write the result as CSV without the row index
output_file_path = 'classified_data.csv'
data.to_csv(path_or_buf=output_file_path, index=False)

print(f"Classified data has been saved to {output_file_path}")


Classified data has been saved to classified_data.csv


In [26]:
import pandas as pd
import re

# Read the workbook with no header row, so columns are labelled 0, 1, ...
data = pd.read_excel(io='stud_data.xlsx', header=None)

# Accumulator for the output records (one dict per output row)
formatted_data = list()

# Function to determine the level and core status
def _has_marker(text, marker):
    """Return True if `marker` occurs in `text` with no letter directly before or after it."""
    pattern = r"(?<![A-Za-z])" + re.escape(marker) + r"(?![A-Za-z])"
    return re.search(pattern, text) is not None


def get_level_and_core(subject):
    """Derive the level label and the core/regular label for a subject string.

    The markers "HL", "SL" and "EE" are only recognised when they are not
    embedded in a longer alphabetic word. A plain substring test would tag
    "GREEK" or "SWEDISH" as Core (they contain "EE") and "ISLAMIC" as
    Standard Level (it contains "SL").

    Args:
        subject: Subject text (str).

    Returns:
        A `(level, core)` tuple where `level` is "Higher Level",
        "Standard Level" or "N/A", and `core` is "Core" or "Regular".
    """
    if _has_marker(subject, "HL"):
        level = "Higher Level"
    elif _has_marker(subject, "SL"):
        level = "Standard Level"
    else:
        level = "N/A"

    # The TOK label is a multi-word phrase, so a substring test is already
    # specific enough for it.
    if _has_marker(subject, "EE") or "THEORY KNOWL. TK" in subject:
        core = "Core"
    else:
        core = "Regular"

    return level, core

# Function to extract subject and grade
def extract_subject_and_grade(subject):
    """Split a trailing grade off a subject string.

    A grade is the last space-separated token when it is either a run of
    digits or a single letter A-E. The letter E is included because the IB
    extended essay and theory of knowledge are graded A to E. Whitespace
    after the grade is ignored.

    Args:
        subject: Subject text (str), optionally ending in a grade.

    Returns:
        `(subject_without_grade, grade)` when a grade is found, otherwise
        `(subject, "N/A")` with the input returned unchanged.
    """
    # Greedy `.*` makes the *last* qualifying token the grade; `\s*$`
    # tolerates stray trailing spaces that would otherwise defeat the anchor.
    match = re.match(r"(.*) (\d+|[A-E])\s*$", subject)
    if match:
        return match.groups()
    return subject, "N/A"

# Group consecutive non-empty rows into blocks; a row whose cells are all
# null closes the block being built (runs of empty rows add nothing).
blocks = []
block = []

for _, row in data.iterrows():
    if row.notna().any():
        block.append(row)
        continue
    if block:
        blocks.append(block)
        block = []

# Flush the trailing block when the sheet does not end with an empty row
if block:
    blocks.append(block)

# Process each block
def _output_row(candidate_info, subject, level, core, grade):
    """Build one output record, using 'N/A' for candidate fields that were not found."""
    return {
        "Personal code": candidate_info.get('Personal code', 'N/A'),
        "Name": candidate_info.get('Name', 'N/A'),
        "Gender": candidate_info.get('Gender', 'N/A'),
        "Category": candidate_info.get('Category', 'N/A'),
        "Subjects": subject,
        "TOTAL POINTS/Core/HL/SL": level,
        "TOTAL POINTS/Core/Regular/Additional Subject": core,
        "Grade": grade,
        "Result": candidate_info.get('Result', 'N/A')
    }


# Keys whose column-1 value is stored in candidate_info unchanged.
_PASSTHROUGH_KEYS = ('Category', 'Birth Date', 'Result', 'EE/TOK points', 'Total Points')

for block in blocks:
    candidate_info = {}
    subjects_grades = []

    for item in block:
        key, value = item[0], item[1]

        # A row can be non-empty while column 0 is blank (NaN); `'MAY' in nan`
        # would raise TypeError, so rows without a text key are skipped.
        if not isinstance(key, str):
            continue

        if key == 'Candidate':
            # The personal code is the last whitespace-separated token with
            # its parentheses removed. Blank or non-text cells are tolerated.
            tokens = str(value).split() if pd.notna(value) else []
            if tokens:
                personal_code = tokens[-1].replace("(", "").replace(")", "").strip()
                candidate_info['Personal code'] = personal_code
        elif key == 'Name':
            if pd.notna(value):
                candidate_info['Name'] = value
                # Placeholder, adjust as needed.
                # NOTE(review): this yields "M" unless the name text itself
                # contains "female" -- confirm a real gender source exists.
                candidate_info['Gender'] = "F" if "female" in str(value).lower() else "M"
        elif key in _PASSTHROUGH_KEYS:
            candidate_info[key] = value
        elif 'MAY' in key:
            subjects_grades.append((key, value))

    # NOTE(review): the column-1 value stored with each subject row is not
    # used; the grade is parsed from the subject text instead. Confirm that
    # is intended.
    for subject_text, _cell_value in subjects_grades:
        subject, grade = extract_subject_and_grade(subject_text)
        level, core = get_level_and_core(subject)
        formatted_data.append(_output_row(candidate_info, subject, level, core, grade))

    # Adding EE/TOK points and Total Points if they exist
    if 'EE/TOK points' in candidate_info:
        formatted_data.append(_output_row(
            candidate_info, "EE/TOK points", "Core", "Core",
            candidate_info['EE/TOK points'],
        ))
    if 'Total Points' in candidate_info:
        formatted_data.append(_output_row(
            candidate_info, "Total Points", "Total Points", "Total Points",
            candidate_info['Total Points'],
        ))

# Build the output table from the accumulated records
df_formatted = pd.DataFrame(data=formatted_data)

# Write it as CSV without the row index
df_formatted.to_csv(path_or_buf='formatted_output.csv', index=False)

print("Formatted data saved to", "formatted_output.csv")


Formatted data saved to formatted_output.csv
