# In [None]:
import pandas as pd
import numpy as np

# Load the merged course data; the path is relative to the working directory.
data = pd.read_csv('./cleaned_merged_course_data.csv')

# Convert 'Eksamensgennemsnit' to float. When the grades use a decimal comma
# read_csv leaves them as strings, so the comma is swapped for a dot first.
# If the column was already parsed as numbers the .str accessor would raise,
# so the separator is only rewritten when the column is not numeric.
exam_averages = data['Eksamensgennemsnit']
if not pd.api.types.is_numeric_dtype(exam_averages):
    exam_averages = exam_averages.str.replace(',', '.', regex=False)
data['Eksamensgennemsnit'] = exam_averages.astype(float)

# Order rows by course, then year, then season, and renumber the index so the
# per-course cumulative average below walks the rows in this order.
# NOTE(review): 'Season' is sorted by its raw values - confirm that this
# matches chronological order within a year.
data = data.sort_values(by=['Course code', 'Year', 'Season']).reset_index(drop=True)

# Define a function to calculate WMA for each group
def calculate_wma(group):
    """Return a copy of ``group`` with a running weighted average in 'WMA'.

    For every row, 'WMA' is the average of 'Eksamensgennemsnit' over that row
    and all rows before it in ``group``, weighted by 'Fremmødte'. Rows are
    used in the order they appear. The first row's 'WMA' is always its own
    'Eksamensgennemsnit', whatever its weight.

    A row whose grade or weight is missing adds to neither the weighted sum
    nor the weight total, so it cannot dilute the average; its own 'WMA' is
    the running average of the usable rows seen so far. A running weight
    total of zero yields NaN (or inf) rather than raising.

    Args:
        group: DataFrame with numeric 'Eksamensgennemsnit' and 'Fremmødte'
            columns.

    Returns:
        A new DataFrame with the same rows and index as ``group`` plus a
        float 'WMA' column. The input frame is left unmodified.
    """
    grades = group['Eksamensgennemsnit']
    attendance = group['Fremmødte']
    weighted = grades * attendance

    # Only rows where both grade and weight are present may contribute;
    # zero-filling the others lets one cumulative pass replace the per-row
    # re-summing of every prefix.
    usable = weighted.notna()
    running_weighted = weighted.where(usable, 0).cumsum()
    running_weight = attendance.where(usable, 0).cumsum()

    # copy=True guarantees a writable array for the first-row override below.
    wma = (running_weighted / running_weight).to_numpy(dtype=float, copy=True)
    if len(wma):
        # The first observation is reported as-is, even when its weight is
        # zero or missing.
        wma[0] = grades.iloc[0]

    # Work on a copy: mutating the frame handed in by groupby.apply (or by a
    # direct caller) is not supported by pandas and leaks the new column.
    result = group.copy()
    result['WMA'] = wma
    return result

# Apply the WMA calculation to each course separately and stitch the results
# back together. The groups are iterated explicitly instead of going through
# groupby.apply: apply operating on the grouping column is deprecated in
# recent pandas, and with the default group_keys it returns 'Course code' as
# both an index level and a column. Each group is passed as a copy so the
# callee can add its column without writing into a slice of `data`.
# groupby yields the courses in sorted key order, which matches the sort
# already applied to `data`. Rows whose 'Course code' is missing are dropped,
# as groupby does by default.
course_frames = [
    calculate_wma(course_rows.copy())
    for _, course_rows in data.groupby('Course code')
]
if course_frames:
    data = pd.concat(course_frames)
else:
    # No groups at all: still produce an (empty) frame with the 'WMA' column.
    data = calculate_wma(data.iloc[0:0].copy())

# Save the data as CSV and Excel files
csv_file_path = 'cleaned_with_weight.csv'
excel_file_path = 'cleaned_with_weight.xlsx'

# Save as CSV (the row index is not written)
data.to_csv(csv_file_path, index=False)

# Save as Excel (the row index is not written)
data.to_excel(excel_file_path, index=False)

# Optionally, print out the head of the dataframe to verify
print(data.head())