In [106]:
import time
import json
import random
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
seed = 42
random.seed(seed)
np.random.seed(seed)

In [107]:
def load_excel_to_dataframe(file_path):
    """
    Reads an Excel file into a Pandas DataFrame and measures the loading time.
    
    Args:
        file_path (str): The path to the Excel file.

    Returns:
        pd.DataFrame: The loaded DataFrame.
        float: The loading time in seconds.
    """
    start_time = time.time()
    df = pd.read_excel(file_path)
    end_time = time.time()
    loading_time = end_time - start_time
    return df, loading_time

In [108]:
loaded_df, loading_time = load_excel_to_dataframe("20221012_karakterdata_til_studenterprojekt.xlsx")
loaded_df.head(5)

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,SKALA,ECTS,UDPROVNING,CENSUR,BEDOMMELSESDATO
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4,7-trinsskala,5.0,Skriftlig,ekstern censur,2018-12-19
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7,7-trinsskala,5.0,Afløsningsopgave,ekstern censur,2019-12-30
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7,7-trinsskala,5.0,Afløsningsopgave,intern censur,2019-06-07
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10,7-trinsskala,10.0,Skriftlig,ekstern censur,2020-05-27
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10,7-trinsskala,5.0,Afløsningsopgave,intern censur,2018-12-18


In [109]:
print(loading_time)

23.003116369247437


In [110]:
loaded_df.shape

(327248, 10)

In [111]:
loaded_df.dtypes

STUDIENR                   object
UDDANNELSE                 object
KURSKODE                   object
KURSTXT                    object
BEDOMMELSE                 object
SKALA                      object
ECTS                      float64
UDPROVNING                 object
CENSUR                     object
BEDOMMELSESDATO    datetime64[ns]
dtype: object

### Data Cleaning

In [112]:
loaded_df['BEDOMMELSE'] = pd.to_numeric(loaded_df['BEDOMMELSE'], errors='coerce')
loaded_df['ECTS'] = pd.to_numeric(loaded_df['ECTS'], errors='coerce')
loaded_df['KURSKODE'] = pd.to_numeric(loaded_df['KURSKODE'], errors='coerce').astype('Int64')
loaded_df['BEDOMMELSESDATO'] = pd.to_datetime(loaded_df['BEDOMMELSESDATO'], format='%Y-%m-%d')
loaded_df.dtypes

STUDIENR                   object
UDDANNELSE                 object
KURSKODE                    Int64
KURSTXT                    object
BEDOMMELSE                float64
SKALA                      object
ECTS                      float64
UDPROVNING                 object
CENSUR                     object
BEDOMMELSESDATO    datetime64[ns]
dtype: object

In [113]:
loaded_df.dropna(inplace=True)

In [114]:
loaded_df.shape

(250379, 10)

In [115]:
event_log = loaded_df[['STUDIENR','UDDANNELSE','KURSKODE','KURSTXT','BEDOMMELSE','ECTS', 'BEDOMMELSESDATO']]
event_log.head()

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18


In [116]:
# event_log.to_csv('students_data.csv', index=False)

In [119]:
event_df = event_log.copy()

In [120]:
event_df

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18
...,...,...,...,...,...,...,...
327241,STNR025846,"Medicin og teknologi, cand.polyt.",31598,31598 Engineering World Health Sommerskole i m...,4.0,5.0,2017-07-06
327244,STNR025852,"Technology Entrepreneurship, cand.tech.",42383,42383 Entrepreneurial Life Skills,4.0,5.0,2021-01-11
327245,STNR025852,"Technology Entrepreneurship, cand.tech.",42387,42387 Digitale trends for entreprenører,10.0,5.0,2020-12-14
327246,STNR025852,"Technology Entrepreneurship, cand.tech.",42389,42389 Feasibility studies,10.0,10.0,2021-01-22


In [121]:
event_df["HAS_PREREQUISITE"] = 0
event_df["TOTAL_PREREQUISITE_BEDOMMELSE"] = 0
event_df['KURSKODE'] = event_df['KURSKODE'].astype(str)

In [122]:
prerequisite_courses = {
    "2269":["2105","2158","2141"], # Processing minging prerequisite coureses:
    "2291":["2141","2161","2170"], # System integration prerequisite coureses:
    "2502":["1001","1002","1003","1004","1005","1006","1015","1016","2101","2102","2402","2403","2405","2002","2631","2632","2633","2692"], # Image analysis prerequisite coureses
    "2234":["2232","2233"], # Current Topics in System Security prerequisite coureses:
    "2285":["1017","2101","2105","2180","2156"], # Artificial Intelligence and Multi-Agent Systems prerequisite coureses:
    "2806":["2822","2467"], # Social data analysis and visualization  prerequisite coureses:
    "2582":["2409","2450","27411"], # Computational Data Analysis prerequisite coureses:
    "2516": ["02456"], # Introduction to Deep Learning in Computer Vision prerequisite coureses:
    "34365": ["34315","34338","34346"], # IoT Prototyping prerequisite coureses
    "2110":["2105","2326"] # Algorithms and Data Structures 2 prerequisite coureses
}

In [123]:
# Iterate through the rows and update "TOTAL_PREREQUISITE" based on prerequisites
for index, row in event_df.iterrows():
    kurskode = row["KURSKODE"]
    if kurskode in prerequisite_courses:
        prerequisites = prerequisite_courses[kurskode]
        pr = event_df[(event_df["STUDIENR"] == row["STUDIENR"]) & (event_df["KURSKODE"].isin(prerequisites))]
        event_df.at[index, "HAS_PREREQUISITE"] = len(pr)
        event_df.at[index, "TOTAL_PREREQUISITE_BEDOMMELSE"] = pr['BEDOMMELSE'].sum()

In [124]:
event_df

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19,0,0
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30,0,0
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07,0,0
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27,0,0
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18,0,0
...,...,...,...,...,...,...,...,...,...
327241,STNR025846,"Medicin og teknologi, cand.polyt.",31598,31598 Engineering World Health Sommerskole i m...,4.0,5.0,2017-07-06,0,0
327244,STNR025852,"Technology Entrepreneurship, cand.tech.",42383,42383 Entrepreneurial Life Skills,4.0,5.0,2021-01-11,0,0
327245,STNR025852,"Technology Entrepreneurship, cand.tech.",42387,42387 Digitale trends for entreprenører,10.0,5.0,2020-12-14,0,0
327246,STNR025852,"Technology Entrepreneurship, cand.tech.",42389,42389 Feasibility studies,10.0,10.0,2021-01-22,0,0


In [125]:
event_df.dtypes

STUDIENR                                 object
UDDANNELSE                               object
KURSKODE                                 object
KURSTXT                                  object
BEDOMMELSE                              float64
ECTS                                    float64
BEDOMMELSESDATO                  datetime64[ns]
HAS_PREREQUISITE                          int64
TOTAL_PREREQUISITE_BEDOMMELSE             int64
dtype: object

### Build Model

In [126]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [127]:
event_df['Year'] = event_df['BEDOMMELSESDATO'].dt.year
event_df['Month'] = event_df['BEDOMMELSESDATO'].dt.month
# event_df = event_df.drop('BEDOMMELSESDATO', axis=1)
event_df

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19,0,0,2018,12
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30,0,0,2019,12
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07,0,0,2019,6
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27,0,0,2020,5
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18,0,0,2018,12
...,...,...,...,...,...,...,...,...,...,...,...
327241,STNR025846,"Medicin og teknologi, cand.polyt.",31598,31598 Engineering World Health Sommerskole i m...,4.0,5.0,2017-07-06,0,0,2017,7
327244,STNR025852,"Technology Entrepreneurship, cand.tech.",42383,42383 Entrepreneurial Life Skills,4.0,5.0,2021-01-11,0,0,2021,1
327245,STNR025852,"Technology Entrepreneurship, cand.tech.",42387,42387 Digitale trends for entreprenører,10.0,5.0,2020-12-14,0,0,2020,12
327246,STNR025852,"Technology Entrepreneurship, cand.tech.",42389,42389 Feasibility studies,10.0,10.0,2021-01-22,0,0,2021,1


In [128]:
# total_bedommelse = event_df[(event_df['STUDIENR'] == "STNR025852")]['ECTS'].sum()
# total_bedommelse

In [129]:
# total_ects_per_student = event_df.groupby('STUDIENR')['ECTS'].sum()
# total_ects_per_student

In [130]:
# event_df.to_csv('students_data_pre.csv', index=False)

In [131]:
# Select features and target variable
X = event_df[['KURSKODE', 'ECTS', 'HAS_PREREQUISITE', 'TOTAL_PREREQUISITE_BEDOMMELSE']]
y = event_df['BEDOMMELSE']

In [132]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [133]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

***Linear Regression***

In [134]:
linear_model = LinearRegression()
linear_model.fit(X_train_scaled, y_train)
linear_predictions = linear_model.predict(X_test_scaled)

***Decision Tree Regression***

In [135]:
tree_model = DecisionTreeRegressor()
tree_model.fit(X_train_scaled, y_train)
tree_predictions = tree_model.predict(X_test_scaled)

***Random Forest Regression***

In [136]:
forest_model = RandomForestRegressor()
forest_model.fit(X_train_scaled, y_train)
forest_predictions = forest_model.predict(X_test_scaled)

***Gradient Boosting Regressor***

In [137]:
gboostin_model = GradientBoostingRegressor()
gboostin_model.fit(X_train_scaled, y_train)
gboostin_predictions = gboostin_model.predict(X_test_scaled)

In [138]:
# Evaluate models
def evaluate_model(predictions, y_true):
    mae = mean_absolute_error(y_true, predictions)
    mse = mean_squared_error(y_true, predictions)
    r2 = r2_score(y_true, predictions)
    return mae, mse, r2

linear_metrics = evaluate_model(linear_predictions, y_test)
tree_metrics = evaluate_model(tree_predictions, y_test)
forest_metrics = evaluate_model(forest_predictions, y_test)
gboostin_metrics = evaluate_model(gboostin_predictions, y_test)

print("Linear Regression Metrics:", linear_metrics)
print("Decision Tree Regression Metrics:", tree_metrics)
print("Random Forest Regression Metrics:", forest_metrics)
print("Gradient Boosting Regressor Metrics:", gboostin_metrics)

Linear Regression Metrics: (3.086472218364357, 14.554520970388051, 0.007412629034405205)
Decision Tree Regression Metrics: (2.7414605182823752, 11.465584801939974, 0.2180715051841895)
Random Forest Regression Metrics: (2.7411096932430605, 11.45837853962882, 0.21856295694522443)
Gradient Boosting Regressor Metrics: (2.974815148077186, 13.037268318016032, 0.1108860325474832)


In [139]:
def predict_academic_performance(user_input, model):
    user_df = pd.DataFrame(user_input, index=[0])
    X_user = user_df[['KURSKODE', 'ECTS', 'HAS_PREREQUISITE', 'TOTAL_PREREQUISITE_BEDOMMELSE']]
    prediction = model.predict(X_user)
    return prediction[0]

In [140]:
# Example usage
user_input = {
    'KURSKODE': [12345],
    'ECTS': [5.0],
    'HAS_PREREQUISITE': [1],
    'TOTAL_PREREQUISITE_BEDOMMELSE': [10]
}

In [141]:
l_predicted_grade = predict_academic_performance(user_input, linear_model)
dt_predicted_grade = predict_academic_performance(user_input, tree_model)
rf_predicted_grade = predict_academic_performance(user_input, forest_model)
gb_predicted_grade = predict_academic_performance(user_input, gboostin_model)

In [142]:
print(f'Linear Regression Predicted Grade: {l_predicted_grade}')
print(f'Decision Tree Regression Predicted Grade: {dt_predicted_grade}')
print(f'Random Forest Regression Predicted Grade: {rf_predicted_grade}')
print(f'Gradient Boosting Regressor Predicted Grade: {gb_predicted_grade}')

Linear Regression Predicted Grade: 2880.580330674632
Decision Tree Regression Predicted Grade: 8.358288770053475
Random Forest Regression Predicted Grade: 8.356773419414036
Gradient Boosting Regressor Predicted Grade: 8.653782128998198


***Overall Assessment:***

1. The decision tree, random forest, and gradient boosting models outperform linear regression, capturing more complex relationships in the data.
2. Both random forest and gradient boosting have similar performance, with gradient boosting having a slight edge in terms of R2.
3. Model selection may depend on other factors such as interpretability, computational efficiency, and ease of tuning hyperparameters.

In [205]:
new_df = event_df.copy()
new_df.head(5)

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19,0,0,2018,12
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30,0,0,2019,12
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07,0,0,2019,6
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27,0,0,2020,5
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18,0,0,2018,12


In [206]:
# Group by 'STUDIENR' and sum the 'ECTS_TOTAL' for each student
sum_ects_by_student = new_df.groupby('STUDIENR')['ECTS'].sum().reset_index()
sum_ects_by_student = sum_ects_by_student.rename(columns={'ECTS': 'TOTAL_ECTS'})
# sum_ects_by_student = sum_ects_by_student.sort_values(by='TOTAL_ECTS', ascending=False)
sum_ects_by_student

Unnamed: 0,STUDIENR,TOTAL_ECTS
0,STNR000001,90.0
1,STNR000003,165.0
2,STNR000004,145.0
3,STNR000005,120.0
4,STNR000006,165.0
...,...,...
20038,STNR025830,5.0
20039,STNR025831,10.0
20040,STNR025845,5.0
20041,STNR025846,5.0


In [207]:
filter_df = pd.DataFrame(sum_ects_by_student)

In [208]:
new_merged_df = new_df.merge(filter_df, how='left', left_on='STUDIENR', right_on='STUDIENR')
new_merged_df.head(5)

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month,TOTAL_ECTS
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19,0,0,2018,12,90.0
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30,0,0,2019,12,90.0
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07,0,0,2019,6,90.0
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27,0,0,2020,5,90.0
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18,0,0,2018,12,90.0


In [209]:
filtered_total_ect_data = new_merged_df[new_merged_df['TOTAL_ECTS'] >= 120]

In [210]:
filtered_total_ect_data

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month,TOTAL_ECTS
16,STNR000003,"Bioteknologi, ing.bach.",1005,01005 Matematik 1,7.0,20.0,2018-09-14,0,0,2018,9,165.0
17,STNR000003,"Bioteknologi, ing.bach.",2402,02402 Introduktion til statistik,2.0,5.0,2019-05-29,0,0,2019,5,165.0
18,STNR000003,"Bioteknologi, ing.bach.",2633,02633 Introduktion til programmering og databe...,4.0,5.0,2018-07-10,0,0,2018,7,165.0
19,STNR000003,"Bioteknologi, ing.bach.",10050,10050 Modelfysik,7.0,10.0,2019-06-21,0,0,2019,6,165.0
20,STNR000003,"Bioteknologi, ing.bach.",22111,22111 Introduktion til bioinformatik,7.0,5.0,2020-06-25,0,0,2020,6,165.0
...,...,...,...,...,...,...,...,...,...,...,...,...
250304,STNR025731,"Byggeri og infrastruktur, ing.prof.bach.",62277,62277 Vand og klimatilpasning,2.0,5.0,2020-05-20,0,0,2020,5,145.0
250305,STNR025731,"Byggeri og infrastruktur, ing.prof.bach.",62279,62279 Bygnings- og infrastrukturprojekt 2,12.0,10.0,2021-01-22,0,0,2021,1,145.0
250306,STNR025731,"Byggeri og infrastruktur, ing.prof.bach.",62318,62318 Byggeri,4.0,20.0,2019-01-25,0,0,2019,1,145.0
250307,STNR025731,"Byggeri og infrastruktur, ing.prof.bach.",62327,62327 Bygningsprojekteringsprojekt,2.0,5.0,2019-06-27,0,0,2019,6,145.0


In [211]:
total_students_by_education = filtered_total_ect_data.groupby('UDDANNELSE')['STUDIENR'].nunique().reset_index()
# Display the total number of students for each education
total_students_by_education = total_students_by_education.rename(columns={'STUDIENR': 'TOTAL_STUDIENR'})
total_students_by_education = total_students_by_education.sort_values(by='TOTAL_STUDIENR', ascending=False)
total_students_by_education

Unnamed: 0,UDDANNELSE,TOTAL_STUDIENR
10,"Byggeri og infrastruktur, ing.prof.bach.",445
48,"Maskinteknik, ing.prof.bach.",290
63,"Softwareteknologi, ingeniør bach.",230
49,"Matematik og teknologi, ingeniør bach.",222
9,"Bygge og anlæg, ingeniør bach.",218
...,...,...
42,"Kommunikationsteknologier og systemdesign, can...",10
1,"Akvatisk videnskab og teknologi, cand.polyt.",8
58,"Olie- og gasteknologi, cand.polyt.",6
65,"Technology Entrepreneurship, cand.tech.",1


In [212]:
department_filter_df = pd.DataFrame(total_students_by_education)

In [214]:
new_merged_depart_df = new_merged_df.merge(department_filter_df, how='left', left_on='UDDANNELSE', right_on='UDDANNELSE')
new_merged_depart_df.head(5)

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month,TOTAL_ECTS,TOTAL_STUDIENR
0,STNR000001,"Materiale- og procesteknologi, cand.polyt.",2402,02402 Introduktion til statistik,4.0,5.0,2018-12-19,0,0,2018,12,90.0,17.0
1,STNR000001,"Materiale- og procesteknologi, cand.polyt.",28213,28213 Polymerteknologi,7.0,5.0,2019-12-30,0,0,2019,12,90.0,17.0
2,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41632,41632 Robust design af produkter og mekanismer,7.0,5.0,2019-06-07,0,0,2019,6,90.0,17.0
3,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41656,41656 Materialer i avancerede anvendelser og p...,10.0,10.0,2020-05-27,0,0,2020,5,90.0,17.0
4,STNR000001,"Materiale- og procesteknologi, cand.polyt.",41661,41661 Metallære,10.0,5.0,2018-12-18,0,0,2018,12,90.0,17.0


In [215]:
filtered_new_department_merged_df = new_merged_depart_df[new_merged_depart_df['TOTAL_STUDIENR'] >= 200]
filtered_new_department_merged_df

Unnamed: 0,STUDIENR,UDDANNELSE,KURSKODE,KURSTXT,BEDOMMELSE,ECTS,BEDOMMELSESDATO,HAS_PREREQUISITE,TOTAL_PREREQUISITE_BEDOMMELSE,Year,Month,TOTAL_ECTS,TOTAL_STUDIENR
83,STNR000006,"Produktion og konstruktion, ingeniør bach.",1035,01035 Matematik 2,4.0,5.0,2018-06-01,0,0,2018,6,165.0,209.0
84,STNR000006,"Produktion og konstruktion, ingeniør bach.",2402,02402 Introduktion til statistik,7.0,5.0,2019-05-29,0,0,2019,5,165.0,209.0
85,STNR000006,"Produktion og konstruktion, ingeniør bach.",2601,02601 Introduktion til numeriske algoritmer,7.0,5.0,2019-06-18,0,0,2019,6,165.0,209.0
87,STNR000006,"Produktion og konstruktion, ingeniør bach.",2633,02633 Introduktion til programmering og databe...,10.0,5.0,2017-11-14,0,0,2017,11,165.0,209.0
88,STNR000006,"Produktion og konstruktion, ingeniør bach.",31300,31300 Reguleringsteknik 1,7.0,5.0,2019-06-21,0,0,2019,6,165.0,209.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
250335,STNR025745,"Byggeri og infrastruktur, ing.prof.bach.",1920,01920 Basismat 2 - Videregående matematik for ...,4.0,5.0,2021-06-24,0,0,2021,6,55.0,445.0
250336,STNR025745,"Byggeri og infrastruktur, ing.prof.bach.",11767,11767 Statistik i eksperimentel materialelære,4.0,5.0,2021-05-27,0,0,2021,5,55.0,445.0
250337,STNR025745,"Byggeri og infrastruktur, ing.prof.bach.",62318,62318 Byggeri,4.0,20.0,2021-01-20,0,0,2021,1,55.0,445.0
250338,STNR025745,"Byggeri og infrastruktur, ing.prof.bach.",62328,62328 Anlæg og Infrastruktur,7.0,20.0,2021-06-01,0,0,2021,6,55.0,445.0


In [217]:
# filtered_new_department_merged_df.to_csv('filtered_students_data.csv', index=False)