# Problema Admiterii la un Liceu de Elită (Elite High School Admission Problem)

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder, PolynomialFeatures
from sklearn.model_selection import cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

## Load Data

In [2]:
df_train = pd.read_csv("train_data.csv").drop("Unnamed: 0", axis=1)
df_test = pd.read_csv("test_data.csv").drop("Unnamed: 0", axis=1)
df_train

Unnamed: 0,id,gen,judet,NT,MEV,MATE,MGIM,status_admitere
0,407,F,BT,4.91,8.82,6.23,6.85,0
1,444,F,BC,9.50,7.31,8.97,8.36,1
2,117,F,BH,6.23,8.63,5.40,8.54,0
3,30,M,SM,1.35,9.49,9.06,5.59,0
4,415,M,OT,1.82,6.71,8.67,8.34,0
...,...,...,...,...,...,...,...,...
265,294,F,MS,1.23,9.90,8.93,5.06,0
266,433,F,MH,1.35,8.63,6.84,7.43,0
267,322,F,AG,2.63,7.75,6.77,8.35,0
268,233,F,CT,9.29,7.82,7.30,9.21,1


## Subtask 1

In [3]:
dif_NT_MEV = df_test["NT"] - df_test["MEV"]
dif_NT_MEV

0      3.28
1     -1.00
2     -0.69
3     -0.74
4     -4.89
       ... 
175   -1.33
176   -3.71
177   -3.20
178    0.18
179   -3.57
Length: 180, dtype: float64

In [4]:
subtask1_rows = []
for id_, val in zip(df_test["id"], dif_NT_MEV):
    subtask1_rows.append((1, id_, val))

## Subtask 2

In [5]:
test_ranked_ids = df_test.sort_values("MEV", ascending=False)["id"].to_list()
loc_MEV = df_test["id"].apply(lambda x: test_ranked_ids.index(x) + 1)
loc_MEV

0      168
1       31
2       88
3      122
4      150
      ... 
175    149
176     36
177    109
178    151
179    175
Name: id, Length: 180, dtype: int64

In [6]:
subtask2_rows = []
for id_, val in zip(df_test["id"], loc_MEV):
    subtask2_rows.append((2, id_, val))

## Subtask 3

In [7]:
X_train = df_train.copy()
X_train, y_train = X_train.drop(["id", "status_admitere"], axis=1), X_train["status_admitere"]
X_test = df_test.copy().drop("id", axis=1)

In [8]:
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(drop="first", handle_unknown="ignore"), ["gen", "judet"]),
    ("num", Pipeline([
        ('poly', PolynomialFeatures(degree=4)),
        ('scaler', StandardScaler())
    ]), ["NT", "MEV", "MATE", "MGIM"])
])

In [9]:
pipeline = Pipeline([
    ("pre", preprocessor),
    ("clf", LogisticRegression(random_state=42))
])

In [10]:
cross_val_score(pipeline, X_train, y_train, cv=5, scoring="accuracy")



array([0.98148148, 0.98148148, 1.        , 1.        , 1.        ])

In [11]:
pipeline.fit(X_train, y_train)
preds = pipeline.predict(X_test)

In [12]:
subtask3_rows = []
for id_, val in zip(df_test["id"], preds):
    subtask3_rows.append((3, id_, val))

## Save answers

In [13]:
submission_rows = subtask1_rows + subtask2_rows + subtask3_rows
df_submission = pd.DataFrame(submission_rows, columns=["subtaskID", "datapointID", "answer"])
df_submission.to_csv("submission.csv", index=False)

## Submission results

Subtask 1:
- Accuracy: 1
- Score: 20/20

Subtask 2:
- Accuracy: 0.766666
- Score: 15/20 (There is a mistake in this subtask. See ["Communication" tab](https://judge.nitro-ai.org/roai-2025/simulare-ojia-3/communication/replies) for more details.)

Subtask 3:
- Accuracy: 0.972222
- Score: 58/60