In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [2]:
# Read the train and test CSVs, then separate model inputs from the label.
test = pd.read_csv('data/split/test_M.csv')
train = pd.read_csv('data/split/train_M.csv')

# Label column and the columns used as model inputs.
target = 'status'
encoded_features = ['Level', 'climber', 'problem_category']

# Inputs: the same column subset is taken from both frames.
X_train = train[encoded_features]
X_test = test[encoded_features]

# Labels: the single target column from each frame.
y_train = train[target]
y_test = test[target]

In [3]:
# One-hot encode the input columns.
# The encoder is fitted on the training frame only and then applied to both
# frames, so train and test share the same output columns.
# drop='first' leaves out the first category of every feature;
# sparse_output=False makes transform() return a dense array.
encoder = OneHotEncoder(drop='first', sparse_output=False).fit(X_train)
X_train_encoded = encoder.transform(X_train)
X_test_encoded = encoder.transform(X_test)

# Wrap the dense arrays in DataFrames labelled with the generated column names.
encoded_columns = encoder.get_feature_names_out(encoded_features)
X_train_encoded_df = pd.DataFrame(X_train_encoded, columns=encoded_columns)
X_test_encoded_df = pd.DataFrame(X_test_encoded, columns=encoded_columns)

In [9]:
# Standardise the encoded columns (original motivation: keep the intercept below 1).
# The scaler is fitted on the training frame only and reused for the test frame.
scaler = StandardScaler().fit(X_train_encoded_df)
X_train_scaled = scaler.transform(X_train_encoded_df)
X_test_scaled = scaler.transform(X_test_encoded_df)

# Fit the logistic regression on the scaled training features.
logreg = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Predict on the scaled test features and report accuracy against y_test.
y_pred = logreg.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

Accuracy: 0.673859400430514


In [10]:
# Print the fitted intercept and one coefficient per encoded column.
# [0] takes the first row of coef_ and the first entry of intercept_.
intercept = logreg.intercept_[0]
coefficients = logreg.coef_[0]
feature_names = X_train_encoded_df.columns

# Pair each encoded column name with its coefficient.
coef_df = pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})

print("Intercept:", intercept)
print("Coefficients:\n", coef_df)

Intercept: 0.265633008758891
Coefficients:
                              Feature  Coefficient
0                            Level_Q    -0.120914
1                            Level_S    -0.083864
2      climber_Dmitrii Sharafutdinov     0.032827
3   climber_Guillaume Glairon Mondet    -0.001130
4                  climber_Jan Hojer    -0.005325
5              climber_Jernej Kruder    -0.017201
6          climber_Kilian Fischhuber     0.059846
7               climber_Kokoro Fujii     0.019130
8                      climber_Other    -0.381549
9            climber_Rustam Gelmanov     0.016797
10               climber_Sean Mccoll    -0.016615
11              climber_Tsukuru Hori    -0.012984
12             problem_category_Zone     0.658834


In [8]:
# Coefficients of a logistic regression fitted on the UNSCALED one-hot columns,
# for comparison with the coefficients of `logreg` (fitted on scaled columns).
#
# NOTE(review): this cell used to be a verbatim copy of the coefficient cell
# above, yet its saved output differed from that cell's. The saved numbers
# appear to come from a fit made before the scaling step existed - confirm.
# Fitting a separately named model here makes the comparison reproducible on a
# fresh kernel and leaves `logreg`, `coefficients`, `intercept` and `coef_df`
# untouched.
logreg_unscaled = LogisticRegression(max_iter=1000)
logreg_unscaled.fit(X_train_encoded_df, y_train)

# Pair each encoded column name with its unscaled-fit coefficient.
coef_unscaled_df = pd.DataFrame({
    'Feature': X_train_encoded_df.columns,
    'Coefficient': logreg_unscaled.coef_[0]
})
print("Intercept:", logreg_unscaled.intercept_[0])
print("Coefficients:\n", coef_unscaled_df)

Intercept: 0.7704030060913453
Coefficients:
                              Feature  Coefficient
0                            Level_Q    -0.290688
1                            Level_S    -0.221188
2      climber_Dmitrii Sharafutdinov     0.380986
3   climber_Guillaume Glairon Mondet     0.075595
4                  climber_Jan Hojer     0.034953
5              climber_Jernej Kruder    -0.060959
6          climber_Kilian Fischhuber     0.625608
7               climber_Kokoro Fujii     0.268453
8                      climber_Other    -1.061499
9            climber_Rustam Gelmanov     0.223364
10               climber_Sean Mccoll    -0.066478
11              climber_Tsukuru Hori    -0.035481
12             problem_category_Zone     1.318027
