In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
def load_and_prepare_data(filepath, has_target=True, skip_rows=1,
                          feature_cols=slice(1, 9), target_col='Qualified'):
    """Load an Excel sheet and split it into features, target and the full frame.

    The sheet is read with ``pd.read_excel`` using its defaults, so the first
    spreadsheet row becomes the column names. ``skip_rows`` then drops that
    many leading *data* rows (the default of 1 keeps the notebook's original
    behaviour of discarding the first row after the header).

    Parameters
    ----------
    filepath : str or path-like
        Location of the Excel file, passed straight to ``pd.read_excel``.
    has_target : bool, default True
        When True, the ``target_col`` column is returned as the target;
        when False, the target is ``None``.
    skip_rows : int, default 1
        Number of leading data rows to discard. Use 0 to keep every row.
    feature_cols : slice or list of int, default ``slice(1, 9)``
        Positional column selector handed to ``DataFrame.iloc`` to pick the
        feature columns.
    target_col : str, default 'Qualified'
        Name of the label column read when ``has_target`` is True.

    Returns
    -------
    tuple
        ``(features, target, data)`` where ``data`` is the loaded frame after
        row skipping, ``features`` is the positional column selection of
        ``data`` and ``target`` is ``data[target_col]`` or ``None``.

    Raises
    ------
    ValueError
        If ``skip_rows`` is negative.
    KeyError
        If ``has_target`` is True and ``target_col`` is not a column.
    """
    if skip_rows < 0:
        raise ValueError("skip_rows must be non-negative")

    data = pd.read_excel(filepath)
    # Copy so the returned frame is independent of the freshly read one and
    # callers can add columns (e.g. predictions) without touching a slice.
    data = data.iloc[skip_rows:, :].copy()
    features = data.iloc[:, feature_cols]
    target = data[target_col] if has_target else None
    return features, target, data

def loocv_train(features, target):
    """Evaluate logistic regression with leave-one-out CV, then fit a final model.

    For every leave-one-out split a fresh ``StandardScaler`` and
    ``LogisticRegression`` are fitted on the training rows only, and the single
    held-out row is scaled with those training statistics before prediction.
    Fitting the scaler inside the fold keeps the held-out sample from
    influencing the preprocessing used to evaluate it.

    The pooled predictions are printed as accuracy, confusion matrix and
    classification report. Afterwards a scaler and model are fitted on the
    complete dataset and returned for use on new data.

    The notebook's original guidance is that LOOCV is useful when the sample
    size is below roughly 300.

    Parameters
    ----------
    features : pandas.DataFrame or array-like
        Feature matrix, one row per sample.
    target : pandas.Series
        Labels aligned with ``features``; indexed positionally via ``.iloc``.

    Returns
    -------
    tuple
        ``(final_model, scaler)`` — the ``LogisticRegression`` and
        ``StandardScaler`` fitted on all rows.
    """
    def select_rows(index):
        # Positional row selection that works for DataFrames and plain arrays.
        return features.iloc[index] if hasattr(features, "iloc") else features[index]

    loo = LeaveOneOut()
    preds, trues = [], []

    for train_index, test_index in loo.split(features):
        y_train, y_test = target.iloc[train_index], target.iloc[test_index]

        # Scaling statistics come from the training rows of this fold only.
        fold_scaler = StandardScaler()
        X_train = fold_scaler.fit_transform(select_rows(train_index))
        X_test = fold_scaler.transform(select_rows(test_index))

        fold_model = LogisticRegression()
        fold_model.fit(X_train, y_train)
        y_pred = fold_model.predict(X_test)

        preds.extend(y_pred)
        trues.extend(y_test)

    # Evaluation
    print("===== LOOCV Evaluation =====")
    print("Accuracy:", accuracy_score(trues, preds))
    print("Confusion Matrix:\n", confusion_matrix(trues, preds))
    print("Classification Report:\n", classification_report(trues, preds))

    # Final model and scaler trained on the full data for later predictions
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)
    final_model = LogisticRegression()
    final_model.fit(features_scaled, target)
    return final_model, scaler


def apply_model_to_new_data(model, scaler, filepath):
    """Attach predicted ``Qualified`` labels to a dataset that has no target.

    The file is loaded through ``load_and_prepare_data`` with
    ``has_target=False``, its features are passed through ``scaler.transform``
    (so the scaler is expected to be fitted already), and the output of
    ``model.predict`` is stored in a ``Qualified`` column of the loaded frame.

    Parameters
    ----------
    model : object exposing ``predict``
    scaler : object exposing ``transform``
    filepath : path of the Excel file to label

    Returns
    -------
    The loaded frame with the added ``Qualified`` column.
    """
    loaded = load_and_prepare_data(filepath, has_target=False)
    raw_features, labeled_frame = loaded[0], loaded[2]

    scaled_features = scaler.transform(raw_features)
    labeled_frame['Qualified'] = model.predict(scaled_features)
    return labeled_frame


In [None]:
# Input locations. Both are placeholders: replace "path" with the Excel file
# that contains the labelled training data and the file to be predicted.
training_file = "path"
unknown_file = "path"


# Load the labelled data, print the LOOCV evaluation, and keep the model and
# scaler that loocv_train fits on the full training set.
features, target, _ = load_and_prepare_data(training_file, has_target=True)
model, scaler = loocv_train(features, target)


# Predict the 'Qualified' column for the unlabelled file.
qualified_data = apply_model_to_new_data(model, scaler, unknown_file)

# Write the labelled frame to the working directory without the index column.
qualified_data.to_excel("predicted_qualified_output.xlsx", index=False)
print("End")