In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = "student_admission_record_dirty.xlsx"
data = pd.read_excel(file_path)

# Drop irrelevant column 'Name' and rows with missing target values
data_cleaned = data.drop(columns=["Name"]).dropna(subset=["Admission Status"])

# Fill missing numeric values with their respective column mean
numeric_cols = ["Age", "Admission Test Score", "High School Percentage"]
data_cleaned[numeric_cols] = data_cleaned[numeric_cols].fillna(data_cleaned[numeric_cols].mean())

# Encode categorical variables
label_encoders = {}
for col in ["Gender", "City", "Admission Status"]:
    le = LabelEncoder()
    data_cleaned[col] = le.fit_transform(data_cleaned[col])
    label_encoders[col] = le

# Separate features and target variable
X = data_cleaned.drop(columns=["Admission Status"])
y = data_cleaned["Admission Status"]

# Standardize the numerical features for SVM
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train an SVM classifier
svm_model = SVC(kernel="linear", random_state=42)
svm_model.fit(X_train, y_train)

# Make predictions
y_pred = svm_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Output results
print("Accuracy:", accuracy)
print("Classification Report:\n", report)


Accuracy: 0.43333333333333335
Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.55      0.41        11
           1       0.58      0.37      0.45        19

    accuracy                           0.43        30
   macro avg       0.46      0.46      0.43        30
weighted avg       0.49      0.43      0.44        30

