In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from scipy.sparse import hstack

# Load the datasets
train_df = pd.read_csv(r'C:\Users\Ahmed Ashraf\Desktop\archive (3)\playground-series-s4e8\train.csv')
test_df = pd.read_csv(r'C:\Users\Ahmed Ashraf\Desktop\archive (3)\playground-series-s4e8\test.csv')

# Separate features and target
X = train_df.drop(columns=['class', 'id'])
y = train_df['class']

# Encode the target
le = LabelEncoder()
y = le.fit_transform(y)  # 'e' becomes 0 and 'p' becomes 1

# Handle missing values by imputing with the most frequent value
imputer = SimpleImputer(strategy='most_frequent')
X = imputer.fit_transform(X)
X_test = imputer.transform(test_df.drop(columns=['id']))

# One-hot encode the categorical variables and convert to sparse matrix
encoder = OneHotEncoder(sparse_output=True, handle_unknown='ignore')
X_encoded = encoder.fit_transform(X)
X_test_encoded = encoder.transform(X_test)

# Split the training data into a train and validation set
X_train, X_val, y_train, y_val = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

# Train a simple Logistic Regression model using sparse data
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Validate the model
y_val_pred = model.predict(X_val)
print(f'Validation Accuracy: {accuracy_score(y_val, y_val_pred)}')

# Make predictions on the test set
test_predictions = model.predict(X_test_encoded)

# Convert predictions back to original labels
test_predictions = le.inverse_transform(test_predictions)

# Prepare the submission file
submission_df = pd.DataFrame({'id': test_df['id'], 'class': test_predictions})
submission_df.to_csv(r'C:\Users\Ahmed Ashraf\Desktop\submission.csv', index=False)

print("Submission file created successfully!")


Validation Accuracy: 0.8860149922440081
Submission file created successfully!
