In [5]:
# 📦 Import Required Libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
import numpy as np

# 📂 Read the three competition CSVs from the working directory.
# The generator is consumed left to right, so the files are read in the
# order listed: training data, test data, then the sample submission.
train_df, test_df, sample_submission_df = (
    pd.read_csv(csv_name)
    for csv_name in ("Train_Data.csv", "Test_Data.csv", "Sample_Submission.csv")
)

# 🧹 Clean Data & Encode Target
# Rows with no label cannot be used for supervised training. The explicit
# .copy() gives an independent frame, so the column assignment below never
# writes into a slice of the originally loaded data.
train_df = train_df.dropna(subset=['age_group']).copy()

# Encode the target: Adult -> 0, Senior -> 1.
target_codes = train_df['age_group'].map({'Adult': 0, 'Senior': 1})
unmapped = target_codes.isna()
if unmapped.any():
    # Series.map() yields NaN for any label missing from the dict; fail here
    # and name the offending values instead of failing later in model.fit().
    unexpected = train_df.loc[unmapped, 'age_group'].unique().tolist()
    raise ValueError(f"Unexpected age_group labels: {unexpected}")
train_df['age_group'] = target_codes.astype(int)

# 🎯 Split features and labels
X = train_df.drop(columns=['age_group'])
y = train_df['age_group']

# 🧼 Handle Missing Values
# Fit the mean imputer on the training features only, so that test-set
# statistics do not leak into the values used to fill training rows.
imputer = SimpleImputer(strategy='mean')
X_train_final = imputer.fit_transform(X)

# 🔄 Apply the training means to the test set
# reindex() selects the training columns in training order. A column that is
# absent from the test file becomes all-NaN and is filled with its training
# mean; a column present only in the test file is dropped, because the model
# is never trained on it.
X_test_final = imputer.transform(test_df.reindex(columns=X.columns))

# 🤖 Build the random forest and fit it in one step.
# fit() returns the estimator itself, so `model` is the trained classifier;
# random_state=42 keeps the run reproducible.
model = RandomForestClassifier(random_state=42).fit(X_train_final, y)

# 🔮 Predict a class label for every row of the imputed test set
predictions = model.predict(X_test_final)

# 📤 Build the single-column submission table and write it without the index
predicted_labels = predictions.astype(int)
submission_df = pd.DataFrame({'age_group': predicted_labels})
submission_df.to_csv("Submission.csv", index=False)

# ✅ Print the first rows as a quick sanity check, then confirm the save
preview = submission_df.head()
print("Sample predictions:")
print(preview)
print("\n✅ Submission file saved as 'Submission.csv'")

Sample predictions:
   age_group
0          0
1          0
2          0
3          0
4          0

✅ Submission file saved as 'Submission.csv'
