In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

In [3]:
# Read the training and test tables from CSV files in the working directory.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in ('Train_Data.csv', 'Test_Data.csv')
)

In [4]:
# SEQN is excluded from the model inputs (presumably a respondent identifier
# rather than a feature -- confirm against the data dictionary).
train_df = train_df.drop(columns=['SEQN'])
# pop() removes the column from test_df and hands back its values, so the
# test-set SEQN values stay available in test_ids.
test_ids = test_df.pop('SEQN')

In [5]:
# Recode RIAGENDR from {1, 2} to {0, 1} in both tables using one shared mapping.
# Series.map turns any value that is not a key of the mapping into NaN, so this
# cell is not idempotent: running it a second time would blank the column.
riagendr_codes = {1: 0, 2: 1}
for frame in (train_df, test_df):
    frame['RIAGENDR'] = frame['RIAGENDR'].map(riagendr_codes)

In [6]:
# Encode the target label: 'Adult' -> 0, 'Senior' -> 1. Any other value
# (including a missing label) becomes NaN because it is not a key of the mapping.
age_group_codes = {'Adult': 0, 'Senior': 1}
train_df['age_group'] = train_df['age_group'].map(age_group_codes)

In [7]:
# Keep only the rows whose target survived the encoding step, i.e. rows where
# age_group is not NaN. The surviving rows keep their original index labels.
has_label = train_df['age_group'].notna()
train_df = train_df.loc[has_label]

In [8]:
# Target vector: the encoded age_group as integers (safe to cast because rows
# with a NaN label were removed beforehand).
y = train_df['age_group'].astype(int)
# Feature matrix: every remaining column except the target.
X = train_df.drop(columns=['age_group'])

In [9]:
# Fill missing feature values with per-column medians learned from the training
# features, then apply those same medians to the test features.
# NOTE(review): the imputer is fit on every training row, including the rows
# that train_test_split later holds out for validation, so the validation score
# may be slightly optimistic.
imputer = SimpleImputer(strategy='median')
# fit_transform/transform return bare arrays; rebuild the frames with both the
# column labels AND the row index of their inputs. X comes from a frame that
# was filtered for missing labels, so a fresh 0..n-1 index would leave the
# features aligned with y by position only, not by label.
X_imputed = pd.DataFrame(imputer.fit_transform(X), columns=X.columns, index=X.index)
test_imputed = pd.DataFrame(imputer.transform(test_df), columns=test_df.columns, index=test_df.index)

In [10]:
# Standardise the features with statistics learned from the imputed training
# features; the imputed test features are transformed with those same statistics.
scaler = StandardScaler()
# Label each result with the columns and row index of the frame that was
# actually scaled, so the output stays aligned with its input row for row
# instead of borrowing labels from a different frame and resetting the index.
X_scaled = pd.DataFrame(
    scaler.fit_transform(X_imputed),
    columns=X_imputed.columns,
    index=X_imputed.index,
)
test_scaled = pd.DataFrame(
    scaler.transform(test_imputed),
    columns=test_imputed.columns,
    index=test_imputed.index,
)

In [11]:
# Hold out 20% of the labelled rows for validation. stratify=y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [12]:
# Random forest of 100 trees with a fixed seed. class_weight='balanced'
# re-weights the classes inversely to their frequency in the training labels.
model = RandomForestClassifier(
    n_estimators=100,
    class_weight='balanced',
    random_state=42,
)
model.fit(X_train, y_train)

In [13]:
# Score the held-out rows. With its default settings f1_score reports the F1 of
# the class labelled 1, which is 'Senior' under the encoding used for age_group.
y_val_pred = model.predict(X_val)
val_f1 = f1_score(y_true=y_val, y_pred=y_val_pred)
print("Validation F1 Score:", val_f1)

Validation F1 Score: 0.1951219512195122


In [14]:
# Predict an encoded age_group (0 or 1) for every row of the preprocessed test set.
test_predictions = model.predict(X=test_scaled)

In [17]:
# One predicted label per test row, in test-set row order, under a single
# 'age_group' column.
submission = pd.DataFrame(test_predictions, columns=['age_group'])
print(submission)
# NOTE(review): test_ids (the SEQN values saved from the test set) is never
# written out -- confirm whether the expected submission format needs an id column.
submission.to_csv('Submission.csv', index=False)

     age_group
0            0
1            0
2            0
3            0
4            0
..         ...
307          0
308          0
309          0
310          0
311          0

[312 rows x 1 columns]
