In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler

In [32]:
# Load the training data: the last column is the target, every other column
# is a feature.
df = pd.read_csv('data/iith_foml_2023_train.csv')
y_train = df.iloc[:, -1]

# Build the feature frame with a non-mutating drop. Dropping in place on the
# `df.iloc[...]` slice raises pandas' SettingWithCopyWarning and makes the
# cell harder to reason about on re-run.
X_train = df.iloc[:, :-1].drop(columns=['Feature 16', 'Feature 17', 'Feature 18'])

# Create imputer object
imputer = IterativeImputer(max_iter=100, tol=1e-3)

# Fit the imputer model on the training features to learn the data patterns
imputer.fit(X_train)

# Transform the dataset to replace missing values, then convert the returned
# array back to a DataFrame with the original column labels
X_train_imputed = pd.DataFrame(imputer.transform(X_train), columns=X_train.columns)

In [33]:
# Read the test set and remove the same three feature columns that were
# dropped from the training features.
df_test = pd.read_csv('data/iith_foml_2023_test.csv').drop(
    columns=['Feature 16', 'Feature 17', 'Feature 18']
)

# Fill missing values with the imputer fitted on the training features and
# restore the column labels on the resulting array.
X_test = pd.DataFrame(
    data=imputer.transform(df_test),
    columns=df_test.columns,
)

In [34]:
# Learn the scaling statistics from the imputed training features only, then
# apply that same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train_imputed)
X_train_scaled = scaler.transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test)

In [35]:
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier

# Fixed seed shared by every randomized estimator below. Without it each run
# trains different trees, so the predictions are not reproducible.
RANDOM_STATE = 42

# Initialize models
bagging_model = BaggingClassifier(random_state=RANDOM_STATE)
extratrees_model = ExtraTreesClassifier(random_state=RANDOM_STATE)
randomforest_model = RandomForestClassifier(random_state=RANDOM_STATE)

# Hard voting: the ensemble predicts the class label chosen by the majority
# of the three base estimators.
model = VotingClassifier(
    estimators=[
        ('rf', randomforest_model),
        ('bg', bagging_model),
        ('et', extratrees_model)
    ], voting='hard'
)

# Train the model on the training data
model.fit(X_train_scaled, y_train)

# Predict labels for the scaled test features
predictions = model.predict(X_test_scaled)

In [36]:
from pathlib import Path

# Build the submission table: a 1-based `id` column followed by the predicted
# `Category` for each test row.
pred_df = pd.DataFrame({'Category': predictions})
pred_df.insert(0, 'id', pred_df.index + 1)

# Make sure the output directory exists; `to_csv` does not create missing
# parent directories and would otherwise raise an OSError.
output_path = Path('output/voting.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
pred_df.to_csv(output_path, index=False)