In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
# enable_iterative_imputer must be imported before IterativeImputer (experimental API gate).
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [2]:
# Load the training set: the last column is the target, everything before it
# is a feature.
df = pd.read_csv('data/iith_foml_2023_train.csv')
y_train = df.iloc[:, -1]

# Drop 'Feature 16' and 'Feature 17' with a chained, non-inplace drop so that
# the iloc slice of `df` is never mutated in place (in-place edits of a slice
# can trigger pandas' SettingWithCopyWarning).
X_train = df.iloc[:, :-1].drop(columns=['Feature 16', 'Feature 17'])

# Create the imputer and fit it on the remaining training features.
imputer = IterativeImputer(max_iter=100, tol=1e-3)
imputer.fit(X_train)

# Replace missing values in the training features; transform() returns a bare
# array, so wrap it back into a DataFrame carrying the original column labels.
X_train_imputed = pd.DataFrame(imputer.transform(X_train), columns=X_train.columns)

In [3]:
# Load the test set and remove the same two columns that were dropped from the
# training features.
df_test = pd.read_csv('data/iith_foml_2023_test.csv').drop(
    columns=['Feature 16', 'Feature 17']
)

# Fill missing values with the imputer fitted on the training features and
# restore the column labels on the result.
X_test = pd.DataFrame(
    imputer.transform(df_test),
    columns=df_test.columns,
)

In [4]:
# Learn the scaling statistics from the imputed training features only, then
# apply that same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train_imputed)
X_train_scaled = scaler.transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test)

In [5]:
# The ensemble classes used here are imported in the notebook's top import
# cell rather than mid-notebook.

# Base learners: three ensemble classifiers sharing one fixed seed (42) so
# repeated runs build the same models.
bagging_model = BaggingClassifier(random_state=42)
extratrees_model = ExtraTreesClassifier(random_state=42)
randomforest_model = RandomForestClassifier(random_state=42)

# Combine the base learners; voting='hard' selects hard (label-based) voting.
model = VotingClassifier(
    estimators=[
        ('rf', randomforest_model),
        ('bg', bagging_model),
        ('et', extratrees_model)
    ], voting='hard'
)

# Train the ensemble on the scaled training features and their labels.
model.fit(X_train_scaled, y_train)

# Predict a label for every row of the scaled test features.
predictions = model.predict(X_test_scaled)

In [15]:
# Spot check: run the fitted ensemble on one hand-entered row of 22 values.
# NOTE(review): the values look like already-scaled features, presumably copied
# from a scaled row -- confirm the source. This cell's execution count (15) is
# out of sequence with its neighbours, so it was run out of order.
xpred = np.array(
    [
        -0.35300642, 1.86526984, -0.95229802, 0.41903394, 0.14193402,
        0.56064079, -0.40846433, -0.41986506, 0.89865619, 1.28663235,
        1.02863444, -0.18268335, -1.19357302, 1.19390121, 0.49132456,
        -0.3119616, -0.60935857, 0.97064142, -1.00108563, -0.93295979,
        -0.35449325, 0.49124206,
    ],
    dtype=float,
).reshape(1, -1)  # a single sample: shape (1, 22)
ypred = model.predict(xpred)
ypred

array([12], dtype=int64)

In [6]:
# Assemble the submission table: a 1-based `id` per prediction, followed by the
# predicted `Category`.
pred_df = pd.DataFrame({'Category': predictions})
pred_df.insert(0, 'id', pred_df.index + 1)

# to_csv does not create missing directories, so make sure the output folder
# exists before writing (a fresh checkout may not have it yet).
submission_path = Path('output/voting.csv')
submission_path.parent.mkdir(parents=True, exist_ok=True)
pred_df.to_csv(submission_path, index=False)