In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the training table and the held-out test table from CSV.
# NOTE(review): both paths are empty strings here — fill in the real CSV
# locations before running this cell.
train = pd.read_csv(filepath_or_buffer="")
test = pd.read_csv(filepath_or_buffer="")

In [None]:
# Per-column count of missing values in the training frame.
train.isna().sum()

In [None]:
# Per-column count of missing values in the test frame.
test.isna().sum()

In [None]:
# Print a summary of the training frame (columns, dtypes, non-null counts).
train.info()

In [None]:
# Keep the test identifiers aside (they are written to the submission later),
# then remove the column so only the remaining columns are fed to the model.
test_id = test['id']
test = test.drop('id', axis=1)

In [None]:
# Separate the target column from the feature table.
y = train['Class']

# 'id' is removed from the test frame before prediction, so it must not be a
# training feature either: the fitted ColumnTransformer would otherwise expect
# an 'id' column at predict time. errors='ignore' makes the second drop a
# no-op when the training file has no 'id' column.
X = train.drop(columns=['Class']).drop(columns=['id'], errors='ignore')

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Column names routed to each preprocessing branch, selected by dtype.
# Only int64/float64 columns count as numeric and only object columns as
# categorical; columns of any other dtype end up in neither list.
categorical_features = X.select_dtypes(include=['object']).columns
numeric_features = X.select_dtypes(include=['int64', 'float64']).columns

In [None]:
# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore')),
])

# Numeric branch: fill gaps with the column mean, then standardize.
numerical_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

In [None]:
# Apply the numeric branch to the numeric columns and the categorical branch
# to the categorical columns, concatenating both outputs.
preprocessing = ColumnTransformer([
    ('num', numerical_pipeline, numeric_features),
    ('cat', categorical_pipeline, categorical_features),
])

In [None]:
# Gradient-boosted trees: many shallow trees with a small learning rate, each
# fitted on a 70% row subsample; seeded so training is repeatable.
model = GradientBoostingClassifier(
    random_state=42,
    learning_rate=0.02,
    n_estimators=920,
    subsample=0.7,
    max_depth=4,
    min_samples_leaf=1,
    min_samples_split=2,
)

In [None]:
# Chain preprocessing and the classifier so both are fitted and applied
# together through a single estimator.
pipeline = Pipeline([
    ('preprocessor', preprocessing),
    ('model', model),
])

In [None]:
# Keep only the training rows that have a target value; the same boolean mask
# is applied to features and target so they stay aligned.
not_null_index = y_train.notna()
X_train = X_train.loc[not_null_index]
y_train = y_train.loc[not_null_index]

In [None]:
# Fit the preprocessing steps and the classifier on the training split.
pipeline.fit(X=X_train, y=y_train)

In [None]:
# Keep only the validation rows that have a target value, filtering features
# and target with the same mask.
not_null_index = y_test.notna()
X_test, y_test = X_test.loc[not_null_index], y_test.loc[not_null_index]

In [None]:
# Predicted classes for the validation split.
y_pred = pipeline.predict(X=X_test)

In [None]:
# Validation accuracy. scikit-learn metrics take (y_true, y_pred) in that
# order; accuracy happens to be symmetric, but the documented order is kept so
# the call stays correct if it is swapped for an asymmetric metric.
accu = accuracy_score(y_test, y_pred)

In [None]:
# Report the validation accuracy computed above.
print(accu)

In [None]:
# Predicted classes for the test frame (identifier column already removed).
y_final = pipeline.predict(X=test)

In [None]:
# Pair each saved test identifier with its predicted class.
submission = pd.DataFrame(data={'id': test_id, 'Class': y_final})

In [None]:
# Write the predictions to disk without the DataFrame index column.
submission.to_csv(path_or_buf="submission.csv", index=False)