In [2]:
from xgboost import XGBClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
import pandas as pd


train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

X = train_data.drop(columns=['Target'])
y = train_data['Target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

preprocess = ColumnTransformer(
    transformers=[
        ('numerical', StandardScaler(), X.columns)
    ])

xgb = XGBClassifier( 
    scale_pos_weight=len(y_train[y_train == 0]) / len(y_train[y_train == 1]), 
    random_state=100,
    n_estimators=850,
    subsample=0.9,
    min_child_weight=100,
    max_depth=4,
    learning_rate=0.01,
    gamma=0.2,
    colsample_bytree=0.7,
)

model = Pipeline([
    ('preprocessor', preprocess),
    ('classifier', xgb)
]) 

model.fit(X_train, y_train)

y_test_pred = model.predict(X_test)

print(balanced_accuracy_score(y_test, y_test_pred))

data_test_pred = model.predict(test_data)

submission = pd.DataFrame({
    'Id': test_data['Id'],
    'Target': data_test_pred
})

submission.to_csv('submission.csv', index=False)



0.7514127288349339
