In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

In [None]:
# Read the train table, the test table and the submission template, in that order.
train_df, test_df, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/playground-series-s3e22/train.csv",
        "/kaggle/input/playground-series-s3e22/test.csv",
        "/kaggle/input/playground-series-s3e22/sample_submission.csv",
    )
)

In [None]:
# Preview the first rows of the training data.
train_df.head()

In [None]:
# Preview the last rows of the training data.
train_df.tail()

In [None]:
# Print pandas' built-in summary of the training frame.
train_df.info()

In [None]:
def df_info(df):
    """Summarise each column of ``df`` and return the summary as a styled table.

    Prints the shape of ``df`` as a side effect, then builds a table with one
    row per column of ``df`` and these columns:

    * ``dtypes``  - the column's dtype
    * ``Nan``     - number of missing values
    * ``Nan %``   - missing values as a percentage (0-100) of the row count
    * ``Nunique`` - number of distinct values as reported by ``DataFrame.nunique``

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    pandas.io.formats.style.Styler
        The summary table with a ``'Blues'`` background gradient applied.
    """
    missing = df.isna()
    info_df = pd.DataFrame(
        {
            "dtypes": df.dtypes,
            "Nan": missing.sum(),
            # mean() of the boolean mask is the missing fraction; scale it so
            # the value agrees with the "%" in the column label.
            "Nan %": missing.mean() * 100,
            "Nunique": df.nunique(),
        }
    )
    print(f"Shape: {df.shape}")
    return info_df.style.background_gradient(cmap='Blues')

In [None]:
# Per-column dtype, missing-value and cardinality summary of the training data.
df_info(train_df)

In [None]:
# Per-column dtype, missing-value and cardinality summary of the test data.
df_info(test_df)

In [None]:
# Impute missing values by carrying the next valid observation backwards.
# DataFrame.bfill() replaces fillna(method="bfill"), whose `method` argument is
# deprecated in recent pandas releases. The trailing ffill() covers gaps in the
# last rows, which a backward fill alone cannot reach.
# (Alternative considered earlier: dropping every column that contains NaNs
# with dropna(axis=1).)
train_df = train_df.bfill().ffill()
test_df = test_df.bfill().ffill()

In [None]:
# Show the (rows, columns) shape of the training frame at this point.
train_df.shape

In [None]:
# Drop the identifier columns from both frames.
# Re-binding the result (instead of inplace=True) together with
# errors="ignore" keeps this cell idempotent: running it a second time no
# longer raises KeyError for columns that are already gone.
train_df = train_df.drop(columns=["id", "hospital_number"], errors="ignore")
test_df = test_df.drop(columns=["id", "hospital_number"], errors="ignore")

In [None]:
# One scatter panel per training column (values plotted against the row index).
# The grid is sized from the number of columns instead of a fixed 14x2 layout,
# so the cell keeps working when columns are added or removed.
n_grid_cols = 2
n_grid_rows = max(1, -(-len(train_df.columns) // n_grid_cols))  # ceiling division
fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(15, 2.2 * n_grid_rows),
    constrained_layout=True,
    squeeze=False,  # always a 2-D array of axes, even for a single row
)
axes = axes.ravel()

for ax, col in zip(axes, train_df.columns):
    sns.scatterplot(data=train_df[col], ax=ax)
    ax.set_title(col)  # make each panel identifiable on its own

# Hide the panels left over when the column count does not fill the grid.
for ax in axes[len(train_df.columns):]:
    ax.set_visible(False)
plt.show()

In [None]:
# Plot every column against "total_protein", coloured by "outcome":
# scatter plots for numeric columns, bar plots for everything else.
# The grid is sized from the number of columns instead of a fixed 14x2 layout.
n_grid_cols = 2
n_grid_rows = max(1, -(-len(train_df.columns) // n_grid_cols))  # ceiling division
fig, axes = plt.subplots(
    nrows=n_grid_rows,
    ncols=n_grid_cols,
    figsize=(15, 2.2 * n_grid_rows),
    constrained_layout=True,
    squeeze=False,  # always a 2-D array of axes, even for a single row
)
axes = axes.ravel()

for ax, col in zip(axes, train_df.columns):
    # is_numeric_dtype covers every integer/float width; comparing the dtype
    # with the strings "int"/"float" only matches the platform default width.
    if pd.api.types.is_numeric_dtype(train_df[col]):
        sns.scatterplot(x=col, y="total_protein", hue="outcome", data=train_df, ax=ax)
    else:
        sns.barplot(x=col, y="total_protein", hue="outcome", data=train_df, ax=ax)

# Hide the panels left over when the column count does not fill the grid.
for ax in axes[len(train_df.columns):]:
    ax.set_visible(False)
plt.show()

In [None]:
# Annotated correlation heatmap over the numeric training columns.
numeric_train = train_df.select_dtypes(include=[np.number])

heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 8))
sns.heatmap(data=numeric_train.corr(), annot=True, ax=heatmap_ax)
plt.show()

In [None]:
# Stack train on top of test so that encoding and scaling are computed over
# both sets at once.
# ignore_index=True gives the combined frame a unique 0..n-1 index; without it
# the test rows repeat the train rows' labels and any label-based lookup or
# alignment on the combined frame becomes ambiguous.
concat_df = pd.concat([train_df, test_df], axis=0, ignore_index=True)
concat_df.shape

In [None]:
# Names of the text (categorical) feature columns, excluding the target.
# Both classic object columns and pandas' dedicated string dtype are accepted,
# and the conditions are combined with the boolean `and` rather than bitwise `&`.
object_col_names = [
    col
    for col in train_df.columns
    if col != "outcome"
    and (
        pd.api.types.is_object_dtype(concat_df[col])
        or isinstance(concat_df[col].dtype, pd.StringDtype)
    )
]
object_col_names

In [None]:
from sklearn.preprocessing import LabelEncoder

def dummies(dataframe, columns=None):
    """Encode categorical columns of ``dataframe`` as integer indicator columns.

    Columns with exactly two distinct values (as counted by ``nunique``) are
    label-encoded into a single column named ``"<column>_<second class>"``.
    Every other column is one-hot encoded into ``"<column>_<value>"`` columns.
    Prefixing with the source column keeps the output labels unique even when
    different features share category names.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame holding the columns to encode.
    columns : iterable of str, optional
        Columns to encode. Defaults to the module-level ``object_col_names``.

    Returns
    -------
    pandas.DataFrame
        Encoded columns, in the order of ``columns``, on a fresh 0..n-1 index.
        An empty frame is returned when there is nothing to encode.
    """
    if columns is None:
        columns = object_col_names

    # Collect the pieces and concatenate once instead of growing a frame
    # inside the loop.
    encoded_parts = []
    for col in columns:
        if dataframe[col].nunique() == 2:
            le = LabelEncoder()
            codes = le.fit_transform(dataframe[col])
            part = pd.DataFrame({f"{col}_{le.classes_[1]}": codes})
        else:
            part = (
                pd.get_dummies(dataframe[col], prefix=col)
                .astype(int)
                .reset_index(drop=True)
            )
        encoded_parts.append(part)

    if not encoded_parts:
        return pd.DataFrame()
    return pd.concat(encoded_parts, axis=1)

In [None]:
# Encode the categorical columns of the combined train + test frame and preview the result.
dummies_concat_df = dummies(concat_df)
dummies_concat_df.head()

In [None]:
# Show the (rows, columns) shape of the encoded categorical features.
dummies_concat_df.shape

In [None]:
# Names of the numeric feature columns.
# is_numeric_dtype accepts every integer/float width; comparing the dtype with
# the strings "int"/"float" only matches the platform's default width and
# would silently skip e.g. int32 or float32 columns.
num_col_names = [
    col
    for col in train_df.columns
    if pd.api.types.is_numeric_dtype(concat_df[col])
]
num_col_names

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale each numeric column independently (a fresh scaler per column,
# fitted on the combined train + test values) and collect the results in a
# frame that shares concat_df's index.
scaled_columns = {
    name: MinMaxScaler().fit_transform(concat_df[[name]].to_numpy()).ravel()
    for name in num_col_names
}
ss_df = pd.DataFrame(scaled_columns, index=concat_df.index)
ss_df.head()

In [None]:
# Place the encoded categorical features and the scaled numeric features side
# by side; the scaled frame is re-indexed to 0..n-1 first so rows line up.
df = pd.concat(
    objs=[dummies_concat_df, ss_df.reset_index(drop=True)],
    axis="columns",
)
df.head()

In [None]:
# Show the (rows, columns) shape of the assembled feature frame.
df.shape

In [None]:
# Undo the earlier stacking: the first len(train_df) rows are the training
# rows, everything after them belongs to the test set.
shape = len(train_df)
train, test = df.iloc[:shape], df.iloc[shape:]

In [None]:
# Print the train shape and the test shape, one per line.
print(train.shape, test.shape, sep="\n")

In [None]:
# Features are the processed splits; y_train is the one-hot encoded "outcome" column.
x_train, x_test = train, test
y_train = pd.get_dummies(train_df[["outcome"]], dtype=int)

In [None]:
# Display the one-hot encoded target frame.
y_train

In [None]:
!pip install FLAML
!pip install "ray[tune]<2.5.0"

In [None]:
from flaml import AutoML

# Fixed seed for the hyperparameter search so repeated runs are comparable.
AUTOML_SEED = 42

automl = AutoML()

# Encode the string outcome labels as integer class ids for the classifier.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(train_df["outcome"])

# Search for the best classifier by accuracy within a 900-second time budget.
automl.fit(
    x_train.values,
    y_encoded,
    task="classification",
    metric='accuracy',
    time_budget=900,
    seed=AUTOML_SEED,
)

In [None]:
# Report the outcome of the AutoML search.
print('Best ML learner:', automl.best_estimator)
print('Best hyperparameter config:', automl.best_config)
# The model was fitted with metric='accuracy', for which FLAML minimises
# loss = 1 - accuracy, so convert the best loss back into an accuracy.
print('Best accuracy on validation data: {0:.4g}'.format(1 - automl.best_loss))
print('Training duration of best run: {0:.4g} s'.format(automl.best_config_train_time))

In [None]:
# Predict encoded class ids for the test features and display them.
y_pred = automl.predict(x_test.values)
y_pred

In [None]:
# Map predicted class ids back to outcome names.
# The class order is read from the fitted label encoder rather than from a
# hand-maintained list, so the mapping cannot drift from what the model was
# trained on.
class_labels = list(label_encoder.classes_)
pred_test = [class_labels[i] for i in y_pred]

In [None]:
# Put the predicted outcome names into the submission template, write it to
# 'outcome.csv' without the index column, and display the final frame.
sample_submission['outcome'] = pred_test
sample_submission.to_csv('outcome.csv', index=False)
sample_submission