In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import seaborn as sns
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by pandas/seaborn
# calls further down — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
# Apply matplotlib's 'grayscale' style sheet to all subsequent figures.
plt.style.use('grayscale')

<img src = "https://i.imgur.com/uHVJtv0.png">

In [None]:
# Directory holding the competition CSV files.
DATA = "../input/tabular-playground-series-may-2021/"

# Read the three CSVs in a fixed order and unpack them into their frames:
# training rows, test rows, and the sample submission.
data_train, data_test, submission = (
    pd.read_csv(os.path.join(DATA, file_name))
    for file_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first rows of the training frame.
data_train.head()

In [None]:
# Preview the first rows of the test frame.
data_test.head()


In [None]:
# Print the training frame's column dtypes, non-null counts and memory usage.
data_train.info()

In [None]:
# Summary statistics (count, mean, std, quantiles, ...) for the training frame.
data_train.describe()

In [None]:
# Partition the training columns by storage dtype:
#   dt_i  -> int64 columns
#   dt_fl -> float64 columns
#   dt_o  -> everything else
column_dtypes = data_train.dtypes

dt_i = [name for name, kind in column_dtypes.items() if kind == 'int64']
dt_fl = [name for name, kind in column_dtypes.items() if kind == 'float64']
dt_o = [
    name
    for name, kind in column_dtypes.items()
    if not (kind == 'int64' or kind == 'float64')
]

In [None]:
# Columns whose dtype is float64.
dt_fl

In [None]:
# Columns whose dtype is int64.
dt_i

In [None]:
# Columns that are neither int64 nor float64.
dt_o

In [None]:
# Bar plot of feature_1 against target, using seaborn's default aggregation.
fig, ax = plt.subplots(figsize=(10, 8))
sns.barplot(data=data_train, x='feature_1', y='target', ax=ax)
plt.show()

In [None]:
# Target distribution shown two ways: share of each class (pie, left) and
# raw row counts per class (count plot, right).
fig, (ax_pie, ax_count) = plt.subplots(1, 2, figsize=(18, 8))

class_counts = data_train['target'].value_counts()
# One explode offset per class (0, 0.1, 0.2, ...), derived from the number of
# classes so the pie still draws if the label set is not exactly four classes.
explode_offsets = [0.1 * position for position in range(len(class_counts))]
class_counts.plot.pie(explode=explode_offsets, ax=ax_pie)

# Pass the column as `x=`: seaborn's plot variables are keyword-only in
# current releases, so a positional Series would be interpreted as `data`.
sns.countplot(x=data_train['target'], ax=ax_count)

fig.suptitle("Target Distribution", size=20)
plt.show()

In [None]:
# One bar chart per integer column showing how often each value occurs.
for column in dt_i:
    # Skip the identifier column if present: it is dropped before modelling
    # and would presumably produce one bar per row — slow and uninformative.
    if column == 'id':
        continue

    value_counts = data_train[column].value_counts()

    fig, ax = plt.subplots(figsize=(20, 8))
    ax.bar(value_counts.index, value_counts.values, alpha=0.8)
    ax.set_ylabel('Number of Occurrences', fontsize=12)
    ax.set_xlabel('Values', fontsize=12)
    ax.set_title(f'Count for {column}')
    plt.show()
    # Release the figure so a long column list does not accumulate open figures.
    plt.close(fig)

In [None]:
# Absolute pairwise correlation between the int64 columns.
corr = data_train[dt_i].corr().abs()

fig, ax = plt.subplots(figsize=(20, 12))
# Title is placed manually in data coordinates, above the top-left corner.
ax.text(-1.1, -0.7, 'Correlation between the Features', fontsize=20)

# Colour scale is clipped to [0, 0.05]; annotations are switched off.
heatmap_options = dict(
    annot=False,
    fmt=".2f",
    cmap='gray',
    cbar_kws={"shrink": .8},
    vmin=0,
    vmax=0.05,
)
sns.heatmap(corr, ax=ax, **heatmap_options)

plt.yticks(rotation=0)
plt.show()

## mljar -- Machine Learning for Humans
<p> mljar-supervised is an Automated Machine Learning (AutoML) Python package that works with tabular data. It is designed to save a data scientist's time 😎. It abstracts the common steps of preprocessing the data, constructing machine learning models, and tuning hyperparameters to find the best model 🏆. It is not a black box: you can see exactly how each ML pipeline is constructed, with a detailed Markdown report for every model. </p>

<p> The mljar-supervised will help you with: </p>

* explaining and understanding your data,
* trying many different machine learning models,
* creating Markdown reports from analysis with details about all models,
* saving, re-running and loading the analysis and ML models.


* <b>Source --> https://github.com/mljar/mljar-supervised/ </b>

<img src = "https://raw.githubusercontent.com/mljar/mljar-examples/master/media/AutoML_overview_mljar_v3.svg">

In [None]:
!pip install mljar-supervised -q

In [None]:
from supervised.automl import AutoML
# Features: every training column except 'target' and 'id'.
X = data_train.drop(columns=['target', 'id'])
# Labels: the 'target' column.
y = data_train['target']

# Hold out 20% of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Configure AutoML for a multiclass problem scored by log loss, then fit it
# on the training portion of the split.
automl_settings = {
    "mode": "Compete",
    "ml_task": 'multiclass_classification',
    "eval_metric": 'logloss',
}
automl = AutoML(**automl_settings)
automl.fit(X_train, y_train)

In [None]:
# Predict on the held-out split (X_test). No metric is computed in this cell;
# the returned frame is displayed, and its "label" column is compared with
# y_test further down.
predictions = automl.predict_all(X_test)
predictions

In [None]:
# Side-by-side table of predicted labels and true labels for the held-out rows.
# Both columns are converted to plain arrays so they pair up by position
# rather than by index.
predicted_labels = predictions["label"].to_numpy()
true_labels = y_test.to_numpy()

result = pd.DataFrame(
    {
        "Predicted": predicted_labels,
        "Target": true_labels,
    }
)
result

In [None]:
# Confusion matrix of true vs. predicted labels on the held-out rows.
# margins=True appends an "All" row and column with the totals.
df_res = pd.DataFrame(result)
confusion_matrix = pd.crosstab(
    df_res['Target'],
    df_res['Predicted'],
    rownames=['Target'],
    colnames=['Predicted'],
    margins=True,
)

fig, ax = plt.subplots(figsize=(18, 8))
# fmt='d' prints the integer counts in full; the default annotation format
# ('.2g') would render large counts in scientific notation.
sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='gray', ax=ax)
plt.show()

In [None]:
# Remove the 'id' column before predicting on the test set.
# `columns=` replaces the positional axis argument, which newer pandas
# releases reject; errors='ignore' keeps the cell re-runnable after 'id' has
# already been dropped.
data_test = data_test.drop(columns='id', errors='ignore')
predictions_all = automl.predict_all(data_test)

# Copy each per-class prediction column into the matching submission column.
class_columns = ['Class_1', 'Class_2', 'Class_3', 'Class_4']
prediction_columns = [f'prediction_{class_name}' for class_name in class_columns]
submission[class_columns] = predictions_all[prediction_columns]

submission.to_csv('submission.csv', index=False)

<img src = "https://media.tenor.com/images/92481d2c662be7f7528dbd7d5049ce54/tenor.gif">