![](https://wallpapercave.com/wp/0Irh26y.jpg)

## Importing Required Libraries
---
---

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')

import warnings
warnings.filterwarnings("ignore")

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier

from tqdm.notebook import tqdm

## Helper Functions
---
---

In [None]:
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to a narrower dtype where possible.

    Integer columns are cast to the first of int8/int16/int32/int64 whose
    range contains the column's observed minimum and maximum (bounds are
    inclusive, so a column peaking at exactly 127 still fits int8). Float
    columns are cast to float32 when their observed range fits, otherwise to
    float64. Columns whose dtype is not in ``numerics`` are left untouched.

    Args:
        df: DataFrame to shrink. Its columns are reassigned in place and the
            same object is returned.
        verbose: When true, print the final memory usage in Mb and the
            percentage saved.

    Returns:
        ``df`` with its numeric columns downcast.
    """
    numerics = ['int8', 'int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    # Ordered narrowest first so the first match is the smallest that fits.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    start_mem = df.memory_usage().sum() / 1024**2

    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                # Inclusive comparison: the dtype's own min/max are valid values.
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # float16 is never chosen as a target; float16 input ends up float32.
            limits = np.finfo(np.float32)
            if limits.min <= c_min and c_max <= limits.max:
                df[col] = df[col].astype(np.float32)
            else:
                # Also reached when min/max are NaN or infinite.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2

    if verbose:
        # Guard the ratio so a frame reporting zero bytes cannot divide by zero.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

#-------------------------------------------------------------
def my_print(s):
    '''Print ``s`` framed in an ASCII box (custom print by @kartik2khandelwal).

    The rules above and below the text are ``len(s) + 4`` dashes long, which
    matches the width of the middle line once the side bars and padding
    spaces are added. Returns whatever ``print`` returns (``None``).
    '''
    rule = '-' * (len(s) + 4)
    boxed = '\n'.join([rule, '| ' + s + ' |', rule])
    return print(boxed)

#-------------------------------------------------------------
def quantile(df, lower=0.005, upper=0.99):
    """Replace out-of-range values in every column with that column's mean.

    For each column, values strictly below the ``lower`` quantile or strictly
    above the ``upper`` quantile are overwritten with the mean of the column
    as it was before any replacement. Missing values are neither above nor
    below the cut-offs, so they are left as they are.

    Args:
        df: DataFrame whose columns are processed. Columns are reassigned on
            this object, which is also returned.
        lower: Quantile (0-1) used as the lower cut-off.
        upper: Quantile (0-1) used as the upper cut-off.

    Returns:
        ``df`` with out-of-range values replaced.
    """
    for i in tqdm(df.columns, total=len(df.columns)):
        column = df[i]
        low = column.quantile(lower)
        high = column.quantile(upper)
        # Compute the mean once per column rather than once per element.
        column_mean = column.mean()
        out_of_range = (column > high) | (column < low)
        df[i] = column.mask(out_of_range, column_mean)
    my_print('DataFrame columns are now in Quantile Range')
    return df

#-------------------------------------------------------------
def iterative_imputer(df):
    """Fill missing values in ``df`` with scikit-learn's ``IterativeImputer``.

    Prints the number of missing cells before and after imputation. The
    imputer is created with ``max_iter=10`` and fitted on ``df`` itself.

    Args:
        df: DataFrame to impute; every column is handed to the imputer.

    Returns:
        A new DataFrame holding the imputed values, carrying the column
        labels and row index of the input.
    """
    my_print(f'Null Values - {df.isnull().sum().sum()}')
    my_print('Applying Iterative Imputer...')
    imputer = IterativeImputer(max_iter=10)
    data = imputer.fit_transform(df)
    # Keep the caller's index: rebuilding with a fresh 0..n-1 index would break
    # label-based alignment with any target Series derived from the same rows.
    df = pd.DataFrame(data, columns=df.columns, index=df.index)
    my_print('Iterative Imputer Applied !!!')
    my_print(f'Null Values Left - {df.isnull().sum().sum()}')
    return df

## Loading Dataset
---
---

In [None]:
# Read the competition files; the test frame does not need its `row_id` column.
df = pd.read_csv('../input/tabular-playground-series-feb-2022/train.csv')
X_test = pd.read_csv('../input/tabular-playground-series-feb-2022/test.csv').drop(columns='row_id')

# Downcast numeric columns of both frames (test first, then train).
X_test = reduce_mem_usage(X_test)
df = reduce_mem_usage(df)

# Last expression of the cell: shows the first rows of the training frame.
df.head()

In [None]:
# Two rows count as duplicates when they agree on every column except `row_id`;
# only the first occurrence of each is kept.
features_only = df.drop(columns='row_id')
keep_rows = features_only.index[~features_only.duplicated(keep='first')].values
# reset_index() moves the old index into a column named `index`.
df = df.iloc[keep_rows].reset_index()

In [None]:
# Discard the `index` helper column that the preceding reset_index() call produced.
df.drop(columns='index', inplace=True)

## Exploratory Data Analysis
---
---

In [None]:
# Horizontal bar chart with one bar per value of the `target` column.
plt.figure(figsize=(10, 5))
sns.countplot(data=df, y='target', palette='cool', orient='h')
my_print('Checking Class Imbalace')
plt.show()

In [None]:
d = {'Salmonella_enterica': 0, 'Enterococcus_hirae': 1, 'Escherichia_coli': 2, 'Streptococcus_pyogenes': 3,
     'Campylobacter_jejuni': 4, 'Streptococcus_pneumoniae': 5, 'Staphylococcus_aureus': 6,
     'Escherichia_fergusonii': 7, 'Bacteroides_fragilis': 8, 'Klebsiella_pneumoniae': 9}
d_inv = {v: k for k, v in d.items()}

## Splitting of Dependent & Independent Variables
---
---

In [None]:
# Features: every column except the label and the row identifier.
X = df.drop(columns=['target', 'row_id'])
# Labels: the `target` column translated to integer codes through `d`.
y = df['target'].map(d)

## Scaling Data
---
---

In [None]:
from sklearn.preprocessing import StandardScaler
def scaling(df):
    '''Standardise every column of ``df`` with a freshly fitted StandardScaler.

    A new scaler is fitted on ``df`` on every call, so two frames passed
    through this function are each scaled with their own statistics.

    Args:
        df: DataFrame of numeric columns.

    Returns:
        A new DataFrame of scaled values that keeps the column labels and
        the row index of ``df``.
    '''
    scale = StandardScaler()
    # Passing the index through keeps the result aligned with any Series
    # (e.g. labels) that shares the input's index.
    return pd.DataFrame(scale.fit_transform(df), columns=df.columns, index=df.index)

In [None]:
# Learn the mean/std from the training features only and apply that same
# transform to the test features, so both end up on one common scale.
scaler = StandardScaler().fit(X)
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)
# Select test columns in training order before transforming.
X_test_scaled = pd.DataFrame(
    scaler.transform(X_test[X.columns]),
    columns=X.columns,
    index=X_test.index,
)

## Dimensionality Reduction
---
---

In [None]:
from sklearn.decomposition import PCA
# Keep as many principal components as needed to explain 95% of the variance.
pca = PCA(0.95)
X_pca = pca.fit_transform(X_scaled)
# Project the test set with the components learned from the training set.
# Re-fitting here would give the test data a different basis (and possibly a
# different number of components) from the one the model is trained on.
X_test_pca = pca.transform(X_test_scaled[X_scaled.columns])

In [None]:
# Fraction of the scaled feature columns that the PCA output no longer has.
n_before = X_scaled.shape[-1]
n_after = X_pca.shape[-1]
percent = (n_before - n_after) / n_before
my_print(f'{round(percent * 100)} % data features removed !!!')

In [None]:
# Wrap the PCA outputs in DataFrames; with no labels supplied, both frames get
# default integer column labels and a default integer row index.
X = pd.DataFrame(X_pca)
X_test = pd.DataFrame(X_test_pca)

## Feature Engineering
---
---

In [None]:
# Replace values outside each column's quantile cut-offs with the column mean.
# NOTE(review): only the training features go through this step; X_test is not
# passed to quantile() anywhere in this notebook. Confirm that is intended.
X = quantile(X)

In [None]:
# Run the iterative imputer over the training features; the helper prints the
# null counts before and after.
# NOTE(review): X is PCA output at this point, so it presumably has no missing
# values left to fill. Confirm this step is needed.
X = iterative_imputer(X)

In [None]:
# Select the test columns by the training frame's column labels, in the same
# order; this raises KeyError if X_test lacks any of them.
X_test = X_test[X.columns]

## Model Training
---
---

In [None]:
N_SPLITS = 7
y_preds = []  # one array of test-set predictions per fold

# A fixed seed makes the shuffled fold assignment reproducible between runs.
folds = StratifiedKFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

for fold, (train_id, test_id) in enumerate(folds.split(X, y)):
    X_train = X.iloc[train_id]
    y_train = y.iloc[train_id]
    X_valid = X.iloc[test_id]
    y_valid = y.iloc[test_id]

    model = KNeighborsClassifier(3)
    model.fit(X_train, y_train)

    # Score each split once and reuse the value for reporting.
    train_score = model.score(X_train, y_train)
    valid_score = model.score(X_valid, y_valid)

    print(f'Fold: {fold + 1}')
    # Built-in round() works whether score() returns a Python float or a NumPy
    # scalar; calling the .round() method fails on a plain float.
    my_print(f'Training Accuracy   :- {round(train_score * 100, 2)}%')
    my_print(f'Validation Accuracy :- {round(valid_score * 100, 2)}%')

    y_preds.append(model.predict(X_test))

## Prediction | Submission
---
---

In [None]:
# Each entry of y_preds becomes a row, so after transposing there is one row
# per test sample and one column per fold.
temp = pd.DataFrame(y_preds).T

In [None]:
# Majority vote across folds: take the row-wise mode and keep its first column
# (the first listed mode when several values tie), cast to int32.
y_pred = temp.mode(axis=1)[0].astype('int32')

In [None]:
# Start from the sample submission so the file keeps its expected layout.
submission = pd.read_csv('../input/tabular-playground-series-feb-2022/sample_submission.csv')

# Translate the voted integer codes back to class names, then write the file.
submission['target'] = y_pred.map(d_inv)
submission.to_csv('submission.csv', index=False)

### Please Leave an Upvote, If You Like It. 😊