### <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color: black; font-size:180%; text-align:left;padding:3.0px; background: #ffebcc; border-bottom: 8px solid black" > TABLE OF CONTENTS<br></div>
* [PACKAGE IMPORTS AND INSTALLATIONS](#1)
* [APPROACH DETAILS](#2)
* [PREPROCESSING](#3)
* [MODEL TRAINING](#4)
* [REFERENCES](#5)

<a id="1"></a>
# <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color:white; font-size:120%; text-align:left;padding:3.0px; background: #b32d00; border-bottom: 8px solid black" > PACKAGE IMPORTS AND INSTALLATIONS<br> </div>

In [None]:
%%time 

from IPython.display import clear_output
from gc import collect
!pip install -q autogluon.tabular --force-reinstall;

collect();
clear_output();

In [None]:
%%time 

from IPython.display import clear_output
from gc import collect

from warnings import filterwarnings
filterwarnings('ignore')

from IPython.display import display_html, clear_output
from pprint import pprint
from functools import partial
from copy import deepcopy
import pandas as pd, numpy as np, os, joblib
import re

from warnings import filterwarnings
filterwarnings('ignore')
from tqdm.notebook import tqdm

# Pipeline specific packages:-
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

# AutoML package:-
from autogluon.tabular import TabularDataset, TabularPredictor
from sklearn.metrics import mean_squared_error as mse

print()
collect()

<a id="2"></a>
# <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color:white; font-size:120%; text-align:left;padding:3.0px; background: #b32d00; border-bottom: 8px solid black" > APPROACH DETAILS<br> </div>


    
**Data columns**<br>
The column descriptions are available in the original data description [here](https://www.kaggle.com/competitions/playground-series-s4e4/discussion/488073). <br>
 <br>
<br>**Competition details and notebook objectives**<br>
1. This is a regression challenge to predict the age of abalone (the `Rings` target). **RMSLE** is the competition metric - **it needs to be minimized**<br>
2. In this starter notebook, we start the assignment with a detailed EDA, feature plots, interaction effects, adversarial CV analysis and develop starter models to initiate the challenge. We will also incorporate other opinions and approaches as we move along the challenge.<br>
<br>
**Model strategy** <br>
We start with an AutoGluon model and blend its predictions with strong public submissions in this kernel. <br>

|Version Number|Details|CV score|LB score|
|----| -----| :-:|:-:|
|1 | * No Log transform <br> * 1 copy of original data|0.14696|0.14572|
|2 | * Log transform <br> * 1 copy of original data|0.14697|0.14554|
|3 | * Log transform <br> * 3 copies of original data|||
|4 | * No Log transform <br> * 3 copies of original data|||
|5 | * Log transform <br> * 8 copies of original data|||

<a id="3"></a>
# <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color:white; font-size:120%; text-align:left;padding:3.0px; background: #b32d00; border-bottom: 8px solid black" > PREPROCESSING<br> </div>

In [None]:
# Name of the label column; used as the AutoGluon label and as the prediction
# column of the submission file.
target       = "Rings"
# When True, the Xformer pipeline (min-shift + log1p of the features) is applied
# to train and test before model training.
use_pipeline = True
# Number of times the original dataset is appended to the competition train set.
nb_copies    = 8

In [None]:
%%time 

# Load the competition splits, the original dataset and the sample submission.
train    = pd.read_csv("/kaggle/input/playground-series-s4e4/train.csv", index_col = "id")
test     = pd.read_csv("/kaggle/input/playground-series-s4e4/test.csv", index_col = "id")
original = pd.read_csv("/kaggle/input/playgrounds4e04originaldata/Original.csv",index_col = "id")
sub_fl   = pd.read_csv("/kaggle/input/playground-series-s4e4/sample_submission.csv",)

# Give the original data the same column names as the competition train set
# so the frames can be concatenated.
original.columns = train.columns;

# Strip parentheses, dots and whitespace from the column names of every table.
for tbl in [train, original, test]:
    tbl.columns = tbl.columns.str.replace(r"\(|\)|\.|\s+","", regex = True);

# This shape is reported BEFORE the original data is appended (the previous
# message wrongly said "after append").
print(f"---> Train shape before append = {train.shape}");
train = pd.concat([train] + [original] * nb_copies,
                  axis=0,
                  ignore_index = True
                 );
print(f"---> Train shape after appending {nb_copies} original data copies = {train.shape}");

# Train on log1p(target); predictions are mapped back with expm1 when the
# submission is built.
train[target] = np.log1p(train[target].values);

print();
collect();

In [None]:
%%time 

class Xformer(BaseEstimator, TransformerMixin):
    """
    Scikit-learn compatible transformer that shifts selected columns by their
    fitted minimum and then applies log1p to them.

    Source- https://www.kaggle.com/code/mfmfmf3/clean-code-voting-regressor-base-3-models
    """

    def __init__(self):
        pass

    def fit(self, X, y= None, **params):
        """
        Records the columns to transform and their per-column minimum.

        The selected columns are every column of X except the first and the last
        (chosen by position), so the column order of X matters.
        """

        selected     = X.columns[1:-1]
        self.cols    = selected
        self.col_min = X[selected].min(axis=0)
        return self

    def transform(self, X, y= None, **params):
        """
        Returns a copy of X in which each fitted column is replaced by
        log1p(value - fitted minimum); all other columns are left untouched.
        """

        minima = self.col_min
        out    = X.copy()

        for name in self.cols:
            shifted   = X[name] - minima.loc[name]
            out[name] = np.log1p(shifted)

        return out

In [None]:
%%time 

# Optionally run the feature transform: fit on train, then apply to both splits.
if use_pipeline:
    print(f"---> Train-test shape before data transform = {train.shape} | {test.shape}");

    xform_steps = [("Xfrm", Xformer())]
    pipe        = Pipeline(steps = xform_steps)

    # Fitting happens on train only; test reuses the fitted column minima.
    train = pipe.fit_transform(train)
    test  = pipe.transform(test)

    print(f"---> Train-test shape after data transform = {train.shape} | {test.shape}\n\n");

<a id="4"></a>
# <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color:white; font-size:120%; text-align:left;padding:3.0px; background: #b32d00; border-bottom: 8px solid black" > MODEL TRAINING<br> </div>

In [None]:
%%time 

# Configure the AutoGluon predictor. The target was log1p-transformed when the
# data was loaded, so RMSE on this scale is intended to mirror the competition's
# RMSLE metric.
model = TabularPredictor(label        = target,
                         problem_type = "regression",
                         eval_metric  = "root_mean_squared_error",
                         path         = "/kaggle/working/AG",
                         verbosity    = 2,
                         )

# Fit on the training frame; 'id' and 'Source' are dropped only if present
# (errors = "ignore"). The listed model types are excluded from training.
# NOTE(review): time_limit is presumably in seconds (9000 s = 2.5 h) - confirm
# against the AutoGluon docs.
model.fit(train.drop(columns = ['id', "Source"], errors = "ignore"),
          save_space      = True,
          presets         = "optimize_for_deployment",
          time_limit      = 9000,
          excluded_model_types = ["NN_TORCH", "FASTAI", "NN"]
          )

# Predict on the test set; as_pandas = False requests a non-pandas result.
# Predictions are still on the log1p scale here.
preds = \
model.predict(test.drop(columns = ['id', "Source"], errors = "ignore"), 
              as_pandas = False
             )

# Clear the training logs from the cell output before showing the leaderboard.
clear_output();

print()
display(model.leaderboard().\
        style.format(precision = 5).\
        set_caption(f"\nModel Leaderboard\n")
       )

collect()
print()

In [None]:
%%time 

def PostProcessSub(sub_fl: pd.DataFrame, target_col = None) -> pd.DataFrame:
    """
    Post-processes a submission frame and returns the result as a NEW frame.

    The prediction column is clipped to the range [1, 29]; any clipped value
    between 27.5 and 29 (inclusive) is then set to 29, so no value can round
    to 28.

    Parameters
    ----------
    sub_fl : pd.DataFrame
        Submission frame containing the prediction column. It is not modified.
    target_col : str, optional
        Name of the prediction column. Defaults to the notebook-level `target`.

    Returns
    -------
    pd.DataFrame
        Copy of `sub_fl` with the post-processed prediction column.
    """

    # Fall back to the notebook-level label name only when no column is given.
    col = target if target_col is None else target_col

    # Work on a copy so the caller's frame is never mutated as a side effect.
    out     = sub_fl.copy()
    clipped = out[col].clip(1, 29)
    out[col] = np.where(clipped.between(27.5, 29), 29, clipped)

    return out
    

In [None]:
%%time 

# Map the log1p-scale predictions back to the original target scale.
sub_fl[target] = np.expm1(preds)

# Stand-alone (no blend) submission. Write the POST-PROCESSED copy: previously
# the raw `sub_fl` was written, so the post-processing was silently discarded.
sub_noblend = PostProcessSub(sub_fl.copy())
sub_noblend.to_csv("submission_noblend.csv", index = None)
del sub_noblend

# Blending with good public work:-
sub1 = pd.read_csv("/kaggle/input/playgrounds4e04-blender/submission.csv")[target].values
sub2 = pd.read_csv("/kaggle/input/clean-code-voting-regressor-base-3-models/submission.csv")[target].values
sub3 = pd.read_csv("/kaggle/input/abalone-rings-ensemble/submission.csv")[target].values

# Weighted average of the raw (not yet post-processed) AutoGluon predictions
# and the three public submissions, in that column order.
sub_fl[target] = \
np.average(np.c_[sub_fl[target].values, sub1, sub2, sub3],
           axis    = 1,
           weights = [0.40, 0.05, 0.30, 0.25]
          )

# Post-process the blend and write the final submission.
sub_fl = PostProcessSub(sub_fl)
sub_fl.to_csv("submission.csv", index = None)

print(f"---> Final submission file with blend")
display(sub_fl.head(10).style.format(precision = 4))

<a id="5"></a>
# <div style= "font-family: Cambria; font-weight:bold; letter-spacing: 0px; color:white; font-size:120%; text-align:left;padding:3.0px; background: #b32d00; border-bottom: 8px solid black" > REFERENCES<br> </div>

1. https://www.kaggle.com/code/igorvolianiuk/abalone-rings-ensemble <br>
2. https://www.kaggle.com/code/mfmfmf3/clean-code-voting-regressor-base-3-models <br>
3. https://www.kaggle.com/code/ravi20076/playgrounds4e04-blender<br>