In [1]:
import numpy as np
import pandas as pd

from typing import List

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
torch.set_default_dtype(torch.float64)

In [2]:
training_set = pd.read_csv('/kaggle/input/playground-series-s3e11/train.csv')
submission_set = pd.read_csv('/kaggle/input/playground-series-s3e11/test.csv')
sample_submission =pd.read_csv('/kaggle/input/playground-series-s3e11/sample_submission.csv')
training_set.head()

Unnamed: 0,id,store_sales(in millions),unit_sales(in millions),total_children,num_children_at_home,avg_cars_at home(approx).1,gross_weight,recyclable_package,low_fat,units_per_case,store_sqft,coffee_bar,video_store,salad_bar,prepared_food,florist,cost
0,0,8.61,3.0,2.0,2.0,2.0,10.3,1.0,0.0,32.0,36509.0,0.0,0.0,0.0,0.0,0.0,62.09
1,1,5.0,2.0,4.0,0.0,3.0,6.66,1.0,0.0,1.0,28206.0,1.0,0.0,0.0,0.0,0.0,121.8
2,2,14.08,4.0,0.0,0.0,3.0,21.3,1.0,0.0,26.0,21215.0,1.0,0.0,0.0,0.0,0.0,83.51
3,3,4.02,3.0,5.0,0.0,0.0,14.8,0.0,1.0,36.0,21215.0,1.0,0.0,0.0,0.0,0.0,66.78
4,4,2.13,3.0,5.0,0.0,3.0,17.0,1.0,1.0,20.0,27694.0,1.0,1.0,1.0,1.0,1.0,111.51


**The Neural Network class**

In [3]:
class FCN(nn.Module):
    def __init__(self, input_size, output_size=1, norm=False):
        super(FCN, self).__init__()
        
        # use normalization layers or not
        self.norm = norm
        
        if self.norm == False: # need normalization 
            self.layers = nn.ModuleList([
                nn.Linear(input_size,100),
                nn.ReLU(),
                nn.Linear(100,100),  
                nn.ReLU(),
                nn.Linear(100,90),
                nn.ReLU(),
                nn.Linear(100,100),
                nn.ReLU(),
                nn.Linear(100,100),
                nn.ReLU()
            ])
            
        else: # change leckyrelu to relu, then norm works (layernorm is better)
            self.layers = nn.ModuleList([
                nn.LayerNorm(input_size), 
                nn.Linear(input_size,100),
                nn.ReLU(),
                nn.LayerNorm(100), 
                nn.Linear(100,100),
                nn.ReLU(),
                nn.LayerNorm(100),
                nn.Linear(100,100),
                nn.ReLU(),
                nn.LayerNorm(100),
                nn.Linear(100,100),
                nn.ReLU(),
                nn.LayerNorm(100), 
                nn.Linear(100,output_size),
                nn.ReLU() # must use relu in the last layer to keep nonnegative
            ])
            
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
            
        return x

In [4]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0.1):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, val_loss):
        if self.best_loss == None:
            self.best_loss = val_loss
        elif self.best_loss - val_loss > self.min_delta:
            self.best_loss = val_loss
            # reset counter if validation loss improves
            self.counter = 0
        elif self.best_loss - val_loss < self.min_delta:
            self.counter += 1
            print(f"INFO: Early stopping counter {self.counter} of {self.patience}")
            if self.counter >= self.patience:
                print('INFO: Early stopping')
                self.early_stop = True

In [5]:
class custom_dataset():
    def __init__(self, features, target):
        
        self.features = features
        self.target = target
        
    def __len__(self):
        return len(self.target)
    
    def __getitem__(self, idx):
        
        target = self.target[idx]
        features = self.features[idx,:]
        
        return features, target    

### PyTorch implementation of the **Root Mean Squared Logarithmic Error** for scoring our predictions: <a id='section_id0'></a>

In [6]:
class RMSLE(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        
    def forward(self, pred, true):
        
        if (pred < 0).any() or (true < 0).any():
            raise ValueError(
                "Root Mean Squared Logarithmic Error cannot be used when "
                "the argument contain negative values."  )
        
        return torch.sqrt(self.mse(torch.log1p(pred), torch.log1p(true)))

### **Preprocessing**

In [7]:
training_set = training_set.drop(['id'], axis = 1)
training_set['child_report_ratio'] = training_set['num_children_at_home'] / training_set['total_children']
training_set['child_report_ratio'].fillna(0, inplace = True)
training_set["revenue_per_store"] =  training_set['store_sales(in millions)'] / training_set['unit_sales(in millions)']
training_set = training_set.drop(['salad_bar', 'gross_weight','recyclable_package', 'units_per_case', 'store_sales(in millions)', 'unit_sales(in millions)'], axis = 1)

submission_set['child_report_ratio'] = submission_set['num_children_at_home'] / submission_set['total_children']
submission_set['child_report_ratio'].fillna(0, inplace = True)
submission_set["revenue_per_store"] =  submission_set['store_sales(in millions)'] / submission_set['unit_sales(in millions)']
submission_set = submission_set.drop(['salad_bar', 'gross_weight','recyclable_package', 'units_per_case', 'store_sales(in millions)', 'unit_sales(in millions)'], axis = 1)

ids = submission_set.pop('id')
target = training_set.pop('cost')

sc = StandardScaler()
y = target.to_numpy()
X = training_set.to_numpy()

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 17)

X_train = sc.fit_transform(X)
# X_test = sc.transform(X_test)

### **Modelling Options**

In [8]:
n_features = len(list(training_set.columns))
n_epochs = 100
batch_size = 100

# Early stopping parameters
patience = 15
min_change = 0.0001

### **Main training loop**

In [10]:
from sklearn.model_selection import KFold
# add 5 cv
device = torch.device('cuda:0')

model = FCN(n_features, norm=True)

model.to(device)

loss_function = RMSLE()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
early_stopper = EarlyStopping(patience = patience, min_delta = min_change)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau( 
                optimizer = optimizer,
                mode='min',
                patience = 10,
                factor = 0.1,
                min_lr=1e-5,
                verbose=True)

k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True)
dataset = custom_dataset(X_train, y)

In [11]:
from torch.utils.data import SubsetRandomSampler


for fold, (train_ids, valid_ids) in enumerate(kfold.split(dataset)):
    print(f'FOLD {fold+1}')
    print('--------------------------------')
    train_subsampler = SubsetRandomSampler(train_ids)
    valid_subsampler = SubsetRandomSampler(valid_ids)
    
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_subsampler)
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=valid_subsampler)
    for epoch in range(150):
        with torch.no_grad():
            valid_loss = 0
            num_right = 0
            for tensor_x, tensor_y in valid_loader:
                tensor_x = tensor_x.to(device)
                tensor_y = tensor_y.reshape(-1, 1).to(device)
                output = model1(tensor_x)
                loss = loss_function(output, tensor_y)
                valid_loss += loss.item() * len(tensor_x)

            
            valid_loss = valid_loss / len(valid_loader.sampler.indices)

            if epoch % 50 == 0:
                print('Valid Loss:{} Epoch:{}'.format(valid_loss, epoch+50))

        train_loss = 0
        num_right = 0
        for tensor_x, tensor_y in train_loader:
            tensor_x = tensor_x.to(device)
            tensor_y = tensor_y.reshape(-1, 1).to(device)
            optimizer.zero_grad()
            output = model1(tensor_x)
            loss = loss_function(output, tensor_y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * len(tensor_x)

        
        train_loss = train_loss / len(train_loader.sampler.indices)
    
        if epoch % 50 == 0:
            print('Loss: {} Epoch:{}'.format(train_loss, epoch+50))

FOLD 1
--------------------------------
Valid Loss:4.302041241850126 Epoch:50
Loss: 0.6299599446886005 Epoch:50
Valid Loss:0.2959791551764197 Epoch:100
Loss: 0.2940599266202339 Epoch:100
Valid Loss:0.2950030829053492 Epoch:150
Loss: 0.2923824488132978 Epoch:150
FOLD 2
--------------------------------
Valid Loss:0.2907411669143708 Epoch:50
Loss: 0.2925615279541563 Epoch:50
Valid Loss:0.2937225362165123 Epoch:100
Loss: 0.29117581956636124 Epoch:100
Valid Loss:0.2943367692584605 Epoch:150
Loss: 0.29052155013342035 Epoch:150
FOLD 3
--------------------------------
Valid Loss:0.2892351624918386 Epoch:50
Loss: 0.29113141123777914 Epoch:50
Valid Loss:0.2940648012172618 Epoch:100
Loss: 0.28976146383008056 Epoch:100
Valid Loss:0.29476241674040726 Epoch:150
Loss: 0.289209875928229 Epoch:150
FOLD 4
--------------------------------
Valid Loss:0.2871787549177392 Epoch:50
Loss: 0.2906779183873658 Epoch:50
Valid Loss:0.29209845824636127 Epoch:100
Loss: 0.28932063403268177 Epoch:100
Valid Loss:0.29301

### **Predictions on the test set**

In [13]:
submission_features = torch.tensor(sc.transform(submission_set.to_numpy())).to(device)
predictions = model1(submission_features)

test_predictions = pd.DataFrame(predictions.flatten().tolist(), columns=['cost'])

result = pd.concat([ids, test_predictions], axis=1)

result.to_csv('submission.csv',index =False)

In [14]:
result

Unnamed: 0,id,cost
0,360336,94.529705
1,360337,91.177008
2,360338,89.415476
3,360339,108.532481
4,360340,79.510250
...,...,...
240219,600555,101.802530
240220,600556,93.375311
240221,600557,96.517180
240222,600558,108.511054
