<a href="https://www.kaggle.com/code/averma111/house-price-pytorch-v1?scriptVersionId=127113353" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

In [None]:
# Directory that holds the competition files inside the Kaggle runtime.
ROOT_PATH = '/kaggle/input/house-prices-advanced-regression-techniques'
# Read the training split and preview its first rows.
train = pd.read_csv(f'{ROOT_PATH}/train.csv')
train.head()

In [None]:
# Summary statistics for every column; include='all' also covers the
# non-numeric columns.
train.describe(include='all')

In [None]:
# Remove the 'Id' column from the frame in place, then preview the result.
train.drop(columns=['Id'], inplace=True)
train.head()

In [None]:
# The first entry of DataFrame.shape is the number of rows.
num_rows = train.shape[0]
print('The total number of rows in the dataframe is:', num_rows)

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
train.info()

## Filling the missing NaN values with the median (numerical) and forward fill (categorical)

In [None]:
# Split the column names by dtype. The target column "SalePrice" is removed
# from the numerical list so that it is not treated as a feature.
numerical_features = (
    train.select_dtypes(include=["int64", "float64"]).columns.drop("SalePrice")
)
categorical_features = train.select_dtypes(include=["object", "bool"]).columns

print(f"Numerical features : {len(numerical_features)}")
print(f"Categorical features : {len(categorical_features)}")

# Per-kind sub-frames of the training data.
train_num = train[numerical_features]
train_cat = train[categorical_features]

In [None]:
# Replace missing numerical values with the median of their own column,
# reporting the number of NAs before and after.
numeric_block = train[numerical_features]
print("NAs for numerical features in train : " + str(numeric_block.isnull().values.sum()))
column_medians = numeric_block.median()
train[numerical_features] = numeric_block.fillna(column_medians)
remaining_nas = train[numerical_features].isnull().values.sum()
print("Remaining NAs for numerical features in train : " + str(remaining_nas))

In [None]:
# Encode every categorical column with OrdinalEncoder. It is used instead of
# one-hot encoding because one-hot encoding results in different column counts
# between model training and test.
# NOTE(review): missing categorical values are only filled in the following
# cell, i.e. after encoding - confirm the installed scikit-learn version
# passes NaN through fit_transform.
from sklearn.preprocessing import OrdinalEncoder
encoder = OrdinalEncoder()
train[categorical_features] = encoder.fit_transform(train[categorical_features])

In [None]:
# Handle missing values for categorical features: forward-fill each gap from
# the row above, then use 0 for anything still missing (e.g. a gap in the very
# first row, which has nothing above it to copy from).
print("NANs for categorical features in train : " + str(train[categorical_features].isnull().values.sum()))
# DataFrame.ffill() replaces fillna(method="ffill"), whose `method` argument
# is deprecated in pandas 2.1+.
train[categorical_features] = train[categorical_features].ffill()
train[categorical_features] = train[categorical_features].fillna(0)
print("Remaining NAs for categorical features in train : " + str(train[categorical_features].isnull().values.sum()))

In [None]:
# The target is the last column of the frame; keep its name in a
# one-element list.
output_col = train.columns[-1:].tolist()
output_col

In [None]:
# Every column except the last one is a model input. Slicing relative to the
# end of the frame avoids hard-coding the feature count (previously 79), so
# the selection stays correct if columns are added or removed upstream.
input_col = train.columns[:-1]
input_col

In [None]:
def datafrme_to_arrays(dataset):
    """Split a dataframe into NumPy input and target arrays.

    The columns are chosen by the module-level ``input_col`` and
    ``output_col`` selections.

    Args:
        dataset: DataFrame containing every column named in ``input_col``
            and ``output_col``.

    Returns:
        A ``(inputs_array, targets_array)`` tuple. ``targets_array`` always
        has shape ``(n_rows, 1)``.
    """
    # The frame is only read here, never mutated, so no defensive copy is made.
    inputs_array = dataset[input_col].to_numpy()
    # -1 lets NumPy infer the row count; the previous hard-coded 1460 raised
    # for any frame of a different length.
    targets_array = dataset[output_col].to_numpy().reshape(-1, 1)
    return inputs_array, targets_array

In [None]:
# Convert the prepared training frame to NumPy arrays and display them.
inputs_array,targets_array=datafrme_to_arrays(train)
inputs_array,targets_array

In [None]:
import torch
import torchvision

In [None]:
# Copy both arrays into tensors of PyTorch's default floating-point dtype.
default_dtype = torch.get_default_dtype()
inputs = torch.tensor(inputs_array, dtype=default_dtype)
targets = torch.tensor(targets_array, dtype=default_dtype)

for tensor in (inputs, targets):
    print(tensor)
inputs.shape, targets.shape

In [None]:
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.utils.data.sampler import SubsetRandomSampler
# Wrap inputs and targets together so that indexing the dataset yields an
# (input row, target row) pair.
dataset = TensorDataset(inputs,targets)



In [None]:
# Settings for batching and the hold-out split.
batch_size = 16
test_split = .2
shuffle_dataset = True
random_seed = 42

# Build the row indices, optionally shuffle them with a fixed seed, then
# take the first `split` indices as the hold-out set and the rest for training.
train_size = len(train)
split = int(np.floor(test_split * train_size))
indices = list(range(train_size))
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
test_indices = indices[:split]
train_indices = indices[split:]

# Each sampler draws only from its own subset of indices.
test_sampler = SubsetRandomSampler(test_indices)
train_sampler = SubsetRandomSampler(train_indices)


In [None]:
# Both loaders read from the same dataset; which rows each one sees is
# decided by its sampler.
train_loader = DataLoader(dataset, sampler=train_sampler, batch_size=batch_size)
test_loader = DataLoader(dataset, sampler=test_sampler, batch_size=batch_size)

In [None]:
# Sanity check: print the first training batch only, then stop iterating.
for xb, yb in train_loader:
    print("inputs:", xb)
    print("targets:", yb)
    break

In [None]:
# Sanity check: print the first hold-out batch only, then stop iterating.
for xb, yb in test_loader:
    print("inputs:", xb)
    print("targets:", yb)
    break

In [None]:
# Layer dimensions: one model input per feature column, one output per
# target column.
input_size, output_size = len(input_col), len(output_col)

In [None]:
class LRModel(torch.nn.Module):
    """Single-layer linear regression model scored with mean absolute error.

    Args:
        in_features: Number of input features. When omitted, the
            module-level ``input_size`` is used.
        out_features: Number of outputs. When omitted, the module-level
            ``output_size`` is used.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals so a bare ``LRModel()`` keeps working.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size
        self.linear = torch.nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Apply the linear layer to a batch of inputs."""
        return self.linear(xb)

    def _batch_loss(self, batch):
        """Return the mean L1 loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        # The deprecated ``size_average``/``reduce`` arguments are no longer
        # passed; ``reduction='mean'`` alone selects the same averaging.
        return F.l1_loss(self(inputs), targets, reduction='mean')

    def training_step(self, batch):
        """Return the differentiable loss for one training batch."""
        return self._batch_loss(batch)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one validation batch.

        The loss is computed without gradient tracking and is detached.
        """
        # No backward pass happens during validation, so skip graph building.
        with torch.no_grad():
            loss = self._batch_loss(batch)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into a single float.

        Args:
            outputs: List of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': float}`` holding the mean of the batch losses.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 20th epoch and on the last epoch.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            result: Dict holding a ``'val_loss'`` entry.
            num_epochs: Total number of epochs in the run.
        """
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            print("Epoch [{}],val_loss:{:.4f}".format(epoch+1,result['val_loss']))
            

In [None]:
# Instantiate the linear regression model.
model = LRModel()

In [None]:
# Display the model's parameter tensors.
list(model.parameters())

In [None]:
def evaluate(model, test_loader):
    """Score ``model`` on every batch of ``test_loader``.

    Args:
        model: Object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        test_loader: Iterable of batches to validate on.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch results.
    """
    # Validation never back-propagates, so gradient tracking is switched off
    # to avoid building an autograd graph for every batch.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in test_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, test_loader, opt_func = torch.optim.ASGD):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Object exposing ``parameters()``, ``training_step(batch)`` and
            ``epoch_end(epoch, result, num_epochs)``.
        train_loader: Iterable of training batches.
        test_loader: Iterable of validation batches, passed to ``evaluate``.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        List with one validation result per epoch, in epoch order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase: score, report and record this epoch.
        epoch_result = evaluate(model, test_loader)
        model.epoch_end(epoch, epoch_result, epochs)
        history.append(epoch_result)
    return history

In [None]:
# Training run configuration: number of epochs and learning rate.
epochs = 1000
lr = 1e-7
# Train the model and keep the per-epoch validation results.
history1 = fit(
    epochs=epochs,
    lr=lr,
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
)

In [None]:
def predict_single(inputs, target, model):
    """Run the model on one sample and print input, target and prediction.

    Args:
        inputs: Tensor holding a single sample, without a batch dimension.
        target: Expected value for the sample; it is only printed.
        model: Callable model that accepts a batch of inputs.

    Returns:
        The detached prediction for the sample.
    """
    # The model works on batches, so add a leading batch dimension of size 1.
    batch = inputs.unsqueeze(0)
    predictions = model(batch)
    prediction = predictions[0].detach()
    # Print the sample itself; the previous code printed the builtin `input`
    # function instead of the `inputs` argument.
    print("Input:", inputs)
    print("Target:", target)
    print("Prediction:", prediction)
    return prediction