### Data Details

Data fields
* **ID** - an Id that represents a (Shop, Item) tuple within the test set
* **shop_id** - unique identifier of a shop
* **item_id** - unique identifier of a product
* **item_category_id** - unique identifier of item category
* **item_cnt_day** - number of products sold. You are predicting a monthly amount of this measure
* **item_price** - current price of an item
* **date** - date in format dd/mm/yyyy
* **date_block_num** - a consecutive month number, used for convenience. January 2013 is 0, February 2013 is 1,..., October 2015 is 33
* **item_name** - name of item
* **shop_name** - name of shop
* **item_category_name** - name of item category



In [1]:
'''
Step 1: Loading Dataset
'''

import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.transforms as transforms
import pandas as pd
import numpy as np
from datetime import datetime, date
from torch.utils.data.dataset import Dataset
from torch.utils.data.sampler import SubsetRandomSampler


In [2]:
class CustomDatasetFromCSV(Dataset):
    def __init__(self, csv_path):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transform: pytorch transforms for transforms and tensor conversion
        """
        self.data = pd.read_csv(csv_path)
        
        

    def __getitem__(self, index):
        X = np.asarray(self.data.iloc[index, [0,1,2,3,4,6]], dtype=np.float32)
#         print(self.data.iloc[index, [0,1,2,3,4,6]])
#         print(self.data.iloc[index, 5])                    
        y = np.asarray(self.data.iloc[index, 5], dtype=np.float32) # 5 for item_cnt_month
        
        
        return (X, y)

    def __len__(self):
        return len(self.data.index)


    
training_dataset = CustomDatasetFromCSV('data/train_input_data.csv')


batch_size = 100
num_iter = 600000
num_epoach = num_iter / (len(training_dataset) / batch_size)
epochs = int(num_epoach)

# Define data loader
training_dataset_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=True)
    
dataset_size = len(training_dataset)
indices = list(range(dataset_size))
val_indices = indices[:batch_size]
valid_sampler = SubsetRandomSampler(val_indices)
validation_dataset_loader = torch.utils.data.DataLoader(dataset=training_dataset,
                                                    batch_size=batch_size,
                                                    sampler=valid_sampler)


In [3]:
for i,(inputs, labels) in enumerate(training_dataset_loader):
    print(inputs)
    print(labels)
    # load images as varible
    x = Variable(inputs.view(-1, 6))
    y = Variable(labels)
    print(x.shape)
    break

tensor([[1.2000e+01, 5.2000e+01, 1.9670e+04, 1.0000e+00, 2.0140e+03, 4.0000e+01],
        [7.0000e+00, 3.5000e+01, 2.1544e+04, 8.0000e+00, 2.0130e+03, 3.7000e+01],
        [7.0000e+00, 4.2000e+01, 1.9529e+04, 8.0000e+00, 2.0130e+03, 4.0000e+01],
        [2.6000e+01, 2.5000e+01, 6.7680e+03, 3.0000e+00, 2.0150e+03, 2.4000e+01],
        [2.5000e+01, 3.0000e+00, 1.6184e+04, 2.0000e+00, 2.0150e+03, 6.5000e+01],
        [1.5000e+01, 2.2000e+01, 1.5063e+04, 4.0000e+00, 2.0140e+03, 3.0000e+01],
        [3.0000e+00, 6.0000e+00, 7.8500e+03, 4.0000e+00, 2.0130e+03, 3.0000e+01],
        [2.0000e+00, 1.2000e+01, 1.9340e+04, 3.0000e+00, 2.0130e+03, 4.9000e+01],
        [1.8000e+01, 1.2000e+01, 1.2322e+04, 7.0000e+00, 2.0140e+03, 3.7000e+01],
        [9.0000e+00, 2.8000e+01, 3.6520e+03, 1.0000e+01, 2.0130e+03, 5.5000e+01],
        [8.0000e+00, 4.2000e+01, 8.2330e+03, 9.0000e+00, 2.0130e+03, 3.8000e+01],
        [2.3000e+01, 5.7000e+01, 2.8750e+03, 1.2000e+01, 2.0140e+03, 2.5000e+01],
        [9.0000e

In [4]:
'''
Step 3: Create Model Class
'''

class FeedforwardNeuralNetModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(FeedforwardNeuralNetModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.relu1 = nn.ReLU()
#         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
#         self.relu2 = nn.ReLU()
#         self.fc3 = nn.Linear(hidden_dim, hidden_dim)
#         self.relu3 = nn.ReLU()
#         self.fc4 = nn.Linear(hidden_dim, output_dim)
    
    def forward(self,x):
        out = self.fc1(x)
        out = self.relu1(out)
#         out = self.fc2(out)
#         out = self.relu2(out)
#         out = self.fc3(out)
#         out = self.relu3(out)
#         out = self.fc4(out)
        return out

    
'''
Step 4: Instantiate Model Class
'''

input_dim = 6
hidden_dim = 100
output_dim = 1

model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)

'''
Step 5: Instantiate Loss Class
'''

# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()

'''
Step 6: Instantiate Optimizer Class
'''

learning_rate = 0.1

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)


'''
Step 7: Train Model
'''

# step 6: Train model




# epochs = 1000
iter = 0


for epoch in range(epochs):
    for i,(inputs, labels) in enumerate(training_dataset_loader):
        # load images as varible
        x = Variable(inputs.view(-1, 6))
        y = Variable(labels)
        
        
        
        
        
        # clear gradients w.r.t parameters
        optimizer.zero_grad()
        
        # Forward pass to get output
        outputs = model(x)
        
        # Calculate Loss : softmax --> cross entropy loss
        loss = criterion(outputs, y)
        
        # getting gradients w.r.t parameters
        loss.backward()
        
        # updating parameters
        optimizer.step()
#         print("iteration: {}, loss {}".format(iter, loss.data))
        iter += 1
        
        if iter % 500 == 0:
            # calculate Accuracy
            correct = 0
            total = 0
            #iterate through test dataset
            for images, labels in validation_dataset_loader:
                #load images to a Torch variable
                images = Variable(images.view(-1,6))
                
                # Forward pass only to get outputs
                outputs = model(images)
#                 print(type(outputs))
#                 print(outputs.data)
                predicted = outputs
                
                # Total number of labels 
                total += labels.size(0)
                
                # Total correct predictions
                correct += (predicted == labels).sum()
            
            accuracy = float(100 * correct / total)
            
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.data, accuracy))
        
#     epoch += 1

  return F.mse_loss(input, target, reduction=self.reduction)


Iteration: 500. Loss: 9.729999542236328. Accuracy: 100.0
Iteration: 1000. Loss: 10.170000076293945. Accuracy: 100.0
Iteration: 1500. Loss: 19.049999237060547. Accuracy: 100.0
Iteration: 2000. Loss: 17.100000381469727. Accuracy: 100.0
Iteration: 2500. Loss: 22.93000030517578. Accuracy: 100.0
Iteration: 3000. Loss: 5.949999809265137. Accuracy: 100.0
Iteration: 3500. Loss: 4.360000133514404. Accuracy: 100.0
Iteration: 4000. Loss: 5850.8701171875. Accuracy: 100.0
Iteration: 4500. Loss: 144.6999969482422. Accuracy: 100.0
Iteration: 5000. Loss: 8.529999732971191. Accuracy: 100.0
Iteration: 5500. Loss: 22.1299991607666. Accuracy: 100.0
Iteration: 6000. Loss: 17.979999542236328. Accuracy: 100.0
Iteration: 6500. Loss: 5.949999809265137. Accuracy: 100.0
Iteration: 7000. Loss: 17.139999389648438. Accuracy: 100.0
Iteration: 7500. Loss: 15.130000114440918. Accuracy: 100.0
Iteration: 8000. Loss: 50.849998474121094. Accuracy: 100.0
Iteration: 8500. Loss: 12.760000228881836. Accuracy: 100.0
Iteration:

  return F.mse_loss(input, target, reduction=self.reduction)


Iteration: 16500. Loss: 7.099999904632568. Accuracy: 100.0
Iteration: 17000. Loss: 5.050000190734863. Accuracy: 100.0
Iteration: 17500. Loss: 10.869999885559082. Accuracy: 100.0
Iteration: 18000. Loss: 15.479999542236328. Accuracy: 100.0
Iteration: 18500. Loss: 84.12999725341797. Accuracy: 100.0
Iteration: 19000. Loss: 14.9399995803833. Accuracy: 100.0
Iteration: 19500. Loss: 56.099998474121094. Accuracy: 100.0
Iteration: 20000. Loss: 36.130001068115234. Accuracy: 100.0
Iteration: 20500. Loss: 5.639999866485596. Accuracy: 100.0
Iteration: 21000. Loss: 7.670000076293945. Accuracy: 100.0
Iteration: 21500. Loss: 7.989999771118164. Accuracy: 100.0
Iteration: 22000. Loss: 12.649999618530273. Accuracy: 100.0
Iteration: 22500. Loss: 12.460000038146973. Accuracy: 100.0
Iteration: 23000. Loss: 4.840000152587891. Accuracy: 100.0
Iteration: 23500. Loss: 20.469999313354492. Accuracy: 100.0
Iteration: 24000. Loss: 6.239999771118164. Accuracy: 100.0
Iteration: 24500. Loss: 6.980000019073486. Accurac

KeyboardInterrupt: 