

## Predict if someone makes more or less than 50k using the adult dataset

   * Note that this dataset strictly refers to people living in the US






In [1]:
# Importing the libraries
import numpy as np

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sns
import tensorflow as tf
import torch
import torch.utils.data as data_utils
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# !pip install torchsummary 
from torchsummary import summary
import torch.nn as nn
import torch.nn.functional as F
torch.backends.cudnn.benchmark=True

  from ._conv import register_converters as _register_converters


# Loading the Training and Testing Data using Data Loader 

In [2]:
# Loading the Training and Testing Data
trainImages = np.load('./Adult/data.npy').astype('float32')
trainLabels = np.load('./Adult/labels.npy').astype('float32')

# # Normalizing the data
# mx = np.max(np.float32(trainImages.flatten()))
# Splitting the Data into Training and Test Data    trainImages.max(axis=0)
X_train, X_test,Y_train,Y_test = train_test_split(trainImages,trainLabels, test_size=0.15, shuffle = True)

# batch_size
batch = 256

# Train Data Loader
train = data_utils.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(Y_train))
train_loader = data_utils.DataLoader(train, batch_size=batch, shuffle=True)

# Test Data loader
test = data_utils.TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(Y_test))
test_loader = data_utils.DataLoader(test)



In [3]:
X_train.shape

(38438, 67)

In [4]:
X_test.shape

(6784, 67)

# Showing the Training Data after Normalizing

In [5]:
# Preview the training data
for batch_idx, (data, target) in enumerate(train_loader):
    print(batch_idx)
#     print("Training Data")
    print(data[0][0])
#     print("Testing Data")
#     print(target[0])
#     print(data.sha?pe)
#     break
    

0
tensor(22.)
1
tensor(21.)
2
tensor(52.)
3
tensor(32.)
4
tensor(51.)
5
tensor(26.)
6
tensor(21.)
7
tensor(36.)
8
tensor(62.)
9
tensor(51.)
10
tensor(25.)
11
tensor(30.)
12
tensor(26.)
13
tensor(44.)
14
tensor(39.)
15
tensor(17.)
16
tensor(50.)
17
tensor(50.)
18
tensor(17.)
19
tensor(31.)
20
tensor(36.)
21
tensor(46.)
22
tensor(19.)
23
tensor(37.)
24
tensor(27.)
25
tensor(42.)
26
tensor(30.)
27
tensor(25.)
28
tensor(72.)
29
tensor(36.)
30
tensor(45.)
31
tensor(58.)
32
tensor(24.)
33
tensor(26.)
34
tensor(41.)
35
tensor(24.)
36
tensor(37.)
37
tensor(20.)
38
tensor(41.)
39
tensor(66.)
40
tensor(52.)
41
tensor(23.)
42
tensor(35.)
43
tensor(23.)
44
tensor(64.)
45
tensor(38.)
46
tensor(36.)
47
tensor(69.)
48
tensor(34.)
49
tensor(56.)
50
tensor(37.)
51
tensor(30.)
52
tensor(32.)
53
tensor(72.)
54
tensor(32.)
55
tensor(23.)
56
tensor(39.)
57
tensor(37.)
58
tensor(45.)
59
tensor(31.)
60
tensor(38.)
61
tensor(23.)
62
tensor(30.)
63
tensor(23.)
64
tensor(19.)
65
tensor(54.)
66
tensor(43.)
67
te

## Defining Convolution Neural Network

In [6]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(67, 128)
        self.relu1 = nn.ReLU()
        self.dout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 64)
        self.relu2 = nn.ReLU()
        self.dout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(64, 32)
        self.prelu = nn.PReLU(1)
        self.out = nn.Linear(32, 1)
        self.out_act = nn.Sigmoid()
        
    def forward(self, input_):
        inp = self.fc1(input_)
        x = self.relu1(inp)
        x = self.dout(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dout2(x)
        x = self.fc3(x)
        x = self.prelu(x)
        x = self.out(x)
        y = (self.out_act(x))
        return y
net = Net()
net.to(device)

Net(
  (fc1): Linear(in_features=67, out_features=128, bias=True)
  (relu1): ReLU()
  (dout): Dropout(p=0.2)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (dout2): Dropout(p=0.2)
  (fc3): Linear(in_features=64, out_features=32, bias=True)
  (prelu): PReLU(num_parameters=1)
  (out): Linear(in_features=32, out_features=1, bias=True)
  (out_act): Sigmoid()
)

In [7]:
summary(net,input_size = (1,67),batch_size=batch)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1              [256, 1, 128]           8,704
              ReLU-2              [256, 1, 128]               0
           Dropout-3              [256, 1, 128]               0
            Linear-4               [256, 1, 64]           8,256
              ReLU-5               [256, 1, 64]               0
           Dropout-6               [256, 1, 64]               0
            Linear-7               [256, 1, 32]           2,080
             PReLU-8               [256, 1, 32]               1
            Linear-9                [256, 1, 1]              33
          Sigmoid-10                [256, 1, 1]               0
Total params: 19,074
Trainable params: 19,074
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.07
Forward/backward pass size (MB): 1.25
Params size (MB): 0.07
Estimated Tot

In [8]:
params = list(net.parameters())
# print(len(params))
# print(params[0].size())  # conv1's .weight

# params

##  Define a Loss function and optimizer

In [9]:
import torch.optim as optim

criterion = nn.BCELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
train_loss =[]
val_loss = []
train_accu = []
test_accu=[]

In [10]:
output = net(data)
#         print(target.dtype,output.dtype)
target = target.view(-1,1)
loss = criterion(output, target)
loss.backward()
optimizer.step()
torch.sum(output==target)


tensor(0)


## Training and Testing

In [13]:
def train(model, train_loader, optimizer, epoch,device):
    model.train()
    criterion = nn.BCELoss()
    training_loss = 0
    train_correct = 0
    lo = []
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
#         print(target.dtype,output.dtype)
        target = target.view(-1,1)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
#         print(loss.item())
        lo.append(loss.item())
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
        train_correct += torch.sum(torch.round(output)==target)
#         print(train_correct)
    train_accu.append(100. * train_correct / len(train_loader.dataset))
    train_loss.append(np.mean(lo))

def test(model, test_loader,device):
    model.eval()
    criterion = nn.BCELoss()
    test_loss = 0
    correct = 0
    testlo = []
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            target = target.view(-1,1)
#             test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            test_loss = criterion(output, target)
            testlo.append(test_loss.item())
#             print(test_loss)
#             print()
            correct += torch.sum(torch.round(output)==target)
#     print(100. * correct / len(test_loader.dataset))
    test_accu.append(100. * correct / len(test_loader.dataset))
    val_loss.append(np.mean(testlo))
#     print(test_loss)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        np.mean(testlo), correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

# Training the Model

In [14]:
epochs = 50
for epoch in range(1, epochs + 1):
        train(net, train_loader, optimizer, epoch,device)
        test(net, test_loader,device)


Test set: Average loss: 0.5488, Accuracy: 5189/6784 (76%)


Test set: Average loss: 0.5404, Accuracy: 5275/6784 (77%)


Test set: Average loss: 0.5349, Accuracy: 5264/6784 (77%)


Test set: Average loss: 0.5295, Accuracy: 5272/6784 (77%)


Test set: Average loss: 0.5238, Accuracy: 5276/6784 (77%)


Test set: Average loss: 0.5194, Accuracy: 5275/6784 (77%)


Test set: Average loss: 0.5135, Accuracy: 5274/6784 (77%)


Test set: Average loss: 0.5093, Accuracy: 5275/6784 (77%)


Test set: Average loss: 0.5042, Accuracy: 5269/6784 (77%)




Test set: Average loss: 0.4966, Accuracy: 5263/6784 (77%)


Test set: Average loss: 0.4910, Accuracy: 5286/6784 (77%)


Test set: Average loss: 0.4829, Accuracy: 5289/6784 (77%)


Test set: Average loss: 0.4764, Accuracy: 5306/6784 (78%)


Test set: Average loss: 0.4643, Accuracy: 5326/6784 (78%)


Test set: Average loss: 0.4505, Accuracy: 5315/6784 (78%)


Test set: Average loss: 0.4365, Accuracy: 5348/6784 (78%)


Test set: Average loss: 0.4234, Accuracy: 5373/6784 (79%)


Test set: Average loss: 0.4086, Accuracy: 5511/6784 (81%)


Test set: Average loss: 0.3965, Accuracy: 5556/6784 (81%)




Test set: Average loss: 0.3909, Accuracy: 5599/6784 (82%)


Test set: Average loss: 0.3939, Accuracy: 5557/6784 (81%)


Test set: Average loss: 0.3786, Accuracy: 5631/6784 (83%)


Test set: Average loss: 0.4056, Accuracy: 5545/6784 (81%)


Test set: Average loss: 0.4049, Accuracy: 5538/6784 (81%)


Test set: Average loss: 0.3741, Accuracy: 5647/6784 (83%)


Test set: Average loss: 0.3730, Accuracy: 5634/6784 (83%)


Test set: Average loss: 0.3763, Accuracy: 5665/6784 (83%)


Test set: Average loss: 0.3651, Accuracy: 5677/6784 (83%)




Test set: Average loss: 0.3696, Accuracy: 5653/6784 (83%)


Test set: Average loss: 0.3717, Accuracy: 5595/6784 (82%)


Test set: Average loss: 0.3645, Accuracy: 5689/6784 (83%)


Test set: Average loss: 0.3623, Accuracy: 5663/6784 (83%)


Test set: Average loss: 0.3681, Accuracy: 5615/6784 (82%)


Test set: Average loss: 0.3623, Accuracy: 5672/6784 (83%)


Test set: Average loss: 0.3633, Accuracy: 5659/6784 (83%)


Test set: Average loss: 0.3673, Accuracy: 5645/6784 (83%)


Test set: Average loss: 0.3576, Accuracy: 5668/6784 (83%)


Test set: Average loss: 0.3559, Accuracy: 5685/6784 (83%)




Test set: Average loss: 0.3621, Accuracy: 5691/6784 (83%)


Test set: Average loss: 0.3970, Accuracy: 5324/6784 (78%)


Test set: Average loss: 0.3963, Accuracy: 5383/6784 (79%)


Test set: Average loss: 0.3537, Accuracy: 5684/6784 (83%)


Test set: Average loss: 0.3611, Accuracy: 5650/6784 (83%)


Test set: Average loss: 0.3593, Accuracy: 5675/6784 (83%)


Test set: Average loss: 0.3543, Accuracy: 5708/6784 (84%)


Test set: Average loss: 0.3639, Accuracy: 5645/6784 (83%)


Test set: Average loss: 0.3631, Accuracy: 5675/6784 (83%)




Test set: Average loss: 0.3497, Accuracy: 5695/6784 (83%)


Test set: Average loss: 0.3523, Accuracy: 5696/6784 (83%)


Test set: Average loss: 0.3542, Accuracy: 5696/6784 (83%)



In [None]:
# Evaluate Accuracy
print('Training Loss:', train_loss[-1])
print('Training Accuracy:', train_accu[-1])
print()
print('Test Loss:', val_loss[-1])
print('Testing Accuracy:', test_accu[-1])
print()

plt.plot(train_loss,'r', label='Training Loss')
plt.plot(val_loss,'b', label='Testing Loss')
plt.title('Test Loss' + str(val_loss[-1]))
plt.legend()
plt.show()
plt.plot(train_accu,'r', label='Training accuracy')
plt.plot(test_accu,'b', label='Testing accuracy')
plt.title('Test Accuracy : '+ str(test_accu[-1]))
plt.legend()
plt.show()

In [None]:
def visualize_model(model, num_images=6):
    was_training = model.training
    model.eval()
    images_so_far = 0
    fig = plt.figure()

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(dataloaders['val']):
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)

            for j in range(inputs.size()[0]):
                images_so_far += 1
                ax = plt.subplot(num_images//2, 2, images_so_far)
                ax.axis('off')
                ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                imshow(inputs.cpu().data[j])

                if images_so_far == num_images:
                    model.train(mode=was_training)
                    return
        model.train(mode=was_training)

In [None]:
model_ft

In [None]:
summary(model_ft,(3,32,32))