In [1]:
import os
import glob
import math
import numpy as np

In [2]:
#Bayes
# Folder Path
path = "languageID"
languages = ['e', 'j', 's']

# Read text File
def read_text_file(file_path):
    with open(file_path, 'r') as f:
        contents = f.read()
    return contents
# iterate through all training files
global_count_dict = {'e':{}, 'j':{}, 's': {}}
for language in languages:
    dict = {}
    for file in glob.glob(f"{path}/{language}[0-9].txt"):
        file_path = f"{file}"
        contents = read_text_file(file_path)
        
        for char in contents:
            if char == "\n":
                continue
            else:
                if dict.get(char) == None:
                    dict[char] = 1
                else:
                    dict[char] = dict[char] + 1   
    global_count_dict[language] = dict

In [3]:
global_ccp = {'e':{}, 'j':{}, 's': {}}
for language in languages:
    ccp = {}
    total = 0
    for char in sorted(global_count_dict[language].keys()):
        total = total + global_count_dict[language][char]
    for char in sorted(global_count_dict[language].keys()):
        if ccp.get(char) == None:
            ccp[char] =  float(global_count_dict[language][char] + 0.5)/ (total + (27 * 0.5))
    global_ccp[language] = ccp

    print(f"For language : {language}, the CCP vector is {ccp}\n")

For language : e, the CCP vector is {' ': 0.1792499586981662, 'a': 0.0601685114819098, 'b': 0.011134974392863043, 'c': 0.021509995043779945, 'd': 0.021972575582355856, 'e': 0.1053692383941847, 'f': 0.018932760614571286, 'g': 0.017478936064761277, 'h': 0.047216256401784236, 'i': 0.055410540227986124, 'j': 0.001420783082768875, 'k': 0.0037336857756484387, 'l': 0.028977366595076822, 'm': 0.020518751032545846, 'n': 0.057921691723112505, 'o': 0.06446390219725756, 'p': 0.01675202378985627, 'q': 0.0005617049396993227, 'r': 0.053824549810011564, 's': 0.06618205848339666, 't': 0.08012555757475633, 'u': 0.026664463902197257, 'v': 0.009284652238559392, 'w': 0.015496448042293078, 'x': 0.001156451346439782, 'y': 0.013844374690236246, 'z': 0.0006277878737815959}

For language : j, the CCP vector is {' ': 0.12344945665466997, 'a': 0.1317656102589189, 'b': 0.010866906600510151, 'c': 0.005485866033054963, 'd': 0.01722631818022992, 'e': 0.06020475907613823, 'f': 0.003878542227191726, 'g': 0.014011670568

In [4]:
test_file = f"{path}/e10.txt"

x_vector = {}
for char in read_text_file(test_file):
    if char == "\n":
        continue
    else:
        if x_vector.get(char) == None:
            x_vector[char] = 1
        else:
            x_vector[char] = x_vector[char] + 1   
print(sorted(x_vector.items()))

[(' ', 498), ('a', 164), ('b', 32), ('c', 53), ('d', 57), ('e', 311), ('f', 55), ('g', 51), ('h', 140), ('i', 140), ('j', 3), ('k', 6), ('l', 85), ('m', 64), ('n', 139), ('o', 182), ('p', 53), ('q', 3), ('r', 141), ('s', 186), ('t', 225), ('u', 65), ('v', 31), ('w', 47), ('x', 4), ('y', 38), ('z', 2)]


In [5]:
log_likelihood =  {'e': float(0), 'j': float(0), 's': float(0)}
for language in languages:
    ccp = global_ccp[language]
    logsum = 0
    for char in x_vector:
        if ccp.get(char) == None:
            ccp[char] = 0.5 / 27*0.5
        logsum = logsum + math.log(ccp[char]) * x_vector[char]
    print(f"Log(p_hat) for {language} : {logsum}")
    log_likelihood[language] = logsum


Log(p_hat) for e : -7841.865447060635
Log(p_hat) for j : -8749.114299535931
Log(p_hat) for s : -8467.28204401056


In [6]:
# The prior is the same for each class as the number of samples (10) is the same.
prior = float((10 + 0.5)) / (30 + 3*0.5)

# Calculate posterior using Bayes rule
posterior = [log_likelihood[i] * prior for i in log_likelihood]
for i in range(3):
    print(f"Log(posterior) for {languages[i]} : {posterior[i]}")

Log(posterior) for e : -2613.9551490202116
Log(posterior) for j : -2916.3714331786437
Log(posterior) for s : -2822.42734800352


In [7]:
#NN
import numpy as np
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import torch
import time

In [8]:
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# Layer details for the neural network
input_size = 784
hidden_layer1_size = 300
hidden_layer2_size = 300
output_size = 10

losses = []
accuracies = []


trainset = datasets.MNIST('./dataset/MNIST/', download=True, train=True, transform=transform)
valset = datasets.MNIST('./dataset/MNIST/', download=True, train=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=32, shuffle=True)

In [9]:
class NeuralModel():
    def sigmoid(self, x):
        return 1/(1 + np.exp(-x))
    
    def softmax(self, x):
        exps = np.exp(x)
        denom = np.sum(exps, axis=1)
        denom.resize(exps.shape[0], 1)
        return exps / denom

    def __init__(self, sizes, epochs=20, l_rate=0.01):
        self.sizes = sizes
        self.epochs = epochs
        self.l_rate = l_rate
        self.init_params()
        
    def init_params(self):
        input_layer = int(self.sizes[0])
        hidden_1 = int(self.sizes[1])
        hidden_2 = int(self.sizes[1])
        output_layer = int(self.sizes[2])

        # Random initialization of weights between -1 and 1
        self.w1 = np.random.uniform(low=-1, high=1, size=(input_layer, hidden_1))
        self.w2 = np.random.uniform(low=-1, high=1, size=(hidden_1, hidden_2))
        self.w3 = np.random.uniform(low=-1, high=1, size=(hidden_2, output_layer))
        
        # Zero initialization of weights
        #self.w1 = np.zeros((input_layer, hidden_1))
        #self.w2 = np.zeros((hidden_1, hidden_2))
        #self.w3 = np.zeros((hidden_2, output_layer))
        
        # Random initialization of weights from normal distribution
        #self.w1 = np.random.randn(input_layer, hidden_1)
        #self.w2 = np.random.randn(hidden_1, hidden_2)
        #self.w3 = np.random.randn(hidden_2, output_layer)
        
    def forward(self, inputs):
        # Input layer to hidden layer1
        inputs = inputs.numpy()
        self.linear_1 = inputs.dot(self.w1)
        self.out1 = self.sigmoid(self.linear_1)
        
        # hidden layer 1 to 2
        self.linear_2 = self.out1.dot(self.w2)
        self.out2 = self.sigmoid(self.linear_2)
        
        # Hidden layer to softmax layer
        self.linear3 = self.out2.dot(self.w3)
        self.out3 = self.softmax(self.linear3)

        return self.out3
    
    def backward(self, x_train, y_train, output):
        # Convert tensors to numpy arrays
        x_train = x_train.numpy()
        y_train = y_train.numpy()

        batch_size = y_train.shape[0]

        # Derivative of loss 
        d_loss = output - y_train
        # Calculating delta for W3
        change_w3 = (1./batch_size) * np.matmul(self.out2.T, d_loss)

        # Backpropagating to the 2nd layer from the third layer
        d_out_2 = np.matmul(d_loss, self.w3.T)
        d_linear_2 = d_out_2 * self.sigmoid(self.linear_2) * (1 - self.sigmoid(self.linear_2))
        # Calculating delta for W2
        change_w2 = (1. / batch_size) * np.matmul(self.out1.T, d_linear_2)
        
        # Backpropagating to the 1nd layer from the second layer
        d_out_1 = np.matmul(d_loss, self.w3.T) * self.sigmoid(self.linear_2) * (1 - self.sigmoid(self.linear_2))
        d_out_1 = np.matmul(d_out_1, self.w2.T)
        d_linear_1 = d_out_1 * self.sigmoid(self.linear_1) * (1 - self.sigmoid(self.linear_1))
        # Calculating delta for W2
        change_w1 = (1. / batch_size) * np.matmul(x_train.T, d_linear_1)

        return change_w1, change_w2, change_w3
    
    def update_weights(self, w1_update, w2_update,w3_update):
        self.w1 -= self.l_rate * w1_update
        self.w2 -= self.l_rate * w2_update
        self.w3 -= self.l_rate * w3_update
    def compute_loss(self, y, y_hat):
        batch_size = y.shape[0]
        y = y.numpy()
        # Computing the cross entropy loss for the model and its given predictions
        loss = np.sum(np.multiply(y, np.log(y_hat)))
        loss = -(1./batch_size) * loss
        return loss
    def compute_metrics(self, val_loader):
        losses = []
        correct = 0
        total = 0
        for i, data in enumerate(val_loader):
            x, y = data
            # Converting to expected one-hot format
            y_onehot = torch.zeros(y.shape[0], 10)
            y_onehot[range(y_onehot.shape[0]), y]=1
            # Flattening input image into 1-D
            flattened_input = x.view(-1, 28*28)
            output = self.forward(flattened_input)
            predicted = np.argmax(output, axis=1)
            # Calculating correctly predicted labels
            correct += np.sum((predicted==y.numpy()))
            total += y.shape[0]
            # Computing the cross entropy loss
            loss = self.compute_loss(y_onehot, output)
            losses.append(loss)
        # Performing mean over all minibatches
        return (correct/total), np.mean(np.array(losses))

    def train(self, train_loader, val_loader):
        start_time = time.time()
        global losses
        global accuracies
        for iteration in range(self.epochs):
            for i, data in enumerate(train_loader):
                x, y = data
                # Since the model is producing a softmax probability over 10 classes, the label needs to be converted to a one-hot encoded vector
                y_onehot = torch.zeros(y.shape[0], 10)
                y_onehot[range(y_onehot.shape[0]), y]=1
                # Converting 28x28 image into a flattened input
                flattened_input = x.view(-1, 28*28)
                # Forward pass the input through the model
                output = self.forward(flattened_input)
                # Compute gradients for the linear layer weights using SGD
                w1_update, w2_update, w3_update = self.backward(flattened_input, y_onehot, output)
                # Perform weight update for the minibatch
                self.update_weights(w1_update, w2_update, w3_update)
            # Compute the mean loss over the test set after the completion of epoch
            accuracy, loss = self.compute_metrics(val_loader)
            losses.append(loss)
            accuracies.append(accuracy)
            print('Epoch: {0}, Time Spent: {1:.2f}s, Accuracy: {2:.2f}%, Loss: {3:.2f}'.format(
                iteration+1, time.time() - start_time, accuracy*100, loss
            ))

In [None]:
model = NeuralModel(sizes=[784, 300, 10], epochs=20)
# Training the model over the MNIST dataset
model.train(train_loader=trainloader, val_loader=valloader)
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.plot(losses)
plt.show()

In [None]:
from torch import nn
import torch
# Layer details for the neural network
input_size = 784
hidden_size = 300
output_size = 10

# Build a simple 2-layer feed forward network as described
model = nn.Sequential(nn.Linear(input_size, hidden_size, bias=False),
                      nn.Sigmoid(),
                      nn.Sigmoid(),
                      nn.Linear(hidden_size, output_size, bias=False),
                      nn.LogSoftmax(dim=1))
print(model)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.0)
# Using the cross entropy (or NLL) loss
criterion = nn.NLLLoss()

epochs = 20
losses = []
for i in range(epochs):
    running_loss = 0
    for images, labels in trainloader:
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)
    
        # Training pass
        optimizer.zero_grad()
        
        output = model(images)
        loss = criterion(output, labels)
        
        #This is where the model learns by backpropagating
        loss.backward()
        
        #And optimizes its weights here
        optimizer.step()
        
        running_loss += loss.item()
    else:
        losses.append(float(running_loss/len(trainloader)))
        print("Epoch {0}, Training loss: {1}".format(i, running_loss/len(trainloader)))

correct_count, all_count = 0, 0
for images,labels in valloader:
    for i in range(len(labels)):
        img = images[i].view(1, 28*28)
        # Turn off gradients for forward pass
        with torch.no_grad():
            logps = model(img)

    # Output of the network are log-probabilities, need to take exponential for probabilities
    ps = torch.exp(logps)
    probab = list(ps.numpy()[0])
    pred_label = probab.index(max(probab))
    true_label = labels.numpy()[i]
    if(true_label == pred_label):
        correct_count += 1
    all_count += 1

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))
plt.xlabel('Epochs')
plt.ylabel('Test Loss')
plt.plot(losses)
plt.show()