<a href="https://colab.research.google.com/github/Uzmamushtaque/Projects-in-Machine-Learning-and-AI/blob/main/GAN_MovieLens.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## GAN Training for MovieLens Data

In [1]:
import pandas as pd

In [2]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset

import numpy, random
import matplotlib.pyplot as plt

# dataset class

class MovieLens(Dataset):
    """Row-per-item dataset backed by a CSV file.

    Column 0 of the CSV is used as an integer label. The remaining columns
    are treated as values on a 0-5 scale and are normalised to 0-1.
    """

    def __init__(self, csv_file):
        """Load ``csv_file`` into a DataFrame; the first line is the header."""
        self.data_df = pd.read_csv(csv_file, header=0)

    def __len__(self):
        """Return the number of data rows in the CSV."""
        return len(self.data_df)

    def __getitem__(self, index):
        """Return ``(label, values, target)`` for row ``index``.

        ``label`` is the raw value of column 0, ``values`` is a float tensor
        of columns 1: divided by 5.0, and ``target`` is a length-610 one-hot
        float tensor with a 1.0 at position ``label``.
        """
        label = self.data_df.iloc[index, 0]

        # one-hot encoding of the label over a fixed 610 slots
        target = torch.zeros((610))
        target[label] = 1.0

        # row data, normalised from 0-5 to 0-1
        image_values = torch.FloatTensor(self.data_df.iloc[index, 1:].values) / 5.0

        # return label, row data tensor and target tensor
        return label, image_values, target

    def plot_image(self, index):
        """Draw row ``index`` as a single-row heat strip titled with its label."""
        # `.values` is a property (calling it raises TypeError), and imshow
        # needs 2-D input, so the 1-D row is reshaped to (1, n_columns).
        row = self.data_df.iloc[index, 1:].values.astype(float)
        img = row.reshape(1, -1)
        plt.title("label = " + str(self.data_df.iloc[index, 0]))
        # aspect='auto' stretches the one-pixel-high strip to fill the axes
        plt.imshow(img, interpolation='none', cmap='Blues', aspect='auto')




In [4]:
# Load /content/df.csv into a DataFrame (first line is used as the header).
df = pd.read_csv('/content/df.csv')

In [8]:
# Keep every row but only the first 1000 columns; 1000 is also the input
# width of the Discriminator's first Linear layer.
df1 = df.iloc[:,:1000]

In [12]:
# to_csv writes the row index as the first CSV column by default, so the file
# has one more column than df1. MovieLens reads that column 0 as the label
# and columns 1: as the row data.
df1.to_csv('/content/final.csv')

In [13]:
# Dataset over the exported CSV; later cells index and iterate over `movie`.
movie = MovieLens('/content/final.csv')

In [16]:
import pandas as pd

class Discriminator(nn.Module):
    """Binary classifier that scores a 1000-element vector as real or fake.

    The network is Linear(1000, 200) -> LeakyReLU -> LayerNorm ->
    Linear(200, 1) -> Sigmoid, trained with binary cross-entropy and Adam.
    """

    def __init__(self):
        # initialise parent pytorch class
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(1000, 200),
            nn.LeakyReLU(0.02),
            nn.LayerNorm(200),
            nn.Linear(200, 1),
            nn.Sigmoid()
        )
        self.loss_function = nn.BCELoss()
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.0001)
        # number of training steps taken so far
        self.counter = 0
        # loss value recorded on every 10th training step
        self.progress = []

    def forward(self, inputs):
        """Return the sigmoid score of ``inputs``."""
        return self.model(inputs)

    def train(self, inputs=True, target=None):
        """Run one optimisation step, or switch training mode.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` calls
        as ``self.train(False)``. To keep ``eval()`` and ``train()`` working,
        a boolean first argument is forwarded to the parent implementation.
        The mode must be passed positionally; ``train(mode=...)`` is not
        accepted.

        Args:
            inputs: tensor fed to the network, or a bool training-mode flag.
            target: tensor of the same shape as the network output holding
                the desired score (1.0 for real, 0.0 for fake).

        Returns:
            ``self`` when used as a mode switch, otherwise ``None``.

        Raises:
            TypeError: if ``inputs`` is a tensor and ``target`` is omitted.
        """
        if isinstance(inputs, bool):
            return super().train(inputs)
        if target is None:
            raise TypeError("train() missing required argument: 'target'")

        # calculate output of the network
        output = self.forward(inputs)
        # calculate loss
        loss = self.loss_function(output, target)

        # increase counter and accumulate error every 10
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())

        if self.counter % 10000 == 0:
            print("counter = ", self.counter)

        # zero gradients, perform a backward pass, update weights
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()

    def plot_progress(self):
        """Plot the recorded loss values as a scatter-like line chart."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.1, marker='.', grid=True,
                yticks=(0, 0.25, 0.5, 1.0, 5.0))




In [14]:
# helper that produces uniform random vectors, used as fake data when testing the discriminator
def generate_random(size):
    """Return a tensor of shape ``size`` sampled uniformly from [0, 1)."""
    return torch.rand(size)


# quick look at a 12-element sample
print(generate_random(12))

tensor([0.2701, 0.0452, 0.2068, 0.2931, 0.7095, 0.5226, 0.6988, 0.5533, 0.1401,
        0.6964, 0.4593, 0.7158])


In [18]:
%%time
# Train a fresh discriminator on its own: real rows against uniform noise.
D = Discriminator()

# MovieLens defines __getitem__ but no __iter__, so this loop indexes
# movie[0], movie[1], ... and stops when an index raises IndexError.
# Each row drives two discriminator updates.
for label, image_data_tensor, target_tensor in movie:
    # real data
    D.train(image_data_tensor, torch.FloatTensor([1.0]))
    # fake data
    D.train(generate_random(1000), torch.FloatTensor([0.0]))

CPU times: user 4.54 s, sys: 38.2 ms, total: 4.58 s
Wall time: 5.39 s


In [22]:
#D.plot_progress()

In [20]:
# manually run discriminator to check it can tell real data from fake
import random
# Scores for four real rows; random.randint(0,100) includes both endpoints,
# so rows are drawn from indices 0..100.
for i in range(4):
  image_data_tensor = movie[random.randint(0,100)][1]
  print( D.forward( image_data_tensor ).item() )

# Scores for four uniform-random vectors of length 1000.
for i in range(4):

  x =  D.forward( generate_random(1000))
  print(float(x[0]))

0.9966801404953003
0.9974632263183594
0.9960994720458984
0.9968515038490295
0.00041345242061652243
0.0005410234443843365
0.0003653595340438187
0.0004509893769863993


In [24]:
# generator class
class Generator(nn.Module):
    """Maps a 100-element seed vector to a 1000-element vector in (0, 1).

    The network is Linear(100, 200) -> LeakyReLU -> LayerNorm ->
    Linear(200, 1000) -> Sigmoid, optimised with Adam.
    """

    def __init__(self):
        # initialise parent pytorch class
        super().__init__()

        # define neural network layers
        self.model = nn.Sequential(
            nn.Linear(100, 200),
            nn.LeakyReLU(0.02),
            nn.LayerNorm(200),
            nn.Linear(200, 1000),
            nn.Sigmoid()
        )

        # create optimiser (Adam)
        self.optimiser = torch.optim.Adam(self.parameters(), lr=0.0001)

        # counter and accumulator for progress
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Return the generated vector for seed ``inputs``."""
        return self.model(inputs)

    def train(self, D=True, inputs=None, targets=None):
        """Run one generator update against ``D``, or switch training mode.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` calls
        as ``self.train(False)``. To keep ``eval()`` and ``train()`` working,
        a boolean first argument is forwarded to the parent implementation.
        The mode must be passed positionally; ``train(mode=...)`` is not
        accepted.

        Args:
            D: object exposing ``forward`` and ``loss_function``, or a bool
                training-mode flag.
            inputs: seed tensor fed to the generator.
            targets: score the generator wants ``D`` to assign to its output.

        Returns:
            ``self`` when used as a mode switch, otherwise ``None``.

        Raises:
            TypeError: if ``D`` is not a bool and ``inputs`` or ``targets``
                is omitted.
        """
        if isinstance(D, bool):
            return super().train(D)
        if inputs is None or targets is None:
            raise TypeError("train() requires D, inputs and targets")

        # calculate the output of the network
        g_output = self.forward(inputs)

        # pass onto Discriminator
        d_output = D.forward(g_output)

        # calculate error using the discriminator's loss
        loss = D.loss_function(d_output, targets)

        # increase counter and accumulate error every 10
        self.counter += 1
        if self.counter % 10 == 0:
            self.progress.append(loss.item())

        # zero gradients, perform a backward pass, update weights;
        # only this generator's optimiser steps, so D's weights do not change
        self.optimiser.zero_grad()
        loss.backward()
        self.optimiser.step()

    def plot_progress(self):
        """Plot the recorded loss values as a scatter-like line chart."""
        df = pd.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=0, figsize=(16, 8), alpha=0.1, marker='.', grid=True,
                yticks=(0, 0.25, 0.5, 1.0, 5.0))




In [27]:
# Smoke-test an untrained generator. The seed here is uniform
# (generate_random uses torch.rand), unlike generate_random_seed,
# which uses torch.randn.
G = Generator()
output = G.forward(generate_random(100))
# detach() drops the autograd graph so the tensor can be viewed as a NumPy array
img = output.detach().numpy()
#plt.imshow(img, interpolation='none', cmap='Blues')
#print(img)




In [28]:
def generate_random_image(size):
    """Return a tensor of shape ``size`` sampled uniformly from [0, 1)."""
    return torch.rand(size)


def generate_random_seed(size):
    """Return a tensor of shape ``size`` sampled from a standard normal."""
    return torch.randn(size)

In [29]:
# create Discriminator and Generator

D = Discriminator()
G = Generator()

# number of full passes over the dataset
epochs = 4

for epoch in range(epochs):
  print ("epoch = ", epoch + 1)
  # train Discriminator and Generator
  # per row: one D update on the real row, one D update on a generated
  # sample, then one G update
  for label, image_data_tensor, target_tensor in movie:
    # train discriminator on true
    D.train(image_data_tensor, torch.FloatTensor([1.0]))

    # train discriminator on false
    # use detach() so gradients in G are not calculated
    D.train(G.forward(generate_random_seed(100)).detach(), torch.FloatTensor([0.0]))

    # train generator
    # target 1.0: the generator is pushed towards outputs that D scores as real
    G.train(D, generate_random_seed(100), torch.FloatTensor([1.0]))







epoch =  1
epoch =  2
epoch =  3
epoch =  4


In [33]:
# generate one sample from the trained generator and binarise it:
# outputs <= 0.5 become 0, everything else becomes 1

output = G.forward(generate_random_seed(100))
img = output.detach().numpy()

# vectorised threshold, written in place so `img` keeps its dtype and
# stays the same array object as before
img[:] = numpy.where(img <= 0.5, 0, 1)
print(img)



[1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0.
 0. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 0. 1. 0. 1. 0. 0. 1.
 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1.
 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 0. 0. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0.
 1. 0. 1. 1. 1. 0. 0. 0. 0. 1. 0. 1. 0. 0. 1. 1. 0.

In [35]:
# Inspect one dataset item: (label, row values scaled to 0-1, one-hot target).
movie[3]

(3,
 tensor([0.8000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.6000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6000, 0.0000, 0.4000,
         0.0000, 0.0000, 0.0000, 0.6000, 0.0000, 0.0000, 0.0000, 0.0000, 0.6000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.8000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 1.0000,

In [43]:

# Dot product of dataset row 6 (values scaled to 0-1) with the thresholded
# 0/1 generator output `img`.
numpy.dot(movie[6][1],img)


17.8