<a href="https://www.kaggle.com/code/harishlakshman/cs5661-project?scriptVersionId=234961814" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)


import pytorch_lightning as pl #to speed up execution
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import MNIST

# Dataset root directory; can be overridden with the PATH_DATASETS env var.
PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
# Use a larger batch when a CUDA GPU is available, a smaller one on CPU.
BATCH_SIZE = 256 if torch.cuda.is_available() else 64
# Give the data loaders half of the CPU cores. os.cpu_count() returns None when
# the count cannot be determined; fall back to 0 (load in the main process)
# instead of crashing on None / 2.
NUM_WORKERS = (os.cpu_count() or 0) // 2


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory



# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import kagglehub

# Download the latest version of the dataset through kagglehub and print where
# the files were placed.
# NOTE(review): `path` is only printed here; the cells that load the pickles
# read from hard-coded /kaggle/input/gandata20/ paths instead — confirm which
# location is intended.
path = kagglehub.dataset_download("vishalkundar/gandata20")

print("Path to dataset files:", path)

In [None]:
import pickle

# Small pickle round-trip demo: serialize a sample dict to disk, read it back,
# and print the restored copy.
data = dict(a=1, b=[2, 3, 4], c='hello')

# Serialize to bytes and write them out.
with open('data.pickle', 'wb') as file:
    file.write(pickle.dumps(data))

# Read the bytes back and deserialize.
with open('data.pickle', 'rb') as file:
    loaded_data = pickle.loads(file.read())

print(loaded_data)

In [None]:
def displaySingleImage(X_train, y_train, index=0):
    """Show one image in grayscale with its label as the plot title.

    Args:
        X_train: Indexable collection of images; ``X_train[index]`` is handed
            to ``plt.imshow``.
        y_train: Indexable collection of labels; ``y_train[index]`` is shown
            in the title.
        index: Position of the sample to display. Defaults to 0, which is the
            sample this function always displayed before the parameter existed.
    """
    plt.imshow(X_train[index], cmap="gray")
    plt.title(f"Label: {y_train[index]}")

In [None]:
def check_type(variable):
    """Return a human-readable name for the container type of ``variable``.

    Returns:
        ``"NumPy array"`` for ``np.ndarray`` instances, ``"Pandas DataFrame"``
        for ``pd.DataFrame`` instances, and a "neither" message otherwise.
    """
    # Checked in order; the first matching type wins.
    known_kinds = (
        (np.ndarray, "NumPy array"),
        (pd.DataFrame, "Pandas DataFrame"),
    )
    for kind, label in known_kinds:
        if isinstance(variable, kind):
            return label
    return "Neither NumPy array nor Pandas DataFrame"

In [None]:
# Load the training images stored in 64images.pickle.
# NOTE: pickle.load can execute arbitrary code; only unpickle files from a
# trusted source.
with open("/kaggle/input/gandata20/birds/train/64images.pickle", "rb") as file:
    low_res_Xtrain = pickle.load(file)
# The with-block has already closed the file, so no explicit close() is needed.
print(low_res_Xtrain[0])

In [None]:
# NOTE(review): other cells index low_res_Xtrain by position
# (low_res_Xtrain[0]); a string key only works if the unpickled object is
# dict-like — confirm the structure of the pickle.
a = low_res_Xtrain['labels']

In [None]:
   
# Load the char-CNN-RNN caption embeddings for the training set.
EMBEDDINGS_PATH = "/kaggle/input/gandata20/birds/train/char-CNN-RNN-embeddings.pickle"

# NOTE: pickle.load can execute arbitrary code; only unpickle files from a
# trusted source.
# `encoding` tells pickle how to decode 8-bit strings written by Python 2:
# try latin1 first, and fall back to leaving them as raw bytes.
try:
    with open(EMBEDDINGS_PATH, 'rb') as f:
        low_res_ytrain = pickle.load(f, encoding='latin1')
except UnicodeDecodeError:
    with open(EMBEDDINGS_PATH, 'rb') as f:
        low_res_ytrain = pickle.load(f, encoding='bytes')
    # With encoding='bytes', string keys/values come back as bytes objects and
    # may need an explicit .decode('utf-8') depending on the data structure.
# Each with-block closes its file on exit, so no explicit close() is needed.

print(low_res_ytrain[0])


In [None]:
# Number of top-level entries in the loaded embeddings object.
print(len(low_res_ytrain))

In [None]:
# Notebook display: show the Python type of the loaded embeddings object.
type(low_res_ytrain)

In [None]:
# Convert the loaded embeddings to a NumPy array; the trailing expression
# displays the array's shape in the notebook.
low_res_ytrain_np = np.array(low_res_ytrain)
low_res_ytrain_np.shape

In [None]:
# Peek at the first caption embedding of the first image.
first_image = low_res_ytrain_np[0]
first_caption_embedding = first_image[0]
print("First image, first caption embedding: ", first_caption_embedding)
# Flatten every axis after the first into a single row per entry. The row
# count is taken from the array itself rather than a hard-coded 8855, so the
# cell keeps working if the dataset size changes.
low_res_ytrain_np_flat = low_res_ytrain_np.reshape(low_res_ytrain_np.shape[0], -1)
df = pd.DataFrame(low_res_ytrain_np_flat)
df.head(5)

In [None]:
# Report what kind of container the pickled images were loaded into and how
# many top-level entries it holds.
print('64images type : ', check_type(low_res_Xtrain))
print('length of list: ', len(low_res_Xtrain))

In [None]:
# Preview the first entry of the image pickle.
plt.imshow(low_res_Xtrain[0], cmap="gray")

## Create a class that inherits from the PyTorch Lightning data module.
## It is responsible for creating the data loaders for the training, validation, and test sets.

In [None]:
class PickleImageDataset(Dataset):
    """Dataset over a sequence of images stored in a single pickle file.

    Args:
        pickle_path: Path to a pickle file whose content supports ``len()``
            and integer indexing (one entry per image).
        transform: Optional callable applied to each image after the
            ndarray-to-tensor conversion done in ``__getitem__``.
    """

    def __init__(self, pickle_path, transform=None):
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(pickle_path, 'rb') as f:
            self.images = pickle.load(f)
        self.transform = transform

    def __len__(self):
        """Return the number of images in the pickle."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return image ``idx``, converted and transformed.

        NumPy arrays become float32 tensors divided by 255; entries of any
        other type are passed to the transform unchanged.
        """
        img = self.images[idx]

        if isinstance(img, np.ndarray):
            img = torch.tensor(img, dtype=torch.float32)
            if img.ndim == 2:
                # Single-channel image without a channel axis: add one in front.
                img = img.unsqueeze(0)
            else:
                # Move the last axis to the front (presumably HWC -> CHW).
                img = img.permute(2, 0, 1)
            # Assumes 8-bit pixel values in [0, 255] — TODO confirm.
            img = img / 255.0

        if self.transform is not None:
            img = self.transform(img)
        return img

class BIRDSDataModule(pl.LightningDataModule):
    """Lightning data module that builds train/val/test loaders from image pickles.

    Args:
        pickle_path_train: Pickle file with the images used for training and
            validation.
        pickle_path_test: Pickle file with the images used for testing.
        batch_size: Batch size for every data loader.
        val_split: Fraction of the training pickle held out for validation.
        num_workers: Worker process count for every data loader.
    """

    def __init__(
        self,
        pickle_path_train,
        pickle_path_test,
        batch_size: int = BATCH_SIZE,
        val_split=0.2,
        num_workers: int = NUM_WORKERS
    ):
        super().__init__()
        self.pickle_path_train = pickle_path_train
        self.pickle_path_test = pickle_path_test
        self.batch_size = batch_size
        self.val_split = val_split
        self.num_workers = num_workers

        # PickleImageDataset already converts ndarray images to float tensors
        # scaled by 1/255, and transforms.ToTensor() rejects tensor input, so
        # only normalization is applied here.
        # NOTE(review): 0.1307 (mean) / 0.3081 (std) look like MNIST statistics
        # carried over from a template — recompute them for this dataset.
        self.transform = transforms.Compose(
            [
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        # NOTE(review): these also look like MNIST values (1x28x28 images,
        # 10 classes) — confirm before anything relies on them.
        self.dims = (1, 28, 28)
        self.num_classes = 10

    def setup(self, stage=None):
        """Create the datasets needed for ``stage``.

        ``None`` prepares everything. ``"fit"`` and ``"validate"`` build the
        train/val split; ``"test"`` builds the test dataset.
        """
        if stage in ("fit", "validate") or stage is None:
            dataset = PickleImageDataset(self.pickle_path_train, transform=self.transform)
            val_size = int(len(dataset) * self.val_split)
            train_size = len(dataset) - val_size
            self.train_dataset, self.val_dataset = random_split(dataset, [train_size, val_size])

        if stage == "test" or stage is None:
            # The whole test pickle is used as-is; no split is needed.
            self.test_dataset = PickleImageDataset(self.pickle_path_test, transform=self.transform)

    def train_dataloader(self):
        """Return the data loader over the training split."""
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the data loader over the validation split."""
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        """Return the data loader over the test dataset."""
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

## Download the dataset

## Split the data into training & validation sets

### BUILD THE GENERATOR
#### Generate fake data from random noise

In [None]:
def build_generator():
    """Build the generator, which produces fake data from random noise.

    Raises:
        NotImplementedError: Always; the generator has not been written yet.
    """
    # Placeholder body so the definition is valid Python until it is implemented.
    raise NotImplementedError("build_generator is not implemented yet")

### BUILD THE DISCRIMINATOR
#### Distinguish between real and fake data

In [None]:
def build_discriminator():
    """Build the discriminator, which distinguishes real data from fake data.

    Raises:
        NotImplementedError: Always; the discriminator has not been written yet.
    """
    # Placeholder body so the definition is valid Python until it is implemented.
    raise NotImplementedError("build_discriminator is not implemented yet")

### 