In [1]:
# Copy the project sources from the read-only input dataset into the writable working directory.
# Explicit `!` shell escape: does not rely on IPython's automagic `cp` alias.
!cp -r /kaggle/input/dataset-gans-train/code /kaggle/working/

In [2]:
# Copy the stock config next to the code; a later %%writefile cell overwrites this file at the same path.
# Explicit `!` shell escape: does not rely on IPython's automagic `cp` alias.
!cp -r /kaggle/input/dataset-gans-train/config-kaggle.json /kaggle/working/

In [3]:
# Drop the copied main.py; it is rewritten by a %%writefile cell further down.
# -f keeps this cell re-runnable when the file has already been removed.
!rm -f /kaggle/working/code/main.py

In [4]:
%%writefile /kaggle/working/config-kaggle.json
{
    "data": {
        "train": {
            "captions": "/kaggle/input/dataset-gans-train/dataset-1t/captions/train.en",
            "features": "/kaggle/input/dataset-gans-train/dataset-1t/features/train.npy",
            "links": "/kaggle/input/dataset-gans-train/dataset-1t/links/train.txt"
        },
        "beam": {
            "captions": "/kaggle/input/dataset-gans-train/dataset-1t/captions/val.en",
            "features": "/kaggle/input/dataset-gans-train/dataset-1t/features/val.npy",
            "links": "/kaggle/input/dataset-gans-train/dataset-1t/links/beam.txt"
        },
        "test": {
            "captions": "/kaggle/input/dataset-gans-train/dataset-1t/captions/test.en",
            "features": "/kaggle/input/dataset-gans-train/dataset-1t/features/test.npy",
            "links": "/kaggle/input/dataset-gans-train/dataset-1t/links/beam.txt"
        },
        "images": "cocodataset/images",
        "vocab": "/kaggle/input/dataset-gans-train/dataset-1t/vocab.en"
    },
    "seed": 1561478941,
    "max_epoch": 30,
    "logging": {
        "activate": true,
        "output_folder": "results"
    },
    "cuda": {
        "ngpu": 2,
        "device": "cuda:0"
    },
    "sampler": {
        "train": {
            "batch_size": 128,
            "max_len": 20
        },
        "val": {
            "batch_size": 32,
            "max_len": 20
        },
        "beam": {
            "batch_size": 128,
            "drop_last": false
        }
    },
    "iterator": {
        "train": {
            "pin_memory": false,
            "num_workers": 0
        },
        "beam": {
            "pin_memory": false,
            "num_workers": 0
        },
        "test": {
            "pin_memory": false,
            "num_workers": 0
        }
    },
    "model": {
        "embeddings": "/kaggle/input/dataset-gans-train/dataset-1t/embeddings/glove.6B.300d.txt",
        "emb_dim": 300,
        "dec_dim": 256,
        "gradient_weight": 10,
        "clip": 1.0,
        "feature_size": 2048,
        "optimizers": {
            "lr": 0.002,
            "weight_decay": 1e-05,
            "betas": {
                "min": 0.5,
                "max": 0.999
            }
        },
        "generator": {
            "dropout_emb": 0.0,
            "dropout_type": "local",
            "dropout_state": 0.5,
            "train_iteration": 9,
            "dec_init_type": "zero",
            "att_activ": "tanh"
        },
        "discriminator": {
            "dec_init_type": "zero",
            "att_activ": "relu"
        }
    },
    "beam_search": {
        "beam_size": 5,
        "max_len": 25
    },
    "BLEU": {
        "max_bleu": 4
    },
    "load_dict": "output_epoch29_bleu0.07394183608962669"
}


Overwriting /kaggle/working/config-kaggle.json


In [5]:
# Show the GPUs visible to this session before training starts.
!nvidia-smi

Wed Apr  3 04:39:17 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla T4                       Off | 00000000:00:0

In [6]:
%%writefile /kaggle/working/code/main.py

import os
import sys
import time
import torch
import torch.optim as optim
import utils.explorer_helper as exh
import utils.vocab as uvoc
from tqdm import tqdm  # Import tqdm for progress bars
import subprocess  # Import subprocess for GPU usage monitoring
from datasets.captioning import CaptioningDataset
from metrics.scores import bleu_score, prepare_references
from metrics.search import beam_search, max_search
from models.wgan import WGAN
from torch.utils.data import DataLoader
from utils import check_args, fix_seed

def get_gpu_usage():
    """Query ``nvidia-smi`` for the memory figures of the first listed GPU.

    Runs ``nvidia-smi`` in CSV mode asking for ``memory.used`` and
    ``memory.total`` and parses the first data row only (the row right after
    the CSV header), so additional GPUs are not reported.

    Returns:
        list[int]: ``[memory_used, memory_total]`` as printed by nvidia-smi.

    Raises:
        Whatever ``subprocess.check_output`` raises when the command cannot be
        run, and ``IndexError``/``ValueError`` if the output has no data row or
        it is not numeric.
    """
    command = ['nvidia-smi', '--query-gpu=memory.used,memory.total', '--format=csv,nounits']
    report = subprocess.check_output(command).decode('utf-8')
    # Row 0 is the CSV header; row 1 is the first GPU.
    first_gpu_row = report.strip().split('\n')[1]
    return list(map(int, first_gpu_row.split(',')))

def run(args):
    """Train the WGAN captioning model and score it after every epoch.

    Loads the JSON configuration at ``args.CONFIG``, builds the training and
    evaluation data loaders, and trains for ``config['max_epoch']`` epochs
    (30 when the key is absent). After each epoch the evaluation set is
    decoded with ``max_search`` and a BLEU score of order
    ``config['BLEU']['max_bleu']`` is computed against the reference captions.
    The model state dict is saved whenever that score improves. When
    ``config['logging']['activate']`` is true, the decoded sentences of every
    epoch and a final ``scores.json`` are written under
    ``output/<config['logging']['output_folder']>``.

    Args:
        args: Parsed command-line arguments; only ``args.CONFIG`` (path to the
            JSON configuration) is read here.

    Returns:
        None.
    """
    config = exh.load_json(args.CONFIG)
    logging_enabled = config['logging']['activate']

    # Checkpoints are written to this folder even when text logging is off,
    # so the path must always be defined.
    output = os.path.join("output", config['logging']['output_folder'])
    if logging_enabled:
        exh.create_directory("output")
        exh.create_directory(output)

    # Only touch the CUDA runtime when it is actually usable; otherwise fall
    # back to the CPU instead of failing in torch.cuda.init().
    use_cuda = torch.cuda.is_available() and config['cuda']['ngpu'] > 0
    if use_cuda:
        torch.cuda.init()
    device = torch.device(config['cuda']['device'] if use_cuda else "cpu")

    fix_seed(config['seed'])
    vocab = exh.load_json(config['data']['vocab'])
    references = prepare_references(exh.read_file(config['data']['beam']['captions']))

    training_dataset = CaptioningDataset(config['data']['train'], "train", vocab, config['sampler']['train'])
    train_iterator = DataLoader(
        training_dataset,
        batch_sampler=training_dataset.sampler,
        collate_fn=training_dataset.collate_fn,
        pin_memory=config['iterator']['train']['pin_memory'],
        num_workers=config['iterator']['train']['num_workers']
    )
    beam_dataset = CaptioningDataset(config['data']['beam'], "beam", vocab, config['sampler']['beam'])
    beam_iterator = DataLoader(
        beam_dataset,
        batch_sampler=beam_dataset.sampler,
        collate_fn=beam_dataset.collate_fn,
        pin_memory=config['iterator']['beam']['pin_memory'],
        num_workers=config['iterator']['beam']['num_workers']
    )

    # Pre-trained embeddings are only loaded when a path is configured.
    weights = None
    if len(config['model']['embeddings']) > 0:
        weights = uvoc.init_weights(vocab, config['model']['emb_dim'])
        uvoc.glove_weights(weights, config['model']['embeddings'], vocab)

    model = WGAN(len(vocab['token_list']), config['model'], weights)
    model.reset_parameters()
    # NOTE(review): config['load_dict'] is never read in this function, so no
    # checkpoint is restored here - confirm whether resuming was intended.

    optim_cfg = config['model']['optimizers']
    adam_kwargs = dict(
        lr=optim_cfg['lr'],
        betas=(optim_cfg['betas']['min'], optim_cfg['betas']['max']),
        weight_decay=optim_cfg['weight_decay'],
    )
    optim_D = optim.Adam(model.D.parameters(), **adam_kwargs)
    optim_G = optim.Adam(model.G.parameters(), **adam_kwargs)

    model.to(device)

    fix_seed(config['seed'] + 1)

    scores = {
        "BLEU": [],
        "G_loss_train": [],
        "D_loss_train": []
    }
    max_bleu = config['BLEU']['max_bleu']
    # One independent list per BLEU order ([[]] * n would alias a single list).
    bleus = [[] for _ in range(max_bleu)]
    best_bleu = (0, 1)  # (best score, epoch it was reached)

    # Honour the configured epoch budget; 30 was the previous hard-coded value.
    max_epoch = config.get('max_epoch', 30)
    # Polling nvidia-smi spawns a subprocess, so do it only every N batches.
    gpu_poll_every = 100

    model.train(True)
    torch.set_grad_enabled(True)

    for epoch in range(1, max_epoch + 1):
        secs = time.time()
        print("Starting Epoch {}".format(epoch))

        n_batches = 0
        d_loss = 0
        g_loss = 0
        # Wrap in a fresh progress bar each epoch without rebinding the loader,
        # otherwise the bars nest (tqdm(tqdm(tqdm(...)))) as epochs go by.
        progress = tqdm(train_iterator, desc="Training", leave=False)
        for iteration, batch in enumerate(progress, start=1):
            batch.device(device)

            out = model(batch, optim_G, optim_D, epoch, iteration)

            # NOTE(review): assumes the reported losses are plain numbers; if
            # they are tensors they would need .item() before JSON export.
            d_loss += out['D_loss']
            g_loss += out['G_loss']
            n_batches += 1

            # Surface GPU memory in the progress bar instead of discarding it.
            if use_cuda and iteration % gpu_poll_every == 0:
                used, total = get_gpu_usage()
                progress.set_postfix(gpu_mem="{}/{}".format(used, total))

        # max(..., 1) avoids a ZeroDivisionError when the loader yields nothing.
        scores['G_loss_train'].append(g_loss / max(n_batches, 1))
        scores['D_loss_train'].append(d_loss / max(n_batches, 1))

        # Validation
        model.train(False)
        torch.set_grad_enabled(False)

        # Decoding uses max_search here (the beam_search call is not used),
        # but the progress message is kept unchanged.
        print("Beam search...")
        generated_sentences = max_search(model, beam_iterator, vocab, max_len=config['beam_search']['max_len'], device=device)

        # Only the highest configured BLEU order is computed and tracked.
        score = bleu_score(references, generated_sentences, max_bleu)
        bleus[max_bleu - 1].append(score)
        print("BLEU-{} score : {}".format(max_bleu, score))

        if score > best_bleu[0]:
            best_bleu = (score, epoch)
            # The folder may not exist yet when logging is disabled.
            os.makedirs(output, exist_ok=True)
            out_file = os.path.join(output, 'output_epoch{}_bleu{}'.format(epoch, score))
            torch.save(model.state_dict(), out_file)

        print("Best BLEU so far : {} (Epoch {})".format(best_bleu[0], best_bleu[1]))

        if logging_enabled:
            output_sentences = os.path.join(output, 'output_{}'.format(epoch))
            exh.write_text('\n'.join(generated_sentences), output_sentences)

        # Back to training mode for the next epoch.
        model.train(True)
        torch.set_grad_enabled(True)
        print("Epoch finished in {} seconds".format(time.time() - secs))

    if logging_enabled:
        scores['BLEU'] = bleus
        output_scores = os.path.join(output, 'scores.json')
        exh.write_json(scores, output_scores)
        print("Scores saved in {}".format(output_scores))


if __name__ == "__main__":
    # Script entry point: validate the command line, then start training.
    run(check_args(sys.argv))


Writing /kaggle/working/code/main.py


In [7]:
!python /kaggle/working/code/main.py /kaggle/working/config-kaggle.json

Starting Epoch 1
  fake = Variable(Tensor(real_samples.size(1), 1).fill_(1.0), requires_grad=False)
Beam search...
[0.25, 0.25, 0.25, 0.25]
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
BLEU-4 score : 0.17226576910713395
Best BLEU so far : 0.17226576910713395 (Epoch 1)
Epoch finished in 1132.3388495445251 seconds
Starting Epoch 2
Beam search...
[0.25, 0.25, 0.25, 0.25]
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
BLEU-4 score : 2.5309502685218223e-11
Best BLEU so far : 0.17226576910713395 (Epoch 1)
Epoch finished in 1133.8990211486816 seconds
Starting Epoch 3
Beam search...
[0.25, 0.25, 0.25, 0.25]
Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
BLEU-4 score : 0.24510880724039344
Best BLEU so far : 0.24510880724039344 (Epoch 3)
Epoch finished in 1133.636801481247 se