In [2]:
import torchvision
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from datetime import datetime
import numpy as np
import os

import ipynb

from ipynb.fs.full.LSTM_char import *
from ipynb.fs.full.Data_import_preprocessing import *

In [4]:
def tr_device_fn(GPU = False):
    '''Pick the torch device to run on
    Input
    GPU: bool - request the "mps" backend
    Output
    device: torch.device - "mps" when the backend is available, built and
            requested through GPU; "cpu" in every other case
    '''
    mps_backend = torch.backends.mps
    # Same evaluation order as before: backend checks first, caller's wish last.
    use_mps = bool(mps_backend.is_available() and mps_backend.is_built() and GPU)
    if use_mps:
        return torch.device("mps")
    return torch.device("cpu")

if __name__ == '__main__':
    # Quick check when this cell is run directly: ask for GPU support and
    # show which device was actually selected.
    device = tr_device_fn(True)
    print(device)

mps


In [None]:
def tr_training(X, y, model, optimizer, loss_fn, char_to_int, n_epochs, batch_size, device, save = False, verbose = False):
    '''Train the model and optionally save the best weights seen
    Input
    X: tensor - training data, described by the author as shape (batch size/sequence length/1)
    y: tensor - response vector
    model: obj - nn model
    optimizer: nn optimizer (must already be bound to the parameters of model)
    loss_fn: nn loss function
    char_to_int: dict - character to integer mapping (only stored next to the weights when saving)
    n_epochs: number of epochs
    batch_size: batch size
    device: device cpu or mps
    save: save [best weights, char_to_int] as '<ModelClass>_<unix timestamp>.pth' in the current directory
    verbose: print progress
    Output
    model: obj - the model moved to device, holding the weights reached after the final epoch
           (the best-epoch weights are only written to the file when save is True)
    '''
    import copy  # local import: only needed to snapshot the best weights

    start = datetime.now()
    if verbose:
        print('\nstart:', start, '\n')

    model = model.to(device)

    loader = data.DataLoader(data.TensorDataset(X, y), shuffle=True, batch_size=batch_size)
    best_model = None
    best_loss = np.inf

    epoch_start = start
    for epoch in range(n_epochs):
        model.train()
        for X_batch, y_batch in loader:
            y_pred = model(X_batch.to(device))
            loss = loss_fn(y_pred, y_batch.to(device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluation pass. NOTE: it iterates the same loader as training, so the
        # reported value is the loss on the training data, not on held-out data.
        model.eval()
        total_loss = 0
        with torch.no_grad():
            for X_batch, y_batch in loader:
                y_pred = model(X_batch.to(device))
                total_loss += loss_fn(y_pred, y_batch.to(device))
        # Convert once per epoch so comparisons/printing use a plain float.
        epoch_loss = float(total_loss)
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            # state_dict() returns references to the live parameters; without a
            # deep copy the next optimizer step would overwrite this snapshot.
            best_model = copy.deepcopy(model.state_dict())
        if verbose:
            print("Epoch %d: Cross-entropy: %.4f" % (epoch+1, epoch_loss))

        # Estimated end: assume every remaining epoch takes as long as this one.
        epoch_end = datetime.now()
        epoch_time = epoch_end - epoch_start
        if verbose:
            print('estimated end:', epoch_end + epoch_time * (n_epochs - epoch - 1))
        epoch_start = epoch_end

    if verbose:
        print('\nend:', datetime.now(), '\n', '\n')

    if save:
        if best_model is None:
            # No epoch improved on the initial "infinite" loss (e.g. n_epochs == 0
            # or a NaN loss): store the current weights instead of None.
            best_model = copy.deepcopy(model.state_dict())
        file_name = type(model).__name__ + '_' + str(int(datetime.now().timestamp())) + '.pth'
        torch.save([best_model, char_to_int], file_name)
        print('\nModel(weights + char_to_int) saved as: ' + file_name + ' in directory: ' + os.getcwd(), '\n')

    return model

if __name__ == '__main__':
    # End-to-end demo: load the text, build the dataset, train for n_epochs
    # and save the resulting weights together with the character mapping.
    seq_length = 100
    n_epochs = 1
    batch_size = 128
    loss_fn = nn.CrossEntropyLoss(reduction="sum")
    # NOTE(review): absolute, machine-specific path - consider a configurable data directory.
    text = dip_load_data('/Users/danielboda', 'wonderland.txt')
    chars, char_to_int, int_to_char = dip_chars_dict(text)
    X, y, n_patterns, n_vocab = dip_create_data(text, chars, char_to_int, verbose = True)
    # assumes the 5th argument is the sequence length (the literal 100 was passed before) - TODO confirm
    X, y = dip_normalization_reshape(X, y, n_patterns, n_vocab, seq_length, verbose = True)
    model = lc_CharModel(n_vocab)
    # The optimizer is created only after the model exists; building it earlier
    # raises NameError on a fresh kernel.
    optimizer = optim.Adam(model.parameters())
    device = tr_device_fn(GPU = True)
    # Pass batch_size (not n_epochs a second time) as the mini-batch size.
    model = tr_training(X, y, model, optimizer, loss_fn, char_to_int, n_epochs, batch_size, device, save = True, verbose = True)


Total Characters: 
144059
Total Vocab: 
47
Total Patterns: 
143959



Size of X: torch.Size([143959, 100, 1]) Size of y: torch.Size([143959]) 


start: 2023-11-20 08:28:57.692182 

