In [None]:
import pickle
# Special symbols shared by the vocabulary-building and training cells.
# Fallback entry looked up when a character or label is missing from a mapping.
UNK_TOKEN = "<UNK>"
# Filler appended to shorter sequences so all rows end up the same length.
PAD_TOKEN = "<PAD>"
# Registered in char2idx by create_char2idx; presumably a begin-of-sentence marker — TODO confirm.
BOS_TOKEN = "시"
# Registered in char2idx by create_char2idx; presumably an end-of-sentence marker — TODO confirm.
EOS_TOKEN = "끝"
# Not referenced by the visible cells. NOTE(review): looks like a delimiter from the g2pM data format — confirm.
SPLIT_TOKEN = "▁"
def create_digest_cedict(mono_file, poly_file, output_file):
    """Build a character -> pronunciation-list dictionary and pickle it.

    Both inputs are UTF-8 text files with one ``char<TAB>value`` entry per
    line. Blank or whitespace-only lines are skipped.

    Args:
        mono_file: Path to the single-pronunciation file; each value is stored
            as a one-element list.
        poly_file: Path to the multi-pronunciation file; each value is a
            comma-separated list of pronunciations. Entries here override
            entries for the same character from ``mono_file``.
        output_file: Path the resulting dict is pickled to.

    Raises:
        ValueError: If a non-blank line does not contain exactly one tab.
    """
    cedict = {}

    # Process monophonic characters
    with open(mono_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # A trailing or blank line would otherwise break the unpack below.
                continue
            char, pron = line.split('\t')
            cedict[char] = [pron]

    # Process polyphonic characters
    with open(poly_file, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            char, prons = line.split('\t')
            cedict[char] = prons.split(',')

    # Save to pickle file
    with open(output_file, 'wb') as f:
        pickle.dump(cedict, f)

# Create digest_cedict.pkl from the monophonic and polyphonic character lists.
# All three paths are relative, so they resolve against the current working directory.
create_digest_cedict('MONOPHONIC_CHARS.txt', 'POLYPHONIC_CHARS.txt', 'digest_cedict.pkl')

In [None]:
import pickle


def create_char2idx(sent_files, output_file):
    """Build a character -> integer-id vocabulary and pickle it.

    Characters receive ids in order of first appearance across ``sent_files``
    (each line is stripped of surrounding whitespace first). The special
    tokens UNK, PAD, BOS and EOS are then appended.

    Ids are always contiguous (``0 .. len(char2idx) - 1``). If a special token
    already occurs as a character in the corpus it keeps the id it was given
    there, instead of being overwritten and leaving a gap. A gap would make
    the largest id exceed ``len(char2idx)``, which is the size used for the
    embedding table.

    Args:
        sent_files: Iterable of paths to UTF-8 sentence files.
        output_file: Path the resulting dict is pickled to.
    """
    char2idx = {}

    for sent_file in sent_files:
        with open(sent_file, "r", encoding="utf-8") as f:
            for line in f:
                for char in line.strip():
                    if char not in char2idx:
                        char2idx[char] = len(char2idx)

    # Add special tokens; setdefault leaves an id untouched when the token was
    # already seen in the corpus.
    for token in (UNK_TOKEN, PAD_TOKEN, BOS_TOKEN, EOS_TOKEN):
        char2idx.setdefault(token, len(char2idx))

    # Save to pickle file
    with open(output_file, "wb") as f:
        pickle.dump(char2idx, f)


# Create char2idx.pkl from the characters of all three splits
# (train, dev and test sentences are all scanned for the vocabulary).
create_char2idx(["train.sent", "dev.sent", "test.sent"], "char2idx.pkl")

In [None]:
def create_class2idx(lb_files, output_file):
    """Build a pronunciation-label -> integer-id mapping and pickle it.

    Labels are the whitespace-separated tokens of every line in ``lb_files``
    and receive ids in order of first appearance. The special tokens UNK and
    PAD are then appended.

    Ids are always contiguous (``0 .. len(class2idx) - 1``). A special token
    that already occurs as a label keeps its existing id rather than being
    overwritten, which would leave a gap and make the largest id exceed the
    number of classes.

    Args:
        lb_files: Iterable of paths to UTF-8 label files.
        output_file: Path the resulting dict is pickled to.
    """
    class2idx = {}

    for lb_file in lb_files:
        with open(lb_file, "r", encoding="utf-8") as f:
            for line in f:
                for pron in line.split():
                    if pron not in class2idx:
                        class2idx[pron] = len(class2idx)

    # Add special tokens without clobbering ids already handed out.
    for token in (UNK_TOKEN, PAD_TOKEN):
        class2idx.setdefault(token, len(class2idx))

    # Save to pickle file
    with open(output_file, "wb") as f:
        pickle.dump(class2idx, f)


# Create class2idx.pkl from the labels of all three splits
# (train, dev and test label files are all scanned for the class inventory).
create_class2idx(["train.lb", "dev.lb", "test.lb"], "class2idx.pkl")

In [None]:
import numpy as np
import pickle


def initialize_np_ckpt(
    char2idx,
    class2idx,
    embedding_dim=64,
    lstm_hidden_dim=32,
    output_file="np_ckpt.pkl",
    seed=None,
):
    """Create a randomly initialised NumPy checkpoint and pickle it.

    The checkpoint is a dict of float32 arrays for an embedding table, one
    forward and one reverse LSTM layer, and a two-layer output head. Weight
    matrices are drawn from a standard normal distribution; biases are zero.

    Args:
        char2idx: Character vocabulary; its length is the embedding row count.
        class2idx: Label inventory; its length is the output layer size.
        embedding_dim: Width of each embedding vector.
        lstm_hidden_dim: Hidden size of each LSTM direction.
        output_file: Path the checkpoint is pickled to. Defaults to the
            previously hard-coded ``"np_ckpt.pkl"``.
        seed: Optional integer seed for a private random generator, making
            the checkpoint reproducible. With ``None`` (the default) the
            global ``np.random`` state is used, as before.
    """
    # Both the np.random module and a RandomState instance expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    def _randn(rows, cols):
        return rng.randn(rows, cols).astype(np.float32)

    def _zeros(size):
        return np.zeros(size, dtype=np.float32)

    # The four LSTM gates are stacked along the first axis.
    gate_rows = 4 * lstm_hidden_dim
    state_dict = {}

    # Initialize embedding weights
    state_dict["embedding.weight"] = _randn(len(char2idx), embedding_dim)

    # Initialize LSTM weights and biases (forward direction, then reverse)
    for suffix in ("", "_reverse"):
        state_dict[f"lstm.weight_ih_l0{suffix}"] = _randn(gate_rows, embedding_dim)
        state_dict[f"lstm.weight_hh_l0{suffix}"] = _randn(gate_rows, lstm_hidden_dim)
        state_dict[f"lstm.bias_ih_l0{suffix}"] = _zeros(gate_rows)
        state_dict[f"lstm.bias_hh_l0{suffix}"] = _zeros(gate_rows)

    # Initialize fully connected layer weights and biases; the first layer
    # takes the concatenated forward and reverse hidden states.
    state_dict["logit_layer.0.weight"] = _randn(lstm_hidden_dim, 2 * lstm_hidden_dim)
    state_dict["logit_layer.0.bias"] = _zeros(lstm_hidden_dim)
    state_dict["logit_layer.2.weight"] = _randn(len(class2idx), lstm_hidden_dim)
    state_dict["logit_layer.2.bias"] = _zeros(len(class2idx))

    # Save to pickle file
    with open(output_file, "wb") as f:
        pickle.dump(state_dict, f)


# Load char2idx and class2idx. Context managers close the files promptly
# instead of leaving the handles to the garbage collector.
# NOTE: pickle can execute arbitrary code on load — only read files you created.
with open("char2idx.pkl", "rb") as f:
    char2idx = pickle.load(f)
with open("class2idx.pkl", "rb") as f:
    class2idx = pickle.load(f)

# Initialize np_ckpt.pkl
initialize_np_ckpt(char2idx, class2idx)

In [None]:
# Reload every generated artifact and print its size as a sanity check.
# Context managers make sure each file handle is closed after loading.
with open("digest_cedict.pkl", "rb") as f:
    digest_cedict = pickle.load(f)
with open("char2idx.pkl", "rb") as f:
    char2idx = pickle.load(f)
with open("class2idx.pkl", "rb") as f:
    class2idx = pickle.load(f)

# Print statistics
print("Length of digest_cedict:", len(digest_cedict))
print("Length of char2idx:", len(char2idx))
print("Length of class2idx:", len(class2idx))

with open("np_ckpt.pkl", "rb") as f:
    state_dict = pickle.load(f)

# Checkpoint entries to report, in display order.
for param_name in (
    "embedding.weight",
    "lstm.weight_ih_l0",
    "lstm.weight_hh_l0",
    "lstm.bias_ih_l0",
    "lstm.bias_hh_l0",
    "lstm.weight_ih_l0_reverse",
    "lstm.weight_hh_l0_reverse",
    "lstm.bias_ih_l0_reverse",
    "lstm.bias_hh_l0_reverse",
    "logit_layer.0.weight",
    "logit_layer.0.bias",
    "logit_layer.2.weight",
    "logit_layer.2.bias",
):
    print(f"Dimensions of {param_name}:", state_dict[param_name].shape)

In [None]:
import numpy as np
import pickle
import os
from tqdm import tqdm
from g2pM2 import G2pM


# Load the dataset
def load_data(sent_file, lb_file):
    """Read parallel sentence and label files.

    Args:
        sent_file: UTF-8 file with one sentence per line.
        lb_file: UTF-8 file with whitespace-separated labels per line.

    Returns:
        ``(sentences, labels)``: a list of whitespace-stripped sentence
        strings and a list of label-token lists, one entry per input line.
    """
    with open(sent_file, "r", encoding="utf-8") as sent_fh:
        sentences = list(map(str.strip, sent_fh))

    with open(lb_file, "r", encoding="utf-8") as lb_fh:
        # str.split() with no separator already ignores surrounding whitespace.
        labels = [raw_line.split() for raw_line in lb_fh]

    return sentences, labels


# Convert characters and labels to indices and pad sequences
def prepare_data(sentences, labels, char2idx, class2idx):
    """Convert sentences and labels to padded id arrays.

    Args:
        sentences: Iterable of sentence strings.
        labels: Iterable of label-token lists, parallel to ``sentences``.
        char2idx: Character -> id mapping containing UNK_TOKEN and PAD_TOKEN.
        class2idx: Label -> id mapping containing UNK_TOKEN and PAD_TOKEN.

    Returns:
        ``(input_ids, target_ids, target_indices)``. The first two are
        rectangular integer arrays with one row per sentence, padded with the
        PAD id to a common width. ``target_indices`` holds, per sentence, the
        positions whose label is present in ``class2idx``.

        The common width is the longest sequence among inputs *and* targets,
        so a label row longer than every sentence can no longer produce a
        ragged array. Empty input yields two empty ``(0, 0)`` arrays and an
        empty list instead of raising.
    """
    input_ids = []
    target_ids = []
    target_indices = []
    for sent, label in zip(sentences, labels):
        input_ids.append([char2idx.get(char, char2idx[UNK_TOKEN]) for char in sent])
        target_ids.append(
            [class2idx.get(pron, class2idx[UNK_TOKEN]) for pron in label]
        )
        # Positions whose label is a known class; unknown labels are not targets.
        target_indices.append(
            [i for i, pron in enumerate(label) if pron in class2idx]
        )

    if not input_ids:
        # max() below would fail on an empty dataset.
        empty = np.empty((0, 0), dtype=int)
        return empty, empty.copy(), target_indices

    # Pad sequences to the longest row on either side.
    max_length = max(
        max(len(seq) for seq in input_ids),
        max(len(seq) for seq in target_ids),
    )
    input_ids = [
        seq + [char2idx[PAD_TOKEN]] * (max_length - len(seq)) for seq in input_ids
    ]
    target_ids = [
        seq + [class2idx[PAD_TOKEN]] * (max_length - len(seq)) for seq in target_ids
    ]

    return np.array(input_ids), np.array(target_ids), target_indices


# Generate batches of data
def get_batches(data, batch_size):
    """Yield consecutive mini-batches from a prepared dataset.

    Args:
        data: ``(inputs, targets, target_indices)`` triple of equally long
            sliceable sequences.
        batch_size: Number of rows per batch; the last batch may be shorter.

    Yields:
        ``(batch_inputs, batch_targets, batch_target_indices)`` where the
        first two are NumPy arrays and the third is the raw slice.
    """
    all_inputs, all_targets, all_target_indices = data
    for start in range(0, len(all_inputs), batch_size):
        window = slice(start, start + batch_size)
        yield (
            np.array(all_inputs[window]),
            np.array(all_targets[window]),
            all_target_indices[window],
        )


def compute_loss(model, inputs, targets, target_indices):
    """Return the mean negative log-likelihood over the selected target positions.

    Runs a forward pass through ``model`` (a forward and a backward LSTM cell
    stepped over time, followed by ``fc_layer``), applies a numerically
    stabilised softmax to the logits, and averages ``-log p(correct class)``
    over every position listed in ``target_indices``. No gradients are
    computed here.

    Args:
        model: Object exposing ``reverse_sequence``, ``get_embedding``,
            ``fw_lstm_cell``, ``bw_lstm_cell`` and ``fc_layer``. Their exact
            semantics are defined outside this notebook.
        inputs: Integer id array indexed as ``[batch, time]``.
        targets: Integer class-id array read as ``targets[i, idx]``.
        target_indices: One list of time positions per batch row.

    Returns:
        The summed per-target loss divided by the number of targets.
    """
    # Sum of sign(id) per row: counts the non-zero ids when ids are
    # non-negative, i.e. id 0 is treated as padding.
    # NOTE(review): create_char2idx assigns PAD a non-zero id and gives id 0 to
    # a real character, so these lengths may not be the true sentence lengths
    # — confirm against the vocabulary actually used.
    lengths = np.sum(np.sign(inputs), axis=1)
    max_length = max(lengths)

    # Recompute the hidden states up to the point where logits are obtained
    # presumably reverses each row within its own length — TODO confirm in g2pM2
    rev_seq = model.reverse_sequence(inputs, lengths)
    fw_emb = model.get_embedding(inputs)  # [b, t, d]
    bw_emb = model.get_embedding(rev_seq)

    # States start as None; the cells are expected to handle that on the first step.
    fw_states, bw_states = None, None
    fw_hs = []
    bw_hs = []
    for i in range(max_length):
        fw_input = fw_emb[:, i, :]
        bw_input = bw_emb[:, i, :]
        fw_states = model.fw_lstm_cell(fw_input, fw_states)
        bw_states = model.bw_lstm_cell(bw_input, bw_states)

        # Element 0 of the returned state is collected as the per-step hidden output.
        fw_hs.append(fw_states[0])
        bw_hs.append(bw_states[0])
    # Stack the per-step outputs along a new time axis.
    fw_hiddens = np.stack(fw_hs, axis=1)
    bw_hiddens = np.stack(bw_hs, axis=1)
    # Undo the reversal so backward outputs line up with forward time steps.
    bw_hiddens = model.reverse_sequence(bw_hiddens, lengths)

    outputs = np.concatenate([fw_hiddens, bw_hiddens], axis=2)  # [b, t, d]
    batch_size = outputs.shape[0]
    if batch_size == 1:
        outputs = outputs.squeeze(axis=0)  # [t, d]
        # Index with the whole position list at once to select the target rows.
        target_hidden = outputs[target_indices[0]]
    else:
        # Flatten target_indices for batch processing
        target_hidden = []
        for i in range(batch_size):
            for idx in target_indices[i]:
                target_hidden.append(outputs[i, idx])
        target_hidden = np.array(target_hidden)  # [total_targets, d]

    # Compute logits using the fc_layer
    logits = model.fc_layer(target_hidden)  # [total_targets, num_classes]

    # Apply softmax to get probabilities (subtracting the row max avoids overflow in exp)
    exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
    softmax_probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

    # Extract the probabilities of the correct classes
    total_targets = len(target_hidden)
    target_classes = []
    # Same (row, position) order as the hidden-state gathering above.
    for i in range(batch_size):
        for idx in target_indices[i]:
            target_classes.append(targets[i, idx])
    target_classes = np.array(target_classes)  # [total_targets]

    # Gather the probabilities for the target classes
    target_probs = softmax_probs[np.arange(total_targets), target_classes]

    # Compute the negative log likelihood (1e-9 guards against log(0))
    loss = -np.log(target_probs + 1e-9)
    # NOTE(review): total_targets is 0 when no row lists a target position,
    # which makes this division undefined — callers should skip such batches.
    loss = np.sum(loss) / total_targets

    return loss


# Update the model weights using Adam optimizer
def update_weights(model, grads, learning_rate, beta1, beta2, epsilon, t, m, v):
    """Apply one Adam step to the model attributes named in ``grads``.

    Args:
        model: Object whose ``__dict__`` holds the parameters, updated in place.
        grads: Mapping of attribute name -> gradient.
        learning_rate: Step size.
        beta1: Decay rate of the first-moment estimate.
        beta2: Decay rate of the second-moment estimate.
        epsilon: Small constant added to the denominator.
        t: 1-based step counter used for bias correction.
        m: Mapping of attribute name -> first-moment estimate (updated).
        v: Mapping of attribute name -> second-moment estimate (updated).
    """
    # Bias-correction denominators depend only on the step counter.
    first_correction = 1 - beta1**t
    second_correction = 1 - beta2**t

    for name in grads:
        gradient = grads[name]
        m[name] = beta1 * m[name] + (1 - beta1) * gradient
        v[name] = beta2 * v[name] + (1 - beta2) * (gradient**2)

        m_hat = m[name] / first_correction
        v_hat = v[name] / second_correction
        step = learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)
        vars(model)[name] -= step


# Save the trained model
def save_model(model, output_file):
    """Pickle the model's parameters under checkpoint-style key names.

    Args:
        model: Object exposing the thirteen parameter attributes listed below.
        output_file: Path the state dict is pickled to.
    """
    # (checkpoint key, model attribute) pairs, in checkpoint order.
    key_to_attribute = (
        ("embedding.weight", "embeddings"),
        ("lstm.weight_ih_l0", "weight_ih"),
        ("lstm.weight_hh_l0", "weight_hh"),
        ("lstm.bias_ih_l0", "bias_ih"),
        ("lstm.bias_hh_l0", "bias_hh"),
        ("lstm.weight_ih_l0_reverse", "weight_ih_reverse"),
        ("lstm.weight_hh_l0_reverse", "weight_hh_reverse"),
        ("lstm.bias_ih_l0_reverse", "bias_ih_reverse"),
        ("lstm.bias_hh_l0_reverse", "bias_hh_reverse"),
        ("logit_layer.0.weight", "hidden_weight_l0"),
        ("logit_layer.0.bias", "hidden_bias_l0"),
        ("logit_layer.2.weight", "hidden_weight_l1"),
        ("logit_layer.2.bias", "hidden_bias_l1"),
    )
    state_dict = {key: getattr(model, attr) for key, attr in key_to_attribute}

    with open(output_file, "wb") as out_fh:
        pickle.dump(state_dict, out_fh)


# Main script
if __name__ == "__main__":
    # Special tokens, re-declared so this cell does not depend on the first
    # cell having been run (prepare_data reads UNK_TOKEN / PAD_TOKEN).
    UNK_TOKEN = "<UNK>"
    PAD_TOKEN = "<PAD>"
    BOS_TOKEN = "시"
    EOS_TOKEN = "끝"
    SPLIT_TOKEN = "▁"

    # Load the model
    model = G2pM()

    # Load the training and development data
    train_sentences, train_labels = load_data("train.sent", "train.lb")
    dev_sentences, dev_labels = load_data("dev.sent", "dev.lb")

    # Load the char2idx and class2idx mappings (files are closed after loading)
    with open("char2idx.pkl", "rb") as f:
        char2idx = pickle.load(f)
    with open("class2idx.pkl", "rb") as f:
        class2idx = pickle.load(f)

    # Prepare the data
    train_data = prepare_data(train_sentences, train_labels, char2idx, class2idx)
    dev_data = prepare_data(dev_sentences, dev_labels, char2idx, class2idx)

    # Training parameters
    epochs = 5
    batch_size = 32
    learning_rate = 0.001
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8

    # Initialize Adam optimizer parameters: one zero moment buffer per ndarray
    # attribute of the model.
    t = 0
    m = {
        param: np.zeros_like(value)
        for param, value in model.__dict__.items()
        if isinstance(value, np.ndarray)
    }
    v = {
        param: np.zeros_like(value)
        for param, value in model.__dict__.items()
        if isinstance(value, np.ndarray)
    }

    # Training loop
    for epoch in range(epochs):
        # compute_loss returns a per-batch MEAN, so it is re-weighted by the
        # batch's target count before accumulating. Dividing the running sum by
        # total_targets then gives a true per-target average.
        train_loss = 0.0
        total_targets = 0
        with tqdm(total=len(train_data[0]), desc=f"Epoch {epoch+1}/{epochs}") as pbar:
            for inputs, targets, target_indices in get_batches(train_data, batch_size):
                target_count = sum(len(indices) for indices in target_indices)
                if target_count == 0:
                    # Nothing to score; compute_loss would divide by zero.
                    pbar.update(len(inputs))
                    continue

                t += 1
                loss = compute_loss(model, inputs, targets, target_indices)
                train_loss += loss * target_count
                total_targets += target_count

                # Compute gradients (this is a placeholder, you need to implement backpropagation to get actual gradients)
                # With all-zero gradients the Adam step below leaves the weights unchanged.
                grads = {
                    param: np.zeros_like(value)
                    for param, value in model.__dict__.items()
                    if isinstance(value, np.ndarray)
                }

                # Update weights
                update_weights(
                    model, grads, learning_rate, beta1, beta2, epsilon, t, m, v
                )

                # Update progress bar
                pbar.update(len(inputs))
                pbar.set_postfix({"Train Loss": train_loss / total_targets})

        # Validation loop (same target-count weighting as training)
        dev_loss = 0.0
        total_dev_targets = 0
        for inputs, targets, target_indices in get_batches(dev_data, batch_size):
            target_count = sum(len(indices) for indices in target_indices)
            if target_count == 0:
                continue
            loss = compute_loss(model, inputs, targets, target_indices)
            dev_loss += loss * target_count
            total_dev_targets += target_count

        avg_train_loss = (
            train_loss / total_targets if total_targets > 0 else float("inf")
        )
        avg_dev_loss = (
            dev_loss / total_dev_targets if total_dev_targets > 0 else float("inf")
        )

        print(
            f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss}, Dev Loss: {avg_dev_loss}"
        )

    # Save the trained model
    save_model(model, "trained_np_ckpt.pkl")

In [84]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import pickle
from tqdm import tqdm


# 1. Define Dataset and DataLoader
class CantoneseDataset(Dataset):
    """Sentence/label dataset converted to padded id sequences.

    Every sample is padded to one dataset-wide width so that samples can be
    stacked into a batch without further padding.

    Args:
        sent_file: UTF-8 file with one sentence per line.
        lb_file: UTF-8 file with whitespace-separated labels per line.
        char2idx: Character -> id mapping containing the pad and unk tokens.
        class2idx: Label -> id mapping containing the pad and unk tokens.
        pad_token: Key used to look up the padding id in both mappings.
        unk_token: Key used to look up the fallback id in both mappings.
    """

    def __init__(
        self,
        sent_file,
        lb_file,
        char2idx,
        class2idx,
        pad_token="<PAD>",
        unk_token="<UNK>",
    ):
        self.sentences, self.labels = self.load_data(sent_file, lb_file)
        self.char2idx = char2idx
        self.class2idx = class2idx
        self.pad_token = pad_token
        self.unk_token = unk_token
        # Conversion and padding are done once, up front.
        self.prepared_data = self.prepare_data()

    def load_data(self, sent_file, lb_file):
        """Return ``(sentences, labels)`` read from the two parallel files."""
        with open(sent_file, "r", encoding="utf-8") as f:
            sentences = [line.strip() for line in f]
        with open(lb_file, "r", encoding="utf-8") as f:
            labels = [line.strip().split() for line in f]
        return sentences, labels

    def prepare_data(self):
        """Return a list of ``(input_ids, target_ids, target_indices)`` triples.

        ``input_ids`` and ``target_ids`` are padded to the longest sequence
        found among inputs *and* targets, so both always have the same length
        even when a label row is longer than every sentence. Previously such
        a row was left unpadded and broke ``torch.stack`` at collation time.
        An empty dataset yields an empty list instead of raising.
        """
        input_ids = []
        target_ids = []
        target_indices = []
        for sent, label in zip(self.sentences, self.labels):
            input_ids.append(
                [self.char2idx.get(char, self.char2idx[self.unk_token]) for char in sent]
            )
            target_ids.append(
                [
                    self.class2idx.get(pron, self.class2idx[self.unk_token])
                    for pron in label
                ]
            )
            # Positions whose label is a known class; unknown labels are not targets.
            target_indices.append(
                [i for i, pron in enumerate(label) if pron in self.class2idx]
            )

        if not input_ids:
            # max() below would fail on an empty dataset.
            return []

        # Pad sequences to the longest row on either side.
        max_length = max(
            max(len(seq) for seq in input_ids),
            max(len(seq) for seq in target_ids),
        )
        input_ids = [
            seq + [self.char2idx[self.pad_token]] * (max_length - len(seq))
            for seq in input_ids
        ]
        target_ids = [
            seq + [self.class2idx[self.pad_token]] * (max_length - len(seq))
            for seq in target_ids
        ]

        return list(zip(input_ids, target_ids, target_indices))

    def __len__(self):
        return len(self.prepared_data)

    def __getitem__(self, idx):
        input_id, target_id, target_idx = self.prepared_data[idx]
        return {
            "input_ids": torch.tensor(input_id, dtype=torch.long),
            "target_ids": torch.tensor(target_id, dtype=torch.long),
            "target_indices": target_idx,  # Keep as list for variable lengths
        }


def collate_fn(batch):
    """Merge dataset samples into one batch.

    Args:
        batch: List of sample dicts with ``input_ids``, ``target_ids``
            (equal-length tensors) and ``target_indices`` (a list).

    Returns:
        ``(inputs, targets, target_indices)``: two stacked tensors with one
        row per sample, and the per-sample index lists left as a list.
    """
    input_rows, target_rows, target_indices = [], [], []
    for sample in batch:
        input_rows.append(sample["input_ids"])
        target_rows.append(sample["target_ids"])
        target_indices.append(sample["target_indices"])
    return torch.stack(input_rows), torch.stack(target_rows), target_indices


# 2. Define the Model
class G2pM(nn.Module):
    """Embedding -> bidirectional LSTM -> linear classifier over selected positions.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Width of each embedding vector.
        hidden_dim: Hidden size of each LSTM direction.
        num_classes: Number of output classes.
        padding_idx: Id of the padding character (or ``None``); its embedding
            is kept at zero.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes, padding_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # Bidirectional
        # Initialize weights
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise all weight matrices and zero all biases."""
        nn.init.xavier_uniform_(self.embedding.weight)
        # xavier_uniform_ also overwrites the padding row that nn.Embedding had
        # zeroed. That row receives no gradient, so it would stay a random
        # vector forever; restore the zero vector.
        if self.embedding.padding_idx is not None:
            with torch.no_grad():
                self.embedding.weight[self.embedding.padding_idx].zero_()
        for name, param in self.lstm.named_parameters():
            if "weight" in name:
                nn.init.xavier_uniform_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.constant_(self.fc.bias, 0)

    def forward(self, inputs, target_indices):
        """
        Args:
            inputs: [batch_size, seq_len]
            target_indices: list of lists containing target positions for each sample
        Returns:
            logits: [total_targets, num_classes]

        Positions ``>= seq_len`` are silently skipped, so ``total_targets``
        counts only the in-range positions, in (sample, position) order.
        """
        embedded = self.embedding(inputs)  # [batch_size, seq_len, embed_dim]
        lstm_out, _ = self.lstm(embedded)  # [batch_size, seq_len, hidden_dim*2]
        seq_len = lstm_out.size(1)

        # Collect (sample, position) pairs, then gather them in one indexing op.
        sample_ids = []
        positions = []
        for sample_id, indices in enumerate(target_indices):
            for idx in indices:
                if idx < seq_len:  # Ensure index is within sequence length
                    sample_ids.append(sample_id)
                    positions.append(idx)

        if sample_ids:
            rows = torch.tensor(sample_ids, dtype=torch.long, device=lstm_out.device)
            cols = torch.tensor(positions, dtype=torch.long, device=lstm_out.device)
            target_hidden = lstm_out[rows, cols]  # [total_targets, hidden_dim*2]
        else:
            # No targets in this batch: an empty tensor yields empty logits.
            target_hidden = lstm_out.new_empty((0, self.lstm.hidden_size * 2))

        logits = self.fc(target_hidden)  # [total_targets, num_classes]
        return logits


# 3. Training and Evaluation Functions
def _gather_targets(targets, target_indices):
    """Pick ``targets[i, idx]`` for every listed position, in (sample, position) order.

    Positions beyond the sequence length are skipped, mirroring the filter the
    model applies when it selects hidden states, so the result lines up
    one-to-one with the model's logits.
    """
    seq_len = targets.size(1)
    sample_ids = []
    positions = []
    for sample_id, indices in enumerate(target_indices):
        for idx in indices:
            if idx < seq_len:
                sample_ids.append(sample_id)
                positions.append(idx)
    rows = torch.tensor(sample_ids, dtype=torch.long, device=targets.device)
    cols = torch.tensor(positions, dtype=torch.long, device=targets.device)
    return targets[rows, cols]


def train_epoch(model, loader, criterion, optimizer, device, class2idx):
    """Run one optimisation pass over ``loader`` and return the mean per-target loss.

    Args:
        model: Module called as ``model(inputs, target_indices)`` returning
            ``[total_targets, num_classes]`` logits.
        loader: Iterable of ``(inputs, targets, target_indices)`` batches.
        criterion: Loss taking ``(logits, class_ids)`` and returning a batch mean.
        optimizer: Optimiser bound to the model's parameters.
        device: Device the input and target tensors are moved to.
        class2idx: Unused; kept so existing callers keep working.

    Returns:
        Loss averaged over all scored target positions (0.0 if there were none).
    """
    model.train()
    running_loss = 0.0
    total_targets = 0

    progress = tqdm(loader, desc="Training", leave=False)
    for batch in progress:
        inputs, targets, target_indices = batch
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        logits = model(inputs, target_indices)  # [total_targets, num_classes]
        if logits.numel() == 0:
            continue  # Skip if there are no target indices in the batch

        # Select the gold classes at exactly the positions the logits were
        # computed for. A "!= PAD" mask would also pick up positions with
        # unknown labels, which are not in target_indices, and the two tensors
        # would then differ in length.
        active_targets = _gather_targets(targets, target_indices)
        loss = criterion(logits, active_targets)

        loss.backward()
        optimizer.step()

        # criterion returns a mean; weight it by the batch's target count.
        running_loss += loss.item() * logits.size(0)
        total_targets += logits.size(0)

        avg_loss = running_loss / total_targets if total_targets > 0 else 0.0
        progress.set_postfix({"Loss": f"{avg_loss:.4f}"})

    avg_loss = running_loss / total_targets if total_targets > 0 else 0.0
    return avg_loss


def evaluate_epoch(model, loader, criterion, device, class2idx):
    """Return the mean per-target loss over ``loader`` without updating the model.

    Args:
        model: Module called as ``model(inputs, target_indices)``.
        loader: Iterable of ``(inputs, targets, target_indices)`` batches.
        criterion: Loss taking ``(logits, class_ids)`` and returning a batch mean.
        device: Device the input and target tensors are moved to.
        class2idx: Unused; kept so existing callers keep working.

    Returns:
        Loss averaged over all scored target positions (0.0 if there were none).
    """
    model.eval()
    running_loss = 0.0
    total_targets = 0

    with torch.no_grad():
        progress = tqdm(loader, desc="Evaluating", leave=False)
        for batch in progress:
            inputs, targets, target_indices = batch
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs, target_indices)  # [total_targets, num_classes]
            if logits.numel() == 0:
                continue  # Skip if there are no target indices in the batch

            # Same position-based selection as in train_epoch.
            active_targets = _gather_targets(targets, target_indices)
            loss = criterion(logits, active_targets)

            running_loss += loss.item() * logits.size(0)
            total_targets += logits.size(0)

            avg_loss = running_loss / total_targets if total_targets > 0 else 0.0
            progress.set_postfix({"Loss": f"{avg_loss:.4f}"})

    avg_loss = running_loss / total_targets if total_targets > 0 else 0.0
    return avg_loss


# 4. Evaluation Function for Sentences
def evaluate_sentence(
    model, sentence, char2idx, idx2class, device, pad_token="<PAD>", unk_token="<UNK>"
):
    """Predict one class label per character of ``sentence``.

    Args:
        model: Module called as ``model(inputs, target_indices)`` returning
            ``[total_targets, num_classes]`` logits.
        sentence: Input string; every character position is predicted.
        char2idx: Character -> id mapping containing ``unk_token``.
        idx2class: Class id -> label mapping.
        device: Device the input tensor is moved to.
        pad_token: Unused; kept for backward compatibility.
        unk_token: Key for the fallback character id, and the label returned
            for a predicted class id missing from ``idx2class``.

    Returns:
        List of predicted labels, one per character (empty for an empty
        sentence).
    """
    model.eval()
    if not sentence:
        # Nothing to predict; avoids feeding a zero-length sequence to the model.
        return []

    with torch.no_grad():
        # Convert sentence to indices
        input_ids = [char2idx.get(char, char2idx[unk_token]) for char in sentence]
        input_tensor = (
            torch.tensor(input_ids, dtype=torch.long).unsqueeze(0).to(device)
        )  # [1, seq_len]

        # Single sentence: request a prediction at every character position.
        target_indices = [list(range(len(input_ids)))]

        # Get logits
        logits = model(input_tensor, target_indices)  # [seq_len, num_classes]
        if logits.numel() == 0:
            print("No target indices found in the sentence.")
            return []

        # Get predictions as plain Python ints so dict lookup is unambiguous
        predictions = torch.argmax(logits, dim=1).tolist()  # [seq_len]

        # Map predictions to class labels, honouring the caller's unk_token
        predicted_labels = [idx2class.get(idx, unk_token) for idx in predictions]

    return predicted_labels


# 5. Main Training Script
def main():
    """Train the PyTorch model, checkpoint every epoch, and print a sample prediction.

    Reads ``char2idx.pkl``, ``class2idx.pkl`` and the train/dev ``.sent`` /
    ``.lb`` files from the working directory. Writes
    ``trained_pytorch_ckpt_epoch{N}.pth`` after each epoch and
    ``trained_pytorch_final.pth`` at the end.
    """
    # Load mappings. Context managers close the files promptly; only unpickle
    # files you created yourself, since pickle can run arbitrary code.
    with open("char2idx.pkl", "rb") as f:
        char2idx = pickle.load(f)
    with open("class2idx.pkl", "rb") as f:
        class2idx = pickle.load(f)
    # Create inverse mapping for class indices
    idx2class = {idx: cls for cls, idx in class2idx.items()}

    # Parameters (adjust as needed)
    vocab_size = len(char2idx)
    embed_dim = 128
    hidden_dim = 256
    num_classes = len(class2idx)
    pad_idx = char2idx["<PAD>"]

    # Create datasets
    train_dataset = CantoneseDataset("train.sent", "train.lb", char2idx, class2idx)
    dev_dataset = CantoneseDataset("dev.sent", "dev.lb", char2idx, class2idx)

    # Create data loaders (only the training split is shuffled)
    batch_size = 32
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
    )
    dev_loader = DataLoader(
        dev_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
    )

    # Initialize model
    model = G2pM(vocab_size, embed_dim, hidden_dim, num_classes, padding_idx=pad_idx)

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.001
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, betas=(0.9, 0.999), eps=1e-8
    )

    # Training parameters
    epochs = 10

    for epoch in range(1, epochs + 1):
        print(f"\nEpoch {epoch}/{epochs}")

        train_loss = train_epoch(
            model, train_loader, criterion, optimizer, device, class2idx
        )
        print(f"Training Loss: {train_loss:.4f}")

        dev_loss = evaluate_epoch(model, dev_loader, criterion, device, class2idx)
        print(f"Validation Loss: {dev_loss:.4f}")

        # Save a checkpoint after every epoch so an interrupted run keeps its progress
        torch.save(model.state_dict(), f"trained_pytorch_ckpt_epoch{epoch}.pth")

    # Save the final model
    torch.save(model.state_dict(), "trained_pytorch_final.pth")

    # Example Evaluation
    sentence = "然而，他红了20年以后，他竟退出了大家的视线。"
    predicted_labels = evaluate_sentence(model, sentence, char2idx, idx2class, device)
    print("\nSentence Evaluation:")
    for char, label in zip(sentence, predicted_labels):
        print(f"{char}: {label}")


if __name__ == "__main__":
    main()

                                                                         

KeyboardInterrupt: 

In [None]:
# Sample input sentence; the ToJyutping cell further down reads this global.
sentence = "然而，他红了20年以后，他竟退出了大家的视线。"

In [None]:
import pickle
import numpy as np
from g2pM2 import G2pM


# Load the trained model weights
def load_trained_model(model, ckpt_file):
    """Load a pickled state dict from ``ckpt_file`` into ``model``.

    Args:
        model: Object exposing ``load_variable(state_dict)``.
        ckpt_file: Path to a pickle file holding the state dict. Only load
            files you trust: unpickling can execute arbitrary code.
    """
    # The context manager closes the file even if unpickling fails.
    with open(ckpt_file, "rb") as f:
        state_dict = pickle.load(f)
    model.load_variable(state_dict)


# Initialize the model
# G2pM here is the class imported from g2pM2 at the top of this cell, which
# rebinds the name even if the PyTorch cell defined its own G2pM earlier.
model = G2pM()

# Load the trained weights
# "trained_np_ckpt.pkl" is the file written by save_model at the end of the NumPy training cell.
load_trained_model(model, "trained_np_ckpt.pkl")

# Test sentence
sentence = "然而，他红了20年以后，他竟退出了大家的视线。"

# Predict pronunciations
# NOTE(review): the meaning of tone= and char_split= is defined by g2pM2 — confirm against its docs.
predicted_pronunciations = model(sentence, tone=True, char_split=False)

# Print the result
print(predicted_pronunciations)

In [2]:
import ToJyutping

# Convert the sample sentence with ToJyutping.
# NOTE(review): `sentence` is not defined in this cell; it must already exist in
# the kernel from an earlier cell. The recorded output below is a NameError,
# i.e. this cell was run on a kernel where that earlier cell had not executed.
ToJyutping.get_jyutping_text(sentence)


NameError: name 'sentence' is not defined

In [3]:
# Quick check of ToJyutping on a short literal phrase (needs no prior cell state).
ToJyutping.get_jyutping_text("蒼白色")

'cong1 baak6 sik1'

In [None]:


def main():
    """Load the trained PyTorch model and print predictions for a sample sentence.

    Reads ``char2idx.pkl``, ``class2idx.pkl``, the train/dev data files and
    ``trained_pytorch_final.pth`` from the working directory.

    NOTE(review): the training cell defines a function with the same name;
    whichever cell ran last determines what ``main()`` does.
    """
    # Load mappings. Context managers close the files promptly; only unpickle
    # files you created yourself, since pickle can run arbitrary code.
    with open("char2idx.pkl", "rb") as f:
        char2idx = pickle.load(f)
    with open("class2idx.pkl", "rb") as f:
        class2idx = pickle.load(f)
    # Create inverse mapping for class indices
    idx2class = {idx: cls for cls, idx in class2idx.items()}

    # Parameters; they must match the values used when the checkpoint was
    # trained, otherwise load_state_dict fails on a shape mismatch.
    vocab_size = len(char2idx)
    embed_dim = 128
    hidden_dim = 256
    num_classes = len(class2idx)
    pad_idx = char2idx["<PAD>"]

    # Create datasets
    train_dataset = CantoneseDataset("train.sent", "train.lb", char2idx, class2idx)
    dev_dataset = CantoneseDataset("dev.sent", "dev.lb", char2idx, class2idx)

    # Create data loaders
    batch_size = 32
    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn
    )
    dev_loader = DataLoader(
        dev_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn
    )

    # Initialize model
    model = G2pM(vocab_size, embed_dim, hidden_dim, num_classes, padding_idx=pad_idx)

    # Move model to GPU if available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Load the trained model (map_location lets a GPU checkpoint load on CPU)
    model.load_state_dict(torch.load("trained_pytorch_final.pth", map_location=device))
    model.eval()  # Set model to evaluation mode

    # Example Evaluation
    sentence = "然而，他红了20年以后，他竟退出了大家的视线。"
    predicted_labels = evaluate_sentence(model, sentence, char2idx, idx2class, device)
    print("\nSentence Evaluation:")
    for char, label in zip(sentence, predicted_labels):
        print(f"{char}: {label}")


if __name__ == "__main__":
    main()