In [1]:
import sys

sys.path.append("..")

from typing import Tuple, Iterable, Optional
import re
import torch


# Define The Config

In [2]:
BATCH_SIZE = 2
FINETUNE_LR = 1e-5
LR = 1e-4
EPOCHS = 30
TOTAL_STEPS = (42_000 // BATCH_SIZE) * EPOCHS
WARMUP_STEPS = int(TOTAL_STEPS * 0.1)
CONSTANT_STEPS = int(TOTAL_STEPS * 0.4)
WEIGHT_DECAY = 5e-3

CKPT_DIR = "ckpt"
LOG_DIR = "logs"

# Prepare Dataset

In [3]:
# Convert original dataset to WebDataset tar file for faster loading on Google Colab
from src.datamodule import VLSP2020Dataset, VLSP2020TarDataset

# from torch.utils.data import random_split

# dts = VLSP2020Dataset("../data/vlsp2020_train_set_02")
# train_set, val_set = random_split(dts, [42_000, 14_427])

# VLSP2020TarDataset("../data/vlsp2020_train_set.tar").convert(train_set)
# VLSP2020TarDataset("../data/vlsp2020_val_set.tar").convert(val_set)



## Get dataloader and preprocessing

In [4]:
train_dataset = VLSP2020TarDataset("../data/vlsp2020_train_set.tar").load()
val_dataset = VLSP2020TarDataset("../data/vlsp2020_val_set.tar").load()

In [5]:
import string

from src.datamodule.vlsp2020 import get_dataloader


def remove_punctuation(text: str):
    return text.translate(str.maketrans("", "", string.punctuation)).lower()


train_loader = get_dataloader(
    train_dataset,
    return_transcript=True,
    batch_size=BATCH_SIZE,
    target_transform=remove_punctuation,
)

val_loader = get_dataloader(
    val_dataset,
    return_transcript=True,
    batch_size=BATCH_SIZE,
    target_transform=remove_punctuation,
)


In [6]:
for batch in train_loader:
    print(batch)
    break

(('v√† h√¨nh nh∆∞ t√¥i c·∫£m gi√°c l√† qua t·ª´ng v√≤ng thi th√¨ c√°i h·ªçng c·ªßa b·∫°n t·ªët h∆°n r·ªìi th√¨ ph·∫£i', 'ƒë·∫øn ƒë√¢y th√¨ m·ªçi b·∫Øt ƒë·∫ßu th·∫•y ƒë∆∞·ª£c cu·ªôc chi·∫øn kh·ªëc li·ªát nh∆∞ th·∫ø n√†o r·ªìi ƒë√∫ng kh√¥ng ·∫° h·ªç r·∫•t l√† t√≠nh tay r·∫•t kƒ© th∆∞a qu√Ω v·ªã ch·ªçn √¥ m√†u c·ªßa m√¨nh m·ªùi em ƒë·ªçc c√¢u ti·∫øp theo'), (tensor([-0.0012,  0.0088,  0.0054,  ..., -0.0038, -0.0009,  0.0046]), tensor([-0.0760, -0.0811, -0.0728,  ..., -0.0032, -0.0034, -0.0035])))


# Metrics

In [7]:
def levenshtein_distance(source: Tuple[str], target: Tuple[str]):
    """
    Compute the Levenshtein distance between two sequences.
    """

    n, m = len(source), len(target)
    if n > m:
        # Make sure n <= m, to use O(min(n,m)) space
        source, target = target, source
        n, m = m, n

    current_row = range(n + 1)  # Keep current and previous row, not entire matrix
    for i in range(1, m + 1):
        previous_row, current_row = current_row, [i] + [0] * n
        for j in range(1, n + 1):
            add, delete, change = (
                previous_row[j] + 1,
                current_row[j - 1] + 1,
                previous_row[j - 1],
            )
            if source[j - 1] != target[i - 1]:
                change += 1
            current_row[j] = min(add, delete, change)

    distance = current_row[n]

    del current_row
    del previous_row

    return distance


def word_error_rate(prediction: str, transcript: str):
    pattern = r"\W+"

    prediction = re.split(pattern, prediction)
    transcript = re.split(pattern, transcript)

    return levenshtein_distance(prediction, transcript) / len(transcript)


def character_error_rate(prediction: str, transcript: str):
    return levenshtein_distance(prediction, transcript) / len(transcript)


# Prepare Tokenizer

In [8]:
import json
from torch.utils.data import DataLoader


class VocabBuilder:
    DELIM_TOKEN = "|"
    UNK_TOKEN = "<unk>"
    PAD_TOKEN = "<pad>"

    def __init__(self, vocab_file: str):
        self.vocab_file = vocab_file
        self._vocab_set = set()

    def add(self, texts: Iterable[str]):
        for t in texts:
            self._vocab_set.update(t)

    def build(self):
        self._vocab_dict = {c: i for i, c in enumerate(self._vocab_set)}

        # replace space with pipe for clearer visualization
        self._vocab_dict[self.DELIM_TOKEN] = self._vocab_dict[" "]
        del self._vocab_dict[" "]

        # add unknown token so that model can handle unseen characters
        self._vocab_dict[self.UNK_TOKEN] = len(self._vocab_dict)

        # add padding token for CTC
        self._vocab_dict[self.PAD_TOKEN] = len(self._vocab_dict)

        return self._vocab_dict

    @property
    def vocab_dict(self):
        return self._vocab_dict

    def save(self):
        with open(self.vocab_file, "w") as f:
            json.dump(self.vocab_dict, f, ensure_ascii=False)

    def load(self):
        with open(self.vocab_file, "r") as f:
            self._vocab_dict = json.load(f)

        return self._vocab_dict


# Speech Recognizer

## Define Model

In [9]:
from pytorch_lightning import LightningModule
from transformers import (
    Wav2Vec2ForPreTraining,
    Wav2Vec2CTCTokenizer,
    Wav2Vec2FeatureExtractor,
)
from torchmetrics import MeanMetric

from src.utils.scheduler import TriStateScheduler

class SpeechRecognizer(LightningModule):
    def __init__(
        self,
        wav2vec2: Wav2Vec2ForPreTraining,
        tokenizer: Wav2Vec2CTCTokenizer,
        feature_extractor: Wav2Vec2FeatureExtractor,
    ):
        super().__init__()


        self.hidden_size = wav2vec2.config.hidden_size
        self.vocab_size = tokenizer.vocab_size

        self.wav2vec2 = wav2vec2
        self.wav2vec2.freeze_feature_extractor()
        self.tokenizer = tokenizer
        self.feature_extractor = feature_extractor

        self.dropout = torch.nn.Dropout(0.1)
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_size, self.hidden_size // 2),
            torch.nn.ReLU(inplace=True),
            torch.nn.Linear(self.hidden_size // 2, self.vocab_size),
        )

        self.criterion = torch.nn.CTCLoss(blank=tokenizer.pad_token_id)

        self.train_loss = MeanMetric()

    def forward(self, waveforms: Tuple[torch.Tensor], transcripts: Tuple[str] = None):
        # convert torch.Tensor to numpy.ndarray
        waveforms = tuple(waveform.cpu().numpy() for waveform in waveforms)

        extracted = self.feature_extractor(
            waveforms,
            sampling_rate=16000,
            padding=True,
            return_tensors="pt",
            return_attention_mask=True,
        )

        outputs = self.wav2vec2(
            extracted.input_values,
            attention_mask=extracted.attention_mask,
        )

        # hidden_states.shape == (batch_size, sequence_length, hidden_size)
        hidden_states = outputs[0]
        hidden_states = self.dropout(hidden_states)

        # logits.shape == (batch_size, sequence_length, vocab_size)
        logits = self.fc(hidden_states)

        if transcripts is not None:
            # get the length of valids sequence
            input_lengths = self.wav2vec2._get_feat_extract_output_lengths(
                extracted.attention_mask.sum(-1)
            ).to(torch.long)

            # tokenize transcripts
            target_ids, target_lengths = self.tokenizer(
                transcripts, padding=True, return_length=True, return_tensors="pt"
            ).values()

            # (batch_size, sequence_length, vocab_size) -> (sequence_length, batch_size, vocab_size)
            log_probs = torch.nn.functional.log_softmax(logits, dim=-1).transpose_(0, 1)

            # compute loss
            loss = self.criterion(log_probs, target_ids, input_lengths, target_lengths)

            return loss, logits
        else:
            return logits

    def training_step(self, batch, batch_idx):
        transcripts, waveforms = batch

        loss, logits = self(waveforms, transcripts)

        self.train_loss(loss)

        if batch_idx % 100 == 0:
            self.log("train/loss", self.train_loss, on_step=True, on_epoch=True)

        return loss

    def on_train_epoch_end(self) -> None:
        self.train_loss.reset()

    def validation_step(self, batch, batch_idx):
        transcripts, waveforms = batch

        logits = self(waveforms)

        predicted_ids = torch.argmax(logits, dim=-1).cpu().numpy()

        predicted_texts = self.tokenizer.batch_decode(predicted_ids)

        wer = word_error_rate(predicted_texts, transcripts)
        cer = character_error_rate(predicted_texts, transcripts)

        self.log("val/wer", wer, on_epoch=True)
        self.log("val/cer", cer, on_epoch=True)

        return wer, cer

    def configure_optimizers(self):

        optimizer = torch.optim.AdamW([
            {"params": self.wav2vec2.parameters(), "lr": FINETUNE_LR},
            {"params": self.fc.parameters(), "lr": LR},
        ], lr=LR, weight_decay=WEIGHT_DECAY
        )

        scheduler = TriStateScheduler(
            optimizer,
            total_steps=TOTAL_STEPS,
            warmup_steps=WARMUP_STEPS,
            constant_steps=CONSTANT_STEPS,
            factor=1e-3
        )

        return {
            "optimizer": optimizer,
            "lr_scheduler": {
                "scheduler": scheduler,
                "interval": "step",
                "frequency": 1,
            }
        }


# Training

In [10]:
model_name = "nguyenvulebinh/wav2vec2-base-vietnamese-250h"

wav2vec2 = Wav2Vec2ForPreTraining.from_pretrained(model_name)
tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_name)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)

Some weights of the model checkpoint at nguyenvulebinh/wav2vec2-base-vietnamese-250h were not used when initializing Wav2Vec2ForPreTraining: ['lm_head.weight', 'lm_head.bias']
- This IS expected if you are initializing Wav2Vec2ForPreTraining from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing Wav2Vec2ForPreTraining from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of Wav2Vec2ForPreTraining were not initialized from the model checkpoint at nguyenvulebinh/wav2vec2-base-vietnamese-250h and are newly initialized: ['project_q.bias', 'quantizer.weight_proj.bias', 'quantizer.weight_proj.weight', 'quantizer.codevectors', 'project_hid.bias', 'project_hid.weight', 'project_q.weight']
You should proba

In [16]:
tokenizer.vocab_size

110

In [18]:
tokenizer.get_vocab()

{'·∫ª': 0,
 '6': 1,
 '·ª•': 2,
 '√≠': 3,
 '3': 4,
 '·ªπ': 5,
 '√Ω': 6,
 '·∫©': 7,
 '·ªü': 8,
 '·ªÅ': 9,
 '√µ': 10,
 '7': 11,
 '√™': 12,
 '·ª©': 13,
 '·ªè': 14,
 'v': 15,
 '·ª∑': 16,
 'a': 17,
 'l': 18,
 '·ª±': 19,
 'q': 20,
 '·ªù': 21,
 'j': 22,
 '·ªë': 23,
 '√†': 24,
 '·ªó': 25,
 'n': 26,
 '√©': 27,
 '·ªß': 28,
 '—É': 29,
 '√¥': 30,
 'u': 31,
 'y': 32,
 '·∫±': 33,
 '4': 34,
 'w': 35,
 'b': 36,
 '·ªá': 37,
 '·ªÖ': 38,
 's': 39,
 '√¨': 40,
 '·∫ß': 41,
 '·ªµ': 42,
 '8': 43,
 'd': 44,
 '·ªÉ': 45,
 'r': 47,
 '≈©': 48,
 'c': 49,
 '·∫°': 50,
 '9': 51,
 '·∫ø': 52,
 '√π': 53,
 '·ª°': 54,
 '2': 55,
 't': 56,
 'i': 57,
 'g': 58,
 'ÃÅ': 59,
 '·ª≠': 60,
 'ÃÄ': 61,
 '√°': 62,
 '0': 63,
 '·∫≠': 64,
 'e': 65,
 '·ªô': 66,
 'm': 67,
 '·∫≥': 68,
 '·ª£': 69,
 'ƒ©': 70,
 'h': 71,
 '√¢': 72,
 '√∫': 73,
 '·ªç': 74,
 '·ªì': 75,
 '·∫∑': 76,
 'f': 77,
 '·ªØ': 78,
 '·∫Ø': 79,
 '·ª≥': 80,
 'x': 81,
 '√≥': 82,
 '√£': 83,
 '·ªï': 84,
 '·ªã': 85,
 'Ã£': 86,
 'z': 87,
 '·∫£': 88,
 'ƒë': 89,
 '√®': 90,
 '·ª´': 91,
 '

In [11]:
model = SpeechRecognizer(wav2vec2, tokenizer, feature_extractor)



In [12]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

trainer = Trainer(
    accelerator="cpu",
    callbacks=[
        ModelCheckpoint(CKPT_DIR, monitor="val/wer", mode="min", save_top_k=1)
    ],
    logger=TensorBoardLogger(LOG_DIR),
    max_epochs=EPOCHS
)

GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [13]:
trainer.fit(model, train_loader, val_loader)

Missing logger folder: logs/lightning_logs

  | Name       | Type                   | Params
------------------------------------------------------
0 | wav2vec2   | Wav2Vec2ForPreTraining | 95.0 M
1 | dropout    | Dropout                | 0     
2 | fc         | Sequential             | 337 K 
3 | criterion  | CTCLoss                | 0     
4 | train_loss | MeanMetric             | 0     
------------------------------------------------------
91.2 M    Trainable params
4.2 M     Non-trainable params
95.4 M    Total params
381.529   Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(

KeyboardInterrupt



In [None]:
tokenizer(["xin ch√†o üòÉ", "c√¥ g√°i n√¥ng th√¥n"], return_tensors="pt", padding=True, return_length=True).input_ids.shape

torch.Size([2, 16])

In [None]:
input_ids

tensor([[ 81,  57,  26,  46,  49,  71,  24, 105,  46, 108, 109, 109, 109, 109,
         109, 109],
        [ 49,  30,  46,  58,  62,  57,  46,  26,  30,  26,  58,  46,  56,  71,
          30,  26]])

In [None]:
attention_mask

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [None]:
tokenizer.batch_decode(ids)

['xin ch√†o <unk>', 'c√¥ g√°i n√¥ng th√¥n']

In [None]:
tokenizer.get_vocab()

{'·∫ª': 0,
 '6': 1,
 '·ª•': 2,
 '√≠': 3,
 '3': 4,
 '·ªπ': 5,
 '√Ω': 6,
 '·∫©': 7,
 '·ªü': 8,
 '·ªÅ': 9,
 '√µ': 10,
 '7': 11,
 '√™': 12,
 '·ª©': 13,
 '·ªè': 14,
 'v': 15,
 '·ª∑': 16,
 'a': 17,
 'l': 18,
 '·ª±': 19,
 'q': 20,
 '·ªù': 21,
 'j': 22,
 '·ªë': 23,
 '√†': 24,
 '·ªó': 25,
 'n': 26,
 '√©': 27,
 '·ªß': 28,
 '—É': 29,
 '√¥': 30,
 'u': 31,
 'y': 32,
 '·∫±': 33,
 '4': 34,
 'w': 35,
 'b': 36,
 '·ªá': 37,
 '·ªÖ': 38,
 's': 39,
 '√¨': 40,
 '·∫ß': 41,
 '·ªµ': 42,
 '8': 43,
 'd': 44,
 '·ªÉ': 45,
 'r': 47,
 '≈©': 48,
 'c': 49,
 '·∫°': 50,
 '9': 51,
 '·∫ø': 52,
 '√π': 53,
 '·ª°': 54,
 '2': 55,
 't': 56,
 'i': 57,
 'g': 58,
 'ÃÅ': 59,
 '·ª≠': 60,
 'ÃÄ': 61,
 '√°': 62,
 '0': 63,
 '·∫≠': 64,
 'e': 65,
 '·ªô': 66,
 'm': 67,
 '·∫≥': 68,
 '·ª£': 69,
 'ƒ©': 70,
 'h': 71,
 '√¢': 72,
 '√∫': 73,
 '·ªç': 74,
 '·ªì': 75,
 '·∫∑': 76,
 'f': 77,
 '·ªØ': 78,
 '·∫Ø': 79,
 '·ª≥': 80,
 'x': 81,
 '√≥': 82,
 '√£': 83,
 '·ªï': 84,
 '·ªã': 85,
 'Ã£': 86,
 'z': 87,
 '·∫£': 88,
 'ƒë': 89,
 '√®': 90,
 '·ª´': 91,
 '

# Inference

In [None]:
from src.model.modules import Wav2Vec2Processor
from src.model import Wav2Vec2PretrainingModule
import torch


def speech_to_text(waveforms: Tuple[torch.Tensor, ...]):

    batched_waveforms, wavelengths = Wav2Vec2Processor()(waveforms)
    attention_masks = Wav2Vec2PretrainingModule._compute_attention_mask(wavelengths)

    logits = model(batched_waveforms, attention_mask=attention_masks).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    return processor.batch_decode(predicted_ids)


In [None]:
cer, wer = 0, 0
n_items = 0

for batch_idx, batch in enumerate(dataloader):
    transcripts, waveforms = batch
    predicted_transcripts = speech_to_text(waveforms)

    for predicted_transcript, transcript in zip(predicted_transcripts, transcripts):
        cer += character_error_rate(predicted_transcript, transcript)
        wer += word_error_rate(predicted_transcript, transcript)

        n_items += 1

    if batch_idx > 100:
        break

    del transcript
    del waveforms
    del predicted_transcript
    del batch

cer /= n_items
wer /= n_items

print(f"Character error rate: {cer:.2%}")
print(f"Word error rate: {wer:.2%}")


Character error rate: 25.47%
Word error rate: 32.33%


In [None]:
import pandas as pd

m = []

for batch_idx, batch in enumerate(dataloader):
    if batch_idx > 5:
        break

    transcripts, waveforms = batch
    predicted_transcripts = speech_to_text(waveforms)

    for predicted_transcript, transcript in zip(predicted_transcripts, transcripts):
        m.append(
            {
                "transcript": transcript,
                "predicted_transcript": predicted_transcript,
            }
        )


pd.set_option("display.max_colwidth", None)
pd.DataFrame.from_dict(m)


Unnamed: 0,transcript,predicted_transcript
0,tr∆∞·ªõc khi b·∫Øt ƒë·∫ßu kh·ªüi h√†nh hi·ªÉn d·ª± ƒë·ªãnh ƒë·∫°p xe v√†o b·ªën gi·ªù ba m∆∞∆°i ph√∫t s√°ng h√†ng ng√†y,tr∆∞·ªõc khi b·∫Øt ƒë·∫ßu kh·ªüi h√†nh hi·ªán d·ª± ƒë·ªãnh ƒë·∫°p xe v√† b·ªën gi·ªù ba m∆∞∆°i ph√∫t s√°ng h·∫±ng ng√†y
1,<unk> ngon,·ªõnƒëyvn
2,xin ƒë∆∞·ª£c c·∫£m ∆°n nh·ªØng chia s·∫ª ƒë·∫ßy c·∫£m x√∫c c·ªßa c√°c kh√°ch m·ªùi ƒë√£ gi√∫p cho ch√∫ng t√¥i nh·ªØng ng∆∞·ªùi thu·ªôc nhi·ªÅu th·∫ø h·ªá ƒë∆∞·ª£c l·ªõn l√™n trong h√≤a b√¨nh th√™m tr√¢n tr·ªçng nh·ªØng m·∫•t m√°t hi sinh c·ªßa th·∫ø h·ªá ƒëi tr∆∞·ªõc ƒë·ªÉ c√≥ ƒë∆∞·ª£c m·ªôt vi·ªát nam h√≤a b√¨nh gi√∫p cho ch√∫ng t√¥i,xin ƒë∆∞·ª£c c·∫£m ∆°n nh·ªØng chia s·∫ª ƒë·∫ßy c·∫£m x√∫c c·ªßa c√°c kh√°ch m·ªùi ƒë√£ gi√∫p cho ch√∫ng t√¥i nh·ªØng ng∆∞·ªùi thu·ªôc nhi·ªÅu th·∫ø h·ªá ƒë∆∞·ª£c l·ªõn l√™n trong h√≤a b√¨nh th√™m tr√¢n tr·ªçng nh·ªØng m·∫•t m√°t hi sinh c·ªßa th·∫•y ƒëi tr∆∞·ªõc ƒë·ªÉ c√≥ ƒë∆∞·ª£c m·ªôt vi·ªát nam h√≤a b√¨nh gi√∫p cho ch√∫ng t√¥i
3,nguy·ªÖn trang ƒëem thi th·ªÉ ch√∫a tr·ªãnh n·ªôp cho qu√¢n t√¢y s∆°n,nguy·ªÖn trang ƒëem thi th·ªÉ ch√∫a tr·ªãnh n·ªôp cho qu√¢n t√¢y s∆°n
4,theo ƒë√≥ ch·ªß m∆∞u trong v·ª• √°n l√† b·ªã c√°o nguy·ªÖn minh h√πng b·ªã tuy√™n m∆∞·ªùi b·∫£y nƒÉm t√π v·ªõi vai tr√≤ ch·ªâ,theo ƒë√≥ ch·ªß m∆∞u trong v·ª• √°n l√† b·ªã c√°o nguy·ªÖn minh h√πng b·ªã tuy√™n m∆∞·ªùi b·∫£y nƒÉm t√π v·ªõi vai tr√≤ ch·ªâ
5,b√¢y gi·ªù s·∫Ω l√† ph·∫ßn thi cu·ªëi c√πng v√† c≈©ng l√† <unk>,b√¢y gi·ªù s·∫Ω l√† ph·∫ßn thi cu·ªëi c√πng v√† c≈©n l√†
6,ƒë·ªìng th·ªùi l·∫°i nghe theo ki·∫øn ngh·ªã c·ªßa qu√¢n s∆∞ √¥ng cho ƒë√∫c tr·ªü l·∫°i ti·ªÅn ng≈© th√π kh√¥i ph·ª•c l∆∞u th√¥ng ti·ªÅn t·ªá trong ƒë·ªãa b√†n ƒëem ƒë·∫øn ti·ªán l·ª£i cho sinh ho·∫°t c·ªßa nh√¢n d√¢n kh√¥ng c√≤n ph·∫£i d√πng x·∫øp v·∫£i l√†m ph∆∞∆°ng ti·ªán thanh to√°n,ƒë·ªìng th·ªùi l·∫°i nghe theo k√Ωn ngh·ªã c·ªßa qu√¢n s∆∞ √¥ng cho ƒë√∫c tr·ªü l·∫°i ti·ªÅn ng·ªß th√π kh√¥i ph·ª•c l∆∞u th√¥ng ti·ªÅn t·ªá trong ƒë·ªãa b√†n ƒëem ƒë·∫øn ti·ªÅn l·ª£i cho sinh ho·∫°t c·ªßa nh√¢n d√¢n kh√¥ng c√≤n ph·∫£i d√πng x·∫øp v√£i l√†m ph∆∞∆°ng ti·ªán thanh to√°n
7,ch√≠nh quy·ªÅn c·ªßa t·ªïng th·ªëng trƒÉm nh·∫±m c·∫Øt gi·∫£m c√°c ch∆∞∆°ng tr√¨nh tr·ª£ c·∫•p an sinh x√£ h·ªôi,ch√≠nh quy·ªÅn c·ªßa t·ªïng th·ªëng trƒÉmp nh·∫Øm c·∫Øt gi·∫£m c√°c ch∆∞∆°ng tr√¨nh tr∆°a c·∫•p an sinh x√£ h·ªôicc
8,vƒÉn h·ªçc vi·ªát nam l√† s·ª± t√≠ch h·ª£p t·ª´ hai d√≤ng vƒÉn h·ªçc d√¢n gian v√† vƒÉn h·ªçc vi·∫øt c·ªßa nh·ªØng ng∆∞·ªùi d√πng ti·∫øng vi·ªát,vƒÉn h·ªçc vi·ªát nam l√† s·ª± t√≠ch h·ª£p t·ª´ hai d√≤ng vƒÉn h·ªçc d√¢n gian v√† vƒÉn h·ªçc vi·∫øt c·ªßa nh·ªØng ng∆∞·ªùi d√πng ti·∫øng vi·ªát
9,ui t√¨nh,·ª´·ª´·ª´
