In [1]:
import os

import pandas as pd

import librosa
import librosa.display

import numpy as np

import IPython.display as ipd

import matplotlib.pyplot as plt

import random

from collections import Counter

from sklearn.model_selection import train_test_split

import torch
import torchaudio

from dataclasses import dataclass
from typing import Any, Dict, List, Union
from datasets import DatasetDict
from datasets import Dataset as DS

from transformers import (
    WhisperFeatureExtractor,
    WhisperTokenizer,
    WhisperProcessor,
    WhisperForConditionalGeneration,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    TrainerCallback,
    TrainingArguments,
    TrainerState,
    TrainerControl,
    EarlyStoppingCallback,
    pipeline
)

from torchmetrics.text import WordErrorRate, CharErrorRate

2024-04-23 07:47:50.454682: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-23 07:47:50.454827: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-23 07:47:50.585816: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
pip freeze > requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Root of the Kaggle input dataset and the locations derived from it.
# The two audio directories keep their trailing slash because file names are
# appended to them directly.
BASE_DIR = '/kaggle/input/ben10/ben10'
train_data_dir = BASE_DIR + "/16_kHz_train_audio/"
test_data_dir = BASE_DIR + "/16_kHz_valid_audio/"
data_path = BASE_DIR + "/train.csv"

In [4]:
# Maps the split prefix of a file name to the directory that holds its audio.
split2path = dict(train=train_data_dir, test=test_data_dir)

In [5]:
# Load the transcript table from data_path and preview ten randomly chosen rows.
data = pd.read_csv(data_path)
data.sample(10)

Unnamed: 0,file_name,transcripts
12816,train_tangail (264).wav,বুঝছো? হ দরকার পড়লে একবেলা না খাইয়া থাকমু। তাও...
922,train_sandwip (923).wav,তারা অবশ্য জিজ্ঞাইবার কথা আইছল হরে কই যে হ্যাঁ...
12756,train_tangail (204).wav,"না করো, কিবা দেহা যায় না! ভালো সোর্সে যদি ভালো..."
11827,train_sylhet (2220).wav,"আমি অইলাম যে, ই ব্যাটির অইলো যে ইও আছে। কিতা ই..."
3710,train_habiganj (387).wav,আমরার সাতে ঝগড়া-বিবাদের একটা অবস্থা তৈরি হইছিল...
8212,train_narsingdi (769).wav,সেঞ্চুরি আছে পাচটা না ছয়ডা। লিটনের <> খেলতো <>...
6474,train_narail (535).wav,রাগিব আর আমিই তো। মানে দুইজনের পুরো জীবনের রিস...
2296,train_chittagong (392).wav,কিল্লাই? \nইতে নাকি খোন চেয়ারম্যানের কী বলে কী...
10732,train_sylhet (1125).wav,"কিসের, কুন? ইটা ওইযে পুষ্টি গ্রুপ আছে না? পুষ্..."
10822,train_sylhet (1215).wav,"তো বিয়াটা মুটামুটি যেহেতু কবিড টাইমে ওইছে, ইলা..."


In [6]:
def extract_split(filename):
    """Return the part of ``filename`` that precedes its first underscore.

    For ``"train_tangail (264).wav"`` this is ``"train"``. A name without any
    underscore is returned unchanged.
    """
    prefix, _, _ = filename.partition("_")
    return prefix

def extract_district(filename):
    """Return the district field embedded in ``filename``.

    The name is cut at its first space and the second underscore-separated
    field of that leading token is returned, e.g.
    ``"train_tangail (264).wav"`` -> ``"tangail"``.

    Raises:
        IndexError: if the leading token contains no underscore.
    """
    leading_token = filename.partition(" ")[0]
    return leading_token.split("_")[1]

def beautify_dataset(data):
    """Attach path/district/split columns and normalise the transcripts.

    Each row's ``file_name`` is parsed with ``extract_split`` and
    ``extract_district``; the audio path is the matching ``split2path``
    directory followed by the file name. Rows whose audio file is not found
    on disk are reported and left out of the result, so the returned frame
    may be shorter than the input.

    Args:
        data: DataFrame with ``file_name`` and ``transcripts`` columns.

    Returns:
        A new DataFrame (the input is not modified) holding the kept rows
        with their original index labels, the whitespace-normalised
        ``transcripts`` and the added ``file_path``, ``district`` and
        ``split`` columns.

    Raises:
        KeyError: if a file name's split prefix is not a key of ``split2path``.
    """
    kept_positions = []
    splits = []
    districts = []
    newpaths = []
    transcripts = []

    # Columns are read by name so the function also works on a frame that
    # already carries the derived columns (e.g. when the cell is re-run).
    rows = zip(data["file_name"], data["transcripts"])
    for position, (filename, transcript) in enumerate(rows):
        split = extract_split(filename)
        district = extract_district(filename)
        composed_path = f"{split2path[split]}{filename}"

        if not os.path.exists(composed_path):
            print(f"{composed_path} does not exist.")
            continue

        # A missing transcript (NaN) is treated as an empty string.
        text = "" if pd.isna(transcript) else str(transcript)
        # str.split() with no argument splits on any whitespace, newlines
        # included, so this both replaces newlines and collapses repeats.
        text = " ".join(text.split())

        kept_positions.append(position)
        splits.append(split)
        districts.append(district)
        newpaths.append(composed_path)
        transcripts.append(text)

    # Select the surviving rows first so the new columns always line up with
    # the frame, even when some files were skipped above.
    result = data.iloc[kept_positions].copy()
    result['file_path'] = newpaths
    result['district'] = districts
    result['split'] = splits
    result['transcripts'] = transcripts

    return result

In [7]:
data = beautify_dataset(data)


In [8]:
data[data["transcripts"] == "<>"]

Unnamed: 0,file_name,transcripts,file_path,district,split
721,train_sandwip (722).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,sandwip,train
1296,train_barishal (242).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
1298,train_barishal (244).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
1299,train_barishal (245).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
1301,train_barishal (247).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
...,...,...,...,...,...
13330,train_tangail (778).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,tangail,train
13336,train_tangail (784).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,tangail,train
13356,train_tangail (804).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,tangail,train
13361,train_tangail (809).wav,<>,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,tangail,train


In [9]:
data[data["transcripts"] == ""]

Unnamed: 0,file_name,transcripts,file_path,district,split
3590,train_habiganj (267).wav,,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,habiganj,train
13505,train_tangail (953).wav,,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,tangail,train


In [10]:
data[data["transcripts"] == ".."]

Unnamed: 0,file_name,transcripts,file_path,district,split
1329,train_barishal (275).wav,..,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
1331,train_barishal (277).wav,..,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train
1338,train_barishal (284).wav,..,/kaggle/input/ben10/ben10/16_kHz_train_audio/t...,barishal,train


**NOTE:** Think about how you want to use the existing models or your fine-tuned model to replace these examples. For now, let's simply drop them.

In [11]:
# Remove, in place, every row whose transcript carries no usable text:
# the empty string, the "<>" marker and "..".
data.drop(
    index=data.index[data['transcripts'].isin(['', "<>", ".."])],
    inplace=True,
)

In [12]:
data["transcripts"] = data["transcripts"].str.strip()

In [13]:
# # remove punctuations
# punctuations = [
#     "/::\)","/::","(-_-)","(*_*)","(>_<)",":)",";)",":P","xD","-_-","*_*","(>_<)","...",".",",",";",":","!","?","'","অ�", "অাবার", "।"
#     "\"","-","_","/","\\","|","{","}","[","]","(",")","<",">","@","#","$","%","^","&","*","~","`","+","=","0","1","2","3","4","5","6","7","8","9","৳","০",
#     "১","২","৩","৪","৫","৬","৭","৮","৯","\n","\t","\r","\f","\v","\u00C0-\u017F","\u2000-\u206F","\u25A0-\u25FF","\u2600-\u26FF","\u2B00-\u2BFF","\u3000-\u303F",
#     "\uFB00-\uFB4F","\uFE00-\uFE0F","\uFE30-\uFE4F","\u1F600-\u1F64F","\u1F300-\u1F5FF","\u1F680-\u1F6FF","\u1F1E0-\u1F1FF","\u2600-\u26FF","\u2700-\u27BF",
#     "\u1F300-\u1F5FF","\u1F900-\u1F9FF","\u1F600-\u1F64F","\u1F680-\u1F6FF","\u1F1E0-\u1F1FF","\u1F600-\u1F64F",
# ]

In [14]:
# def remove_punctuations(text):
#     for punctuation in punctuations:
#         text = text.replace(punctuation, "")
#     return text

# # Apply the function to the 'transcript' column
# data['transcripts'] = data['transcripts'].apply(remove_punctuations)

In [15]:
import re

# Code-point ranges deleted by remove_emoji.
# NOTE: the last range (U+24C2 to U+1F251) is very wide and therefore also
# matches many non-emoji characters, e.g. CJK ideographs.
_EMOJI_RANGES = (
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
)
_EMOJI_PATTERN = re.compile("[" + _EMOJI_RANGES + "]+", flags=re.UNICODE)


def remove_emoji(text):
    """Return ``text`` with every run of characters from the emoji ranges removed."""
    return _EMOJI_PATTERN.sub("", text)

# Apply the remove_emoji function to the 'transcripts' column
data['transcripts'] = data['transcripts'].apply(remove_emoji)


In [16]:
def remove_extra_space(text):
    """Delete ASCII Latin letters from ``text`` and normalise its whitespace.

    Despite the name, every run of ``a-z`` / ``A-Z`` is removed first. Each
    whitespace run is then collapsed to one space, and the result is stripped
    so that a Latin word removed from the start or end of the text does not
    leave a stray space behind.

    Args:
        text: the transcript string to clean.

    Returns:
        The cleaned string; it may be empty if ``text`` held only Latin
        letters and whitespace.
    """
    text = re.sub(r"[a-zA-Z]+", "", text)
    text = re.sub(r"\s+", " ", text)
    return text.strip()

data['transcripts'] = data['transcripts'].apply(remove_extra_space)

In [17]:
def remove_extra(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is reduced to one space, not removed.
    """
    return re.compile(r"\s+").sub(" ", text)

data['transcripts'] = data['transcripts'].apply(remove_extra)

In [18]:
data.transcripts.sample(20)

5120     <> ধর এ অইলো অইলে, লাভ অইলো অইলে, লাভ অইলো অইল...
621      ঘাডার আগে আইলের কোরে কোরে কোরেত্তে আমরা টানি-ট...
11474    আর বনির বাড়ি যাই। ওহ। আর <> নায়। ড্রাগোর অনো খ...
8947     হ, যায়া নিয়ে আসছে? হুম, তা ছোট ছাওয়াক যে আনসে ...
5321     সত্ত মিত্তা টিকাছে? এই যে <> টিকাছে? এ লং টাইম...
6990     ওই ইসের ডেটা কালেকশনের ওই কাজডা ভাবলাম যাই উনা...
4523     অহন এই যে কি ইয়া তর ইস্কুল না বন্দ আছিন? হ। অহ...
332      টিয়ারে টিয়া কইতো না হিয়ার যা মন চাইতো হিয়া কইত...
2779     আঁর লাই দোয়া গরিবু বা খোনো সময়ত আঁরে হেল্প গরি...
8142     পইল্লা ছয় ওভারে খেলা দিছে। ছয় ওভারে রান টার্গে...
2353     <> বউত আগরতুন বিয়া গইজজে। ইবা যহন আই জাইননি, এ...
8666     কী অবস্তা আল আমিন ভাই, কেমন আছেন? আচ্ছা ভাই ঠি...
887      মাইজ্জাগার বৌ যেগিন করে, কার কার? ওরে বাপরে বা...
12017    <> তারার লাগিও ইটা পারফেক্ট। ইনো ইনো আর মানে আ...
5065     <> বছর যে কিভাবে যায়গা টেরো ফাইতা না, <> বিদেশ...
8478     টেনশন কিলিগা? পরীক্ষার। ইমুন পরীক্ষার যাগাত গি...
9640     অনেক বালো সাইট মানে কুনু ঝায়-ঝামেলা নাই, কুনু .

In [19]:
data.transcripts[7693]

'পাঁচ দশমিক চার শূন্য পার্সেন অইছে হিন্দু। বৌদ্ধ আছে শূন্য দশমিক শূন্য এক পার্সেন। খ্রিস্টানও আছে কিছু, শূন্য দশমিক শূন্য তিন পার্সেন। আর অন্যান্য কিছু ধর্মের অনুসারী আছে শূন্য দশমিক শূন্য ছয়।'

In [20]:
# Task name passed to the tokenizer and processor when they are loaded.
TASK = "transcribe"
# NOTE(review): MODEL_NAME is not referenced anywhere else in the visible notebook.
MODEL_NAME = "arif11/bangla-ASR-v7"
# Checkpoint directory from which the feature extractor, tokenizer, processor
# and model are loaded.
MODEL_PATH = "/kaggle/input/asr-v16/whisper-reg-ben"

In [21]:
# Load the feature extractor, the tokenizer and the combined processor from
# the same checkpoint directory. The tokenizer and processor are configured
# for Bengali ('bn') and the task named in TASK.
feature_extractor = WhisperFeatureExtractor.from_pretrained(MODEL_PATH)
tokenizer = WhisperTokenizer.from_pretrained(MODEL_PATH, language='bn', task=TASK)
processor = WhisperProcessor.from_pretrained(MODEL_PATH, language='bn', task=TASK)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [22]:
ids = tokenizer.encode("")
ids

[50258, 50302, 50359, 50363, 50257]

In [23]:
tokenizer.decode(ids)

'<|startoftranscript|><|bn|><|transcribe|><|notimestamps|><|endoftext|>'

In [24]:
@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    """Collate per-example audio features and label ids into one padded batch.

    Audio features and labels are padded separately, by
    ``processor.feature_extractor.pad`` and ``processor.tokenizer.pad``
    respectively, because they differ in length and padding method.
    """

    # Object exposing `.feature_extractor` and `.tokenizer`, each with a `pad` method.
    processor: Any

    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
        """Build the batch for ``features``.

        Args:
            features: examples carrying ``"input_features"`` and ``"labels"``.

        Returns:
            The padded audio batch with a ``"labels"`` entry added. Padded
            label positions hold -100.
        """
        audio_items = []
        label_items = []
        for feature in features:
            audio_items.append({"input_features": feature["input_features"]})
            label_items.append({"input_ids": feature["labels"]})

        batch = self.processor.feature_extractor.pad(audio_items, return_tensors="pt")
        padded_labels = self.processor.tokenizer.pad(label_items, return_tensors="pt")

        # Positions where the attention mask is not 1 are padding; -100 marks
        # them so they are ignored by the loss.
        is_padding = padded_labels.attention_mask.ne(1)
        labels = padded_labels["input_ids"].masked_fill(is_padding, -100)

        # Drop the first column only when every row starts with the
        # tokenizer's bos token (presumably because it is added again later).
        starts_with_bos = labels[:, 0] == self.processor.tokenizer.bos_token_id
        if starts_with_bos.all().cpu().item():
            labels = labels[:, 1:]

        batch["labels"] = labels

        torch.cuda.empty_cache()

        return batch

In [25]:
data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor=processor)

In [26]:
def prepare_dataset(example):
    """Turn one dataset row into model inputs.

    The audio at ``example["file_path"]`` is loaded at 16 kHz and passed
    through ``feature_extractor``; the first entry of the result is stored
    under ``"input_features"``. The transcript is tokenized (truncated to at
    most 448 tokens) and its ids are stored under ``"labels"``.

    Args:
        example: mapping with ``"file_path"`` and ``"transcripts"`` entries.

    Returns:
        The same ``example`` with ``"input_features"`` and ``"labels"`` added.
    """
    waveform, sample_rate = librosa.load(example["file_path"], sr=16_000)

    extracted = feature_extractor(waveform, sampling_rate=sample_rate)
    example["input_features"] = extracted.input_features[0]

    encoded = tokenizer(
        f"{example['transcripts']}",
        max_length=448,
        padding=True,
        truncation=True,
    )
    example["labels"] = encoded.input_ids

    return example


def filter_inputs(input_audio):
    """Return True when ``input_audio`` has at least one element."""
    return len(input_audio) > 0


def filter_labels(input_labels):
    """Return True when the label sequence ``input_labels`` is not empty."""
    return len(input_labels) > 0

In [27]:
train_df = data[data["split"] == "train"]

In [28]:
"""
    adjust test size accordingly.
"""
# Hold out 20% of the training rows for evaluation. The fixed random_state
# makes the shuffle, and therefore the reported metrics, repeatable.
# NOTE: train_df is overwritten here; re-running only this cell splits the
# already-reduced frame again, so re-run the cell that builds train_df first.
train_df, eval_df = train_test_split(train_df, test_size=0.20, shuffle=True, random_state=42)

In [29]:
len(train_df), len(eval_df)

(10785, 2697)

In [30]:
# NOTE(review): ben_reg_voice_ds is not referenced anywhere else in the visible notebook.
ben_reg_voice_ds = DatasetDict()

# Convert both pandas frames into datasets and group them by split name.
train_split, eval_split = (DS.from_pandas(frame) for frame in (train_df, eval_df))

ds_splits = DatasetDict({"train": train_split, "eval": eval_split})

In [31]:
ds_splits = ds_splits.remove_columns(["split"])

In [32]:
print(ds_splits)

DatasetDict({
    train: Dataset({
        features: ['file_name', 'transcripts', 'file_path', 'district', '__index_level_0__'],
        num_rows: 10785
    })
    eval: Dataset({
        features: ['file_name', 'transcripts', 'file_path', 'district', '__index_level_0__'],
        num_rows: 2697
    })
})


In [33]:
# Re-create the `np.object` alias on the NumPy module.
# NOTE(review): presumably a compatibility shim for a library that still
# refers to `np.object`, which recent NumPy releases no longer provide.
# Confirm it is still needed.
np.object = object

In [34]:
# Run prepare_dataset over every example and drop the original columns, so
# only the fields it adds remain.
ds_splits = ds_splits.map(prepare_dataset, remove_columns=ds_splits.column_names["train"] # could be parallelised (see the `num_proc` argument of `map`)
)

  0%|          | 0/10785 [00:00<?, ?ex/s]

  0%|          | 0/2697 [00:00<?, ?ex/s]

In [35]:
# ds_splits = ds_splits.filter(filter_inputs, input_columns=["input_features"])
# ds_splits = ds_splits.filter(filter_labels, input_columns=["labels"])

In [36]:
len(ds_splits["train"]), len(ds_splits["eval"])

(10785, 2697)

In [37]:
cer = CharErrorRate()
wer = WordErrorRate()

In [38]:
def compute_metrics(pred):
    """Compute word and character error rate for one evaluation pass.

    Args:
        pred: object exposing ``predictions`` (predicted token ids) and
            ``label_ids`` (reference token ids, with -100 at ignored
            positions).

    Returns:
        Dict with ``"wer"`` and ``"cer"`` as plain Python floats.
    """
    pred_ids = pred.predictions

    # -100 cannot be decoded, so those positions are replaced with the pad
    # token id. np.where builds a new array, leaving pred.label_ids untouched.
    label_ids = np.where(pred.label_ids == -100, tokenizer.pad_token_id, pred.label_ids)

    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)
    label_str = tokenizer.batch_decode(label_ids, skip_special_tokens=True)

    # Convert the metric results to floats so the returned dict holds plain numbers.
    wer_res = float(wer(pred_str, label_str))
    cer_res = float(cer(pred_str, label_str))

    # Echo the scores and the first prediction/label pair to the run log;
    # comment these three lines out to silence evaluation output.
    print("WER:", wer_res, "| CER:", cer_res)
    print("Pred:", pred_str[0])
    print("Label:", label_str[0])

    return {"wer": wer_res, "cer": cer_res}

In [39]:
model = WhisperForConditionalGeneration.from_pretrained(MODEL_PATH, device_map="auto")

In [40]:
model_id = "whisper-reg-ben"

In [41]:
# Training configuration: evaluate and checkpoint once per epoch, keep a
# single checkpoint, and reload the checkpoint with the lowest "wer" at the end.
training_args = Seq2SeqTrainingArguments(
    output_dir=model_id,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    fp16=True,
    learning_rate=5e-5,
    weight_decay=1e-2,
    warmup_steps=100,
    num_train_epochs=2,
    evaluation_strategy="epoch", # accepted values: "no", "steps", "epoch"
    save_strategy="epoch",
    predict_with_generate=True,
    generation_max_length=448,
#     save_steps=2976,
#     eval_steps=32,
#     logging_steps=1000,
    save_total_limit=1,
    load_best_model_at_end=True,
    metric_for_best_model="wer",
    greater_is_better=False, # lower "wer" is better
    push_to_hub=False,
    report_to="none",
    remove_unused_columns=False,
)

In [42]:
# Set the generation language and task to Bengali transcription.
model.generation_config.language = "bn"
model.generation_config.task = "transcribe"

# Clear the forced decoder ids and empty the model config's suppressed-token list.
model.generation_config.forced_decoder_ids = None
model.config.suppress_tokens = [] # added later

In [43]:
# Use PyTorch's AdamW instead of the deprecated `transformers.AdamW`.
# This optimizer is handed to the trainer explicitly, so the weight decay
# configured in training_args must be passed here to take effect.
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=training_args.learning_rate,
    weight_decay=training_args.weight_decay,
)





In [44]:
# Assemble the trainer from the objects built in the cells above.
# The former `DataLoaderConfiguration(...)` line was removed: the name is
# never imported in this notebook (NameError on a fresh kernel) and its
# result was not used anywhere.
trainer = Seq2SeqTrainer(
    args=training_args,
    model=model,
    train_dataset=ds_splits["train"],
    eval_dataset=ds_splits["eval"],
    data_collator=data_collator,
    tokenizer=processor.feature_extractor,
    compute_metrics=compute_metrics,
    # Custom optimizer; no scheduler is supplied (second element is None).
    optimizers=(optimizer, None),
#     callbacks=[EarlyStoppingCallback(2, 1.0)]
)


In [45]:
trainer.train()

# to use the high-level pipeline, ensure both the processor outputs and model outputs exist in the same dir
trainer.save_model(training_args.output_dir)
processor.save_pretrained(training_args.output_dir)



Epoch,Training Loss,Validation Loss,Wer,Cer
1,0.2166,0.21157,0.638127,0.3218
2,0.1149,0.183261,0.578048,0.284783


WER: tensor(0.6381) | CER: tensor(0.3218)
Pred: হে কিন্তু তোমার প্রচুর নিয়া আইয়া অবো। পরে কিন্তু মাতা আতাবো। এইডাই আমিও আসলে কি একটা বিষয় কি একটা বিষয় করতে গেলে আমার মনে করো প্রত্যেকটা চাইরটা-পাঁচটা সাইডই চিন্তাভাবনার করে নাই। তোমার চিন্তার কতাডা কিন্তু আমি কইলাম এনে। সেইটাই�
Label: হে কিন্তু তোমার পচুর ঋণে আইয়া পড়বো। পরে কিন্তু মাতা আতাবো। সেটাই আমিও আসলে কি একটা বিষয় কি? একটা বিষয় করতে গেলে আবার মনে করো প্রত্যেকটা চাইরটা-পাঁচটা সাইটই চিন্তাভাবনা করোন নাগবো। তোমার চিন্তার কতাডাই কিন্তু আমি কইলাম এনু মানে �


Non-default generation parameters: {'max_length': 448, 'suppress_tokens': [], 'begin_suppress_tokens': [220, 50257]}


WER: tensor(0.5780) | CER: tensor(0.2848)
Pred: হে কিন্তু তোমার প্রচুর হিনে আইয়া পাবো। পরে কিন্তু মাতা আতাবো কিন্তু। কিন্তু আই আমিও আসলে কি একটা বিষয় কি একটা বিষয় করতে গেলে আমার মনে করো প্রত্যেকটা চাইরটা-পাঁচটা সাইডি চিন্তা-ভাবনা করো নাকি? তোমার চিন্তার কতাডা কিন্তু আমি কইলাম 
Label: হে কিন্তু তোমার পচুর ঋণে আইয়া পড়বো। পরে কিন্তু মাতা আতাবো। সেটাই আমিও আসলে কি একটা বিষয় কি? একটা বিষয় করতে গেলে আবার মনে করো প্রত্যেকটা চাইরটা-পাঁচটা সাইটই চিন্তাভাবনা করোন নাগবো। তোমার চিন্তার কতাডাই কিন্তু আমি কইলাম এনু মানে �


Non-default generation parameters: {'max_length': 448, 'suppress_tokens': [], 'begin_suppress_tokens': [220, 50257]}
There were missing keys in the checkpoint model loaded: ['proj_out.weight'].
Non-default generation parameters: {'max_length': 448, 'suppress_tokens': [], 'begin_suppress_tokens': [220, 50257]}


[]

In [46]:
out_logs = pd.DataFrame(trainer.state.log_history)
out_logs.to_csv("logs.csv")

In [47]:
import gc

del ds_splits

gc.collect()

33

In [48]:
torch.cuda.empty_cache()

In [49]:
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    chunk_length_s=30,
    device=0,
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [50]:
def pretty_sort(filename):
    """Sort key for names shaped like ``"<name> (<number>)<rest>"``.

    Returns:
        ``(name, number)``: the text before ``" ("`` and the integer written
        between ``" ("`` and the next ``")"``, so files order numerically
        within a name rather than lexically.

    Raises:
        ValueError: if ``filename`` does not contain exactly one ``" ("`` or
            the bracketed text is not an integer.
    """
    stem, remainder = filename.split(" (")
    index_text, _, _ = remainder.partition(")")
    return stem, int(index_text)

In [51]:
ids = []
preds = []

In [52]:
# Transcribe every validation clip, with the "sandwip" clips listed first.
# The result lists are reset here so that re-running the cell does not append
# a second set of predictions.
ids = []
preds = []

# Only the audio files directly inside test_data_dir are used, ordered by
# (name, number).
files = sorted(
    (name for name in os.listdir(test_data_dir) if name.lower().endswith(".wav")),
    key=pretty_sort,
)

# Put sandwip first. list.sort is stable, so the sandwip files move to the
# front while both groups keep their (name, number) order. This replaces the
# hard-coded index slice, which only worked for one exact directory listing.
files.sort(key=lambda name: not name.startswith("valid_sandwip ("))
ids = files.copy()

for file in files:
    composed_path = f"{test_data_dir}{file}"
    audio, sr = librosa.load(composed_path, sr=16_000)
    text = pipe(audio)["text"]
    preds.append(text)



In [53]:
sub_df = pd.DataFrame()

In [54]:
sub_df["id"] = ids
sub_df["sentence"] = preds

In [55]:
sub_df.to_csv("submission.csv", index=False)

In [56]:
sub_df.head(20)

Unnamed: 0,id,sentence
0,valid_sandwip (1).wav,"হরালেখা করি কিন্তু, আইচ্ছা আম সিস্টাই সিস্টাই ..."
1,valid_sandwip (2).wav,লইক্কু আছে নে? অনকার অল্লাইনের লক্ষ্য নাই। আন্...
2,valid_sandwip (3).wav,"ভিতে-ভিতে অইছে বউত টেনশন লাগে। আসলেন, মানে আসল..."
3,valid_sandwip (4).wav,"বউত ভালা বুঝে তারফরেও হউরগা বাড়িত জিরুম, কিচ্চ..."
4,valid_sandwip (5).wav,"আনবে না, এক্কানা রানবে আনবে না। কাইলকা অন পৌরস..."
5,valid_sandwip (6).wav,"ক আমার মজা অইছে দো মজা অইছে দো, এন্নে হেগিন ভা..."
6,valid_sandwip (7).wav,ও একানা ভড্ডা মাস টাছে এগিন একানা কুইট্টা কুদে...
7,valid_sandwip (8).wav,"ধরইছে, এন্নে কাম বারিক্কা ন, যে ইয়ান করবি ইয়ান..."
8,valid_sandwip (9).wav,তো এই আগাইরে এগুনেরে এগুনেরে কেমনে কিরমু? আর ম...
9,valid_sandwip (10).wav,হরালেখার মতো হরালেখা থাইকলে অইন্য কিছু লই চিন্...
