<a href="https://colab.research.google.com/github/ahmedrana603/NLP-Language-Modeling-for-Urdu-News-Articles/blob/main/Language_Modeling_for_Urdu_News_Articles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PART 1 - BBC Urdu Dataset Collection and Preprocessing**

# **Importing Libraries**

In [2]:
import requests
from bs4 import BeautifulSoup
import json
import time
import re

# **Base URL**

In [3]:
base_url = "https://www.bbc.com/urdu/topics/cjgn7n9zzq7t"

article_links = set()
raw_articles = []
metadata_list = []

# **Collecting Article Links**

In [4]:

for page in range(1, 50):
    url = f"{base_url}?page={page}"
    res = requests.get(url)
    soup = BeautifulSoup(res.text, "html.parser")

    for a in soup.select("h2 a[href*='/urdu/articles/']"):
        href = a["href"]
        if href.startswith("/"):
            href = "https://www.bbc.com" + href
        article_links.add(href)

    if len(article_links) >= 270:
        break

article_links = list(article_links)[:270]


# **Scrapping Articles**

In [5]:
for idx, link in enumerate(article_links, 1):
    res = requests.get(link)
    soup = BeautifulSoup(res.text, "html.parser")

    title_tag = soup.find("h1", class_="article-heading")
    title = title_tag.get_text(strip=True) if title_tag else "No title found"

    date_tag = soup.find("time")
    date = date_tag.get_text(strip=True) if date_tag else "No date found"

    author_tag = soup.find("span", class_="byline__name")
    author = author_tag.get_text(strip=True) if author_tag else "BBC Urdu"

    category_tag = soup.find("a", class_="bbc-1f2hn8h e1hk9ate4")
    category = category_tag.get_text(strip=True) if category_tag else "Unknown"

    body_paragraphs = []

    article_tag = soup.find("article")
    if article_tag:
        for p in article_tag.find_all("p"):
            text = p.get_text(strip=True)
            if text.startswith("©") or "،تصویر کا ذریعہ" in text:
                continue
            body_paragraphs.append(text)

    if not body_paragraphs:
        for div in soup.find_all("div", class_=lambda x: x and "RichTextComponentWrapper" in x):
            for p in div.find_all("p"):
                text = p.get_text(strip=True)
                if text.startswith("©") or "،تصویر کا ذریعہ" in text:
                    continue
                body_paragraphs.append(text)

    if not body_paragraphs:
        for div in soup.find_all("div", {"dir": "rtl"}):
            for p in div.find_all("p"):
                text = p.get_text(strip=True)
                if len(text) > 5:
                    body_paragraphs.append(text)

    body = "\n".join(body_paragraphs).strip()


    raw_articles.append((idx, body))
    metadata_list.append({
        "article_id": idx,
        "title": title,
        "url": link,
        "category": category,
        "date": date,
        "author": author
    })

    time.sleep(0.5)


# **Txt File**

In [6]:
with open("raw.txt", "w", encoding="utf-8") as f:
    for idx, body in raw_articles:
        f.write(f"### Article {idx} ###\n")
        f.write(body + "\n\n")


# **Metadata JSON file**

In [7]:
with open("metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata_list, f, ensure_ascii=False, indent=2)


# **Diacritics Removal**

In [8]:

def remove_diacritics(text):
    """
    Removes Urdu diacritics (Aarabs) from text.
    Unicode ranges:
    064B–065F
    0670
    06D6–06ED
    """
    diacritics_pattern = r'[\u064B-\u065F\u0670\u06D6-\u06ED]'
    return re.sub(diacritics_pattern, '', text)


with open("raw.txt", "r", encoding="utf-8", errors='ignore') as f:
    raw_content = f.read()


cleaned_content = remove_diacritics(raw_content)


with open("no_diacritics.txt", "w", encoding="utf-8") as f:
    f.write(cleaned_content)


print("Diacritics removed successfully.")

Diacritics removed successfully.


# **Noise Removal**

# **Removal of Non-Urdu Text**

In [9]:
def remove_urls(text):
    """Remove URLs like http://... or www..."""
    url_pattern = r'http\S+|www\S+'
    return re.sub(url_pattern, '', text)

def remove_emojis(text):
    """Remove emojis"""
    emoji_pattern = re.compile("["
                               u"\U0001F600-\U0001F64F"
                               u"\U0001F300-\U0001F5FF"
                               u"\U0001F680-\U0001F6FF"
                               u"\U0001F1E0-\U0001F1FF"
                               "]+", flags=re.UNICODE)
    return emoji_pattern.sub('', text)

def remove_english(text):
    """Remove English letters"""
    english_pattern = r'[A-Za-z]+'
    return re.sub(english_pattern, '', text)

def remove_navigation_text(text):
    """Remove common web/navigation phrases"""
    unwanted_phrases = [
        "مواد پر جائیں",
        "سبسکرائب کرنے کے لیے کلک کریں",
        "بی بی سی اردو کی خبروں اور فیچرز کو اپنے فون پر حاصل کریں",
        "اپنے فون پر حاصل کریں",
        "کلک کریں"
    ]
    for phrase in unwanted_phrases:
        text = text.replace(phrase, '')
    return text

def remove_noise(text):
    """Apply all noise removal rules"""
    text = remove_urls(text)
    text = remove_emojis(text)
    text = remove_english(text)
    text = remove_navigation_text(text)
    return text


def remove_non_urdu(text):
    """Keep only Urdu letters, digits, spaces, Urdu punctuation"""
    return re.sub(r'[^\u0600-\u06FF\s۔؟!،0-9]', '', text)


with open("no_diacritics.txt", "r", encoding="utf-8", errors='ignore') as f:
    content = f.read()

content = remove_noise(content)

split_articles = content.split("### Article ")
filtered_articles = []

for part in split_articles:
    if not part.strip():
        continue

    lines = part.split("\n", 1)
    header_num = lines[0].strip()
    header = f"### Article {header_num} ###"
    body = lines[1] if len(lines) > 1 else ""

    body = remove_non_urdu(body)

    filtered_articles.append(header + "\n" + body.strip() + "\n\n")

with open("urdu_only_filtered.txt", "w", encoding="utf-8") as f:
    f.writelines(filtered_articles)

print("Noise removed and non-Urdu text filtered. Article headers preserved. File ready: urdu_only_filtered.txt")

Noise removed and non-Urdu text filtered. Article headers preserved. File ready: urdu_only_filtered.txt


# **Sentence Segmentation**

In [10]:
input_file = "urdu_only_filtered.txt"
output_file = "segmented.txt"

with open(input_file, "r", encoding="utf-8") as f:
    content = f.read()

articles = re.split(r'(?=### Article \d+ ###)', content)

segmented_articles = []

for article in articles:
    article = article.strip()
    if not article:
        continue

    lines = article.split("\n", 1)
    header = lines[0].strip()
    body = lines[1] if len(lines) > 1 else ""


    body = re.sub(r'([۔؟!])\s*', r'\1\n', body)

    body = re.sub(r'\n+', '\n', body)

    body = body.strip()

    segmented_articles.append(header + "\n" + body + "\n\n")

with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(segmented_articles)

print("Sentence segmentation complete. File saved as segmented.txt")


Sentence segmentation complete. File saved as segmented.txt


# **Whitespace and Formatting Normalization**

In [11]:
import re

input_file = "segmented.txt"
output_file = "normalized.txt"

def normalize_whitespace(text):
    lines = text.split('\n')
    cleaned_lines = []

    for line in lines:
        line = re.sub(r'\s+', ' ', line)

        line = line.strip()

        cleaned_lines.append(line)

    cleaned_text = '\n'.join([l for l in cleaned_lines if l])

    return cleaned_text


with open(input_file, "r", encoding="utf-8") as f:
    content = f.read()

articles = re.split(r'(?=### Article \d+ ###)', content)

normalized_articles = []

for article in articles:
    article = article.strip()
    if not article:
        continue

    parts = article.split("\n", 1)
    header = parts[0].strip()
    body = parts[1] if len(parts) > 1 else ""

    body = normalize_whitespace(body)

    normalized_articles.append(header + "\n" + body + "\n\n")

with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(normalized_articles)

print("Whitespace and formatting normalization complete.")
print("File saved as normalized.txt")


Whitespace and formatting normalization complete.
File saved as normalized.txt


# **Custom Linguistic Processing**

In [12]:
import re

input_file = "normalized.txt"
output_file = "cleaned.txt"


def urdu_tokenizer(text, is_header=False):
    """
    Tokenizes Urdu text:
    - Replaces numbers with <NUM> only for body text
    - Separates punctuation
    """
    if not is_header:
        text = re.sub(r'\d+', '<NUM>', text)

    text = re.sub(r'([۔،؟!])', r' \1 ', text)

    text = re.sub(r'\s+', ' ', text).strip()

    tokens = text.split(" ")
    return tokens

lemmatizer_map = {
    "ہیں": "ہے",
    "تھیں": "تھا",
    "گئیں": "گیا",
    "کرتی": "کرتا"
}

def urdu_lemmatizer(word):
    """
    Rule-based lemmatizer for Urdu:
    - Handles plurals (وں, یں, ات)
    - Feminine → Masculine (ی → ا)
    - Handles irregular forms via dictionary
    """
    if word in lemmatizer_map:
        return lemmatizer_map[word]

    if word.endswith("وں") and len(word) > 3:
        return word[:-2]

    if word.endswith("یں") and len(word) > 3:
        return word[:-2]

    if word.endswith("ات") and len(word) > 3:
        return word[:-2]

    if word.endswith("ی") and len(word) > 3:
        return word[:-1] + "ا"

    return word


suffixes = [
    "وں", "یں", "ات", "یاں",
    "نے", "ہے", "ہوں"
]

def urdu_stemmer(word):
    """
    Light stemmer to reduce vocabulary without destroying sentence structure.
    """
    for suffix in sorted(suffixes, key=len, reverse=True):
        if word.endswith(suffix) and len(word) > len(suffix) + 1:
            return word[:-len(suffix)]
    return word


with open(input_file, "r", encoding="utf-8") as f:
    content = f.read()

articles = re.split(r'(?=### Article \d+ ###)', content)

processed_articles = []

for article in articles:
    article = article.strip()
    if not article:
        continue

    parts = article.split("\n", 1)
    header = parts[0].strip()
    body = parts[1] if len(parts) > 1 else ""

    sentences = body.split("\n")
    processed_sentences = []

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        tokens = urdu_tokenizer(sentence, is_header=False)
        tokens = [urdu_lemmatizer(tok) for tok in tokens]

        tokens = [urdu_stemmer(tok) for tok in tokens]

        processed_sentences.append(" ".join(tokens))

    processed_body = "\n".join(processed_sentences)

    processed_articles.append(header + "\n" + processed_body + "\n\n")

with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(processed_articles)

print("Custom Tokenization, Lemmatization, and Light Stemming complete.")
print(f"File saved as {output_file}")


Custom Tokenization, Lemmatization, and Light Stemming complete.
File saved as cleaned.txt


# **Part 2 - BBC Style Urdu News Article Generation**

# **Language Model Training**

In [22]:

import re
import random
from collections import defaultdict, Counter
import math

print("Loading preprocessed dataset from cleaned.txt...")

with open("cleaned.txt", "r", encoding="utf-8") as f:
    content = f.read()

articles = content.split("### Article ")
all_tokens = []

for article in articles:
    if not article.strip():
        continue

    lines = article.split("\n", 1)
    if len(lines) > 1:
        body = lines[1].strip()
        tokens = body.split()
        all_tokens.extend(tokens)

print(f"Total tokens: {len(all_tokens)}")
print(f"Vocabulary size: {len(set(all_tokens))}")


Loading preprocessed dataset from cleaned.txt...
Total tokens: 427754
Vocabulary size: 12683


# **UNIGRAM Model + Smoothing**

In [33]:
from collections import Counter

class UnigramModel:

    def __init__(self, smoothing='add-k', k=0.1):
        self.unigram_counts = Counter()
        self.total_words = 0
        self.vocabulary = set()
        self.vocab_size = 0
        self.smoothing = smoothing
        self.k = 1.0 if smoothing == 'laplace' else k

    def train(self, tokens):
        self.unigram_counts = Counter(tokens)
        self.total_words = len(tokens)
        self.vocabulary = set(tokens)
        self.vocab_size = len(self.vocabulary)

    def get_probability(self, word):
        count = self.unigram_counts[word]
        numerator = count + self.k
        denominator = self.total_words + (self.k * self.vocab_size)
        return numerator / denominator


unigram_model = UnigramModel(smoothing='add-k', k=0.1)
unigram_model.train(all_tokens)

print("[UNIGRAM MODEL TRAINED]")
print("Vocabulary size:", unigram_model.vocab_size)

print("\nTop 10 unigrams:")
for word, count in unigram_model.unigram_counts.most_common(10):
    print(f"{word}: {count}")

unseen_word = "XYZ_UNSEEN_WORD"
prob = unigram_model.get_probability(unseen_word)
print(f"\nSmoothing Demo (Unseen Unigram):")
print(f"P({unseen_word}) = {prob:.10f}")


[UNIGRAM MODEL TRAINED]
Vocabulary size: 12683

Top 10 unigrams:
کے: 18513
۔: 14023
ہے: 13463
میں: 12533
کی: 11908
اور: 8424
سے: 8246
کہ: 8100
نے: 6394
کا: 6073

Smoothing Demo (Unseen Unigram):
P(XYZ_UNSEEN_WORD) = 0.0000002331


# **BIGRAM Model + Smoothing**

In [46]:
from collections import defaultdict, Counter

class BigramModel:

    def __init__(self, smoothing='add-k', k=0.1):
        self.bigram_counts = defaultdict(Counter)
        self.unigram_counts = Counter()
        self.vocabulary = set()
        self.vocab_size = 0
        self.smoothing = smoothing
        self.k = 1.0 if smoothing == 'laplace' else k

    def train(self, tokens):
        tokens = ['<START>'] + tokens + ['<END>']
        self.unigram_counts = Counter(tokens)
        self.vocabulary = set(tokens)
        self.vocab_size = len(self.vocabulary)
        for i in range(len(tokens)-1):
            w1, w2 = tokens[i], tokens[i+1]
            self.bigram_counts[w1][w2] += 1

    def get_probability(self, w1, w2):
        bigram_count = self.bigram_counts[w1][w2]
        unigram_count = self.unigram_counts[w1]
        numerator = bigram_count + self.k
        denominator = unigram_count + (self.k * self.vocab_size)
        return numerator / denominator

    def get_next_word_probabilities(self, w1):
        """
        Calculates the probability distribution for the next word given w1.
        """
        probabilities = {}
        for w2 in self.vocabulary:
            probabilities[w2] = self.get_probability(w1, w2)

        total_prob = sum(probabilities.values())
        if total_prob > 0:
            for w2 in probabilities:
                probabilities[w2] /= total_prob
        return probabilities


bigram_model = BigramModel(smoothing='add-k', k=0.1)
bigram_model.train(all_tokens)

print("[BIGRAM MODEL TRAINED]")
print("Vocabulary size:", bigram_model.vocab_size)

bigram_list = []
for w1 in bigram_model.bigram_counts:
    for w2 in bigram_model.bigram_counts[w1]:
        count = bigram_model.bigram_counts[w1][w2]
        bigram_list.append(((w1, w2), count))

bigram_list.sort(key=lambda x: x[1], reverse=True)

print("\nTop 10 bigrams:")
for bigram, count in bigram_list[:10]:
    print(f"{bigram}: {count}")

test_w1 = "پاکستان"
unseen_word = "XYZ_UNSEEN_WORD"
prob = bigram_model.get_probability(test_w1, unseen_word)
print(f"\nSmoothing Demo (Unseen Bigram):")
print(f"P({unseen_word} | {test_w1}) = {prob:.10f}")

[BIGRAM MODEL TRAINED]
Vocabulary size: 12685

Top 10 bigrams:
('ہے', '۔'): 5499
('ہے', 'کہ'): 2948
('کے', 'لیے'): 2004
('کے', 'مطابق'): 1347
('انھ', 'نے'): 1269
('تھا', 'کہ'): 1215
('ہے', 'اور'): 1156
('ان', 'کے'): 1149
('تھا', '۔'): 1146
('کے', 'بعد'): 1114

Smoothing Demo (Unseen Bigram):
P(XYZ_UNSEEN_WORD | پاکستان) = 0.0000260112


# **TRIGRAM Model + Smoothing**

In [47]:
from collections import defaultdict, Counter

class TrigramModel:

    def __init__(self, bigram_model, smoothing='add-k', k=0.1):
        self.trigram_counts = defaultdict(lambda: defaultdict(Counter))
        self.bigram_context_counts = defaultdict(int)
        self.vocabulary = set()
        self.vocab_size = 0
        self.bigram_model = bigram_model
        self.smoothing = smoothing
        self.k = 1.0 if smoothing == 'laplace' else k

    def train(self, tokens):
        tokens = ['<START>', '<START>'] + tokens + ['<END>']
        self.vocabulary = set(tokens)
        self.vocab_size = len(self.vocabulary)
        for i in range(len(tokens)-2):
            w1, w2, w3 = tokens[i], tokens[i+1], tokens[i+2]
            self.trigram_counts[w1][w2][w3] += 1
            self.bigram_context_counts[(w1, w2)] += 1

    def get_probability(self, w1, w2, w3):
        trigram_count = self.trigram_counts[w1][w2][w3]
        context_count = self.bigram_context_counts[(w1, w2)]
        if context_count < 2:
            return self.bigram_model.get_probability(w2, w3)
        numerator = trigram_count + self.k
        denominator = context_count + (self.k * self.bigram_model.vocab_size)
        return numerator / denominator

    def get_next_word_probabilities(self, w1, w2):
        """
        Calculates the probability distribution for the next word given w1 and w2.
        Includes backoff to bigram model if trigram context count is too low.
        """
        probabilities = {}
        context_count = self.bigram_context_counts[(w1, w2)]

        if context_count < 2:
            probabilities = self.bigram_model.get_next_word_probabilities(w2)
        else:
            for w3 in self.bigram_model.vocabulary:
                trigram_count = self.trigram_counts[w1][w2][w3]
                numerator = trigram_count + self.k
                denominator = context_count + (self.k * self.bigram_model.vocab_size)
                probabilities[w3] = numerator / denominator

        total_prob = sum(probabilities.values())
        if total_prob > 0:
            for w3 in probabilities:
                probabilities[w3] /= total_prob
        return probabilities


trigram_model = TrigramModel(bigram_model, smoothing='add-k', k=0.1)
trigram_model.train(all_tokens)

print("[TRIGRAM MODEL TRAINED]")
print("Vocabulary size:", trigram_model.vocab_size)

trigram_list = []
for w1 in trigram_model.trigram_counts:
    for w2 in trigram_model.trigram_counts[w1]:
        for w3 in trigram_model.trigram_counts[w1][w2]:
            count = trigram_model.trigram_counts[w1][w2][w3]
            trigram_list.append(((w1, w2, w3), count))

trigram_list.sort(key=lambda x: x[1], reverse=True)

print("\nTop 10 trigrams:")
for trigram, count in trigram_list[:10]:
    print(f"{trigram}: {count}")

test_w1 = "میں"
test_w2 = "پاکستان"
unseen_word = "XYZ_UNSEEN_WORD"
prob = trigram_model.get_probability(test_w1, test_w2, unseen_word)
print(f"\nSmoothing Demo (Unseen Trigram):")
print(f"P({unseen_word} | {test_w1}, {test_w2}) = {prob:.10f}")

[TRIGRAM MODEL TRAINED]
Vocabulary size: 12685

Top 10 trigrams:
('۔', 'انھ', 'نے'): 699
('کہنا', 'تھا', 'کہ'): 684
('کا', 'کہنا', 'تھا'): 647
('کی', 'جانب', 'سے'): 572
('بی', 'بی', 'سی'): 566
('نے', 'کہا', 'کہ'): 529
('کہتے', 'ہے', 'کہ'): 485
('کے', 'بارے', 'میں'): 478
('کہنا', 'ہے', 'کہ'): 466
('کا', 'کہنا', 'ہے'): 454

Smoothing Demo (Unseen Trigram):
P(XYZ_UNSEEN_WORD | میں, پاکستان) = 0.0000702494


# **Article Generation System and Interface**

# **Helper Function**

In [38]:
def sample_next_word(prob_dict):
    words = list(prob_dict.keys())
    probs = list(prob_dict.values())
    return random.choices(words, weights=probs, k=1)[0]


# **Bigram Article Generator**

In [39]:
def generate_bigram_article(model, seed_tokens, min_words=200, max_words=300):

    generated = seed_tokens.copy()

    while len(generated) < max_words:

        last_word = generated[-1]

        if last_word not in model.vocabulary:
            next_word = random.choice(list(model.vocabulary))
        else:
            prob_dist = model.get_next_word_probabilities(last_word)
            next_word = sample_next_word(prob_dist)

        generated.append(next_word)

        if len(generated) >= min_words and next_word == "۔":
            break

    return " ".join(generated)


# **Trigram Generator with Backoff**

In [40]:
def generate_trigram_article(trigram_model, unigram_model, seed_tokens,
                              min_words=200, max_words=300):

    generated = seed_tokens.copy()

    while len(generated) < max_words:

        if len(generated) < 2:
            next_word = random.choice(list(trigram_model.vocabulary))
        else:
            w1 = generated[-2]
            w2 = generated[-1]

            if w1 in trigram_model.vocabulary and w2 in trigram_model.vocabulary:
                prob_dist = trigram_model.get_next_word_probabilities(w1, w2)
                next_word = sample_next_word(prob_dist)
            else:
                next_word = random.choice(list(unigram_model.vocabulary))

        generated.append(next_word)

        if len(generated) >= min_words and next_word == "۔":
            break

    return " ".join(generated)


# **Console Interface**

In [41]:
def article_generation_interface():

    print("="*60)
    print("BBC Style Urdu News Article Generator")
    print("="*60)

    print("Select Language Model:")
    print("1. Bigram Model")
    print("2. Trigram Model (with Backoff)")

    choice = input("Enter choice (1 or 2): ").strip()

    seed_prompt = input("Enter seed prompt (5–8 Urdu words): ").strip()
    seed_tokens = seed_prompt.split()

    if len(seed_tokens) < 5:
        print("Invalid seed prompt. Must contain at least 5 words.")
        return

    print("\nGenerating article...\n")

    if choice == "1":
        article = generate_bigram_article(bigram_model, seed_tokens)
    elif choice == "2":
        article = generate_trigram_article(trigram_model, unigram_model, seed_tokens)
    else:
        print("Invalid model choice.")
        return

    print("\n" + "="*60)
    print("Generated Article:\n")
    print(article)
    print("\n" + "="*60)


In [48]:
article_generation_interface()

BBC Style Urdu News Article Generator
Select Language Model:
1. Bigram Model
2. Trigram Model (with Backoff)
Enter choice (1 or 2): 2
Enter seed prompt (5–8 Urdu words): پاکستان میں مہنگائی کی شرح میں

Generating article...


Generated Article:

پاکستان میں مہنگائی کی شرح میں کہا تعزیر جنھ مواقع دروازہ رک رائیڈر آٹھو ٹھہرا گلزار انتشار بدانتظاما مہیسر گارنٹیز کٹاؤ چھپاتے کاپا آک سالم ائیڈنٹا ور ریولوشنرا پروسیسنگ ادھے سبی لیڈرز روس جول پلیٹساس سیلابا ترمیما پکی سرزد ہزار طفیل اثار ٹھیکے بیگم قسمتا زچہ اہنگا فہیم فوکل زبح دعوے شکریے ویلنگ ناں پیتے سیکھا اندھیرا ساس ہوجائ پنگ لاہورا ثمن ظالمانہ گورنرا جھمکے سموئے ازسرنو اجارہ بھتیجا کشادگا انویسٹیگینش سشما برادار نازیہ چیٹ تہمینہ اونچا ڈسینائکے جتنا کٹے ڈینٹسٹ نکاراگوا ڈائیریکٹر ہدایت لوئر ہراسانا مینو چشمے آگمنٹ کونسلز واسطہ تمہید کمبھ سکے رقص سیرئیل مزاکر وزرا سٹامپ سنگھارامے موڈیفیکیشن نوکر ہلکے بے کبڈا چھالا پریشانی تحفے ہمالیہ دھماکے جنس آباو ٹرانسپرینسا دلچسپ نگلنا فنکار میکسیلوفیشل تمباکو صنفا مریض ہیلمٹ شجرہ پر متعارف ادھ آصف یون