# 1. Download Main Entry Categories and Pages Manually

In [1]:
import requests
from bs4 import BeautifulSoup
import json
import itertools
import pandas as pd
import urllib.parse

In [2]:
def extract_links_from_mp_other_content():
    """
    Extract all links from the mp-other-content div on the Burmese Wikipedia main page
    that start with https://my.wikipedia.org/wiki/
    """
    # URL of the Burmese Wikipedia main page
    url = "https://my.wikipedia.org/wiki/%E1%80%97%E1%80%9F%E1%80%AD%E1%80%AF%E1%80%85%E1%80%AC%E1%80%99%E1%80%BB%E1%80%80%E1%80%BA%E1%80%94%E1%80%BE%E1%80%AC"

    # Send a request to get the HTML content
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'  # Ensure proper encoding for Burmese characters

    if response.status_code != 200:
        return f"Error: Failed to retrieve the page (Status Code: {response.status_code})"

    print("Successfully retrieved the Wikipedia page")

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the div with id="mp-other-content"
    mp_other_content = soup.find('div', id='mp-other-content')

    if not mp_other_content:
        return "Error: Could not find the div with id='mp-other-content' on the page."

    print("Found the mp-other-content div")

    # Extract all links from this div
    page_links = []
    category_links = []
    header = ""
    for link in mp_other_content.find_all('a'):
        href = link.get('href', '')
        text = link.get_text(strip=True)
        title = link.get('title', '')

        # Get the full HTML content of the link
        link_html = str(link)
        # Check if the link contains a <b> tag
        has_bold = '<b>' in link_html
        if has_bold:
            header = title.replace("ကဏ္ဍ:", "")



        # Convert relative URLs to absolute URLs
        if href.startswith('/wiki/'):
            full_url = f"https://my.wikipedia.org{href}"
        else:
            full_url = href

        # Check if the URL starts with https://my.wikipedia.org/wiki/
        if full_url.startswith("https://my.wikipedia.org/wiki/%E1%80%80%E1%80%8F%E1%80%B9%E1%80%8D:"):
            category_link_urls = [cat['url'] for cat in category_links]
            if full_url not in category_link_urls:
                category_links.append({
                    'header': header,
                    'text': title.replace("ကဏ္ဍ:", ""),
                    'title': title,
                    'url': full_url
                })
        elif full_url.startswith('https://my.wikipedia.org/wiki/'):
            link_urls = [cat['url'] for cat in page_links]
            if full_url not in link_urls:
                page_links.append({
                    'header': header,
                    'text': text,
                    'title': title,
                    'url': full_url
                })


    print(f"Found {len(page_links)} links starting with https://my.wikipedia.org/wiki/")
    print(f"Found {len(category_links)} category_links starting with https://my.wikipedia.org/wiki/%E1%80%80%E1%80%8F%E1%80%B9%E1%80%8D:")

    return {
        'page_links': page_links,
        'category_links': category_links,
        'total_links_found': len(page_links) + len(category_links)
    }

if __name__ == "__main__":
    result = extract_links_from_mp_other_content()

    # Print the result in a readable format
    print("\n===== RESULTS =====")

    if isinstance(result, str):
        print(result)  # Error message
    else:
        print(f"Found {result['total_links_found']} links in mp-other-content:")

        # Save to a JSON file
        with open('../data/myanmar_wiki_entry_links.json', 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)

        print("\nResults also saved to burmese_wiki_links.json")

Successfully retrieved the Wikipedia page
Found the mp-other-content div
Found 36 links starting with https://my.wikipedia.org/wiki/
Found 65 category_links starting with https://my.wikipedia.org/wiki/%E1%80%80%E1%80%8F%E1%80%B9%E1%80%8D:

===== RESULTS =====
Found 101 links in mp-other-content:

Results also saved to burmese_wiki_links.json


In [3]:
with open("../data/myanmar_wiki_entry_links.json") as file:
    myanmar_wiki_links = json.load(file)

categories = myanmar_wiki_links['category_links']
pages = myanmar_wiki_links['page_links']
categories = [[category['text']] for category in categories]

# 2. Get the pages from main entry link with url

In [4]:
base_url = "https://my.wikipedia.org/w/api.php"

In [5]:
def get_page_by_url(url):
    """
    Get a Wikipedia page by its URL

    Args:
        url (str): Full Wikipedia page URL

    Returns:
        dict: Page content information or None if not found
    """
    # Extract the page title from the URL

    # Parse the URL
    parsed_url = urllib.parse.urlparse(url)

    # Get the path component and remove the '/wiki/' prefix
    path = parsed_url.path

    if path.startswith('/wiki/'):
        # Extract the title and decode it
        page_title = urllib.parse.unquote(path.replace("/wiki/", ""))
    else:
        raise f"Link has issue: {url}"

    # Now use the API to get the page by title
    params = {
        'action': 'query',
        'titles': page_title,
        'prop': 'extracts|categories|info',
        'explaintext': True,
        'exsectionformat': 'plain',
        'inprop': 'url',
        'format': 'json',
        'redirects': True
    }

    response = requests.get(url=base_url, params=params).json()

    if 'query' in response and 'pages' in response['query']:
        # The API returns a dict with page IDs as keys
        # We don't know the page ID in advance, so we get the first (and only) page
        pages = response['query']['pages']

        # Check if the page exists (page ID -1 means it doesn't exist)
        if '-1' in pages:
            raise f"ID has issue: {url}"

        page_id = next(iter(pages))
        page_data = pages[page_id]

        return {
            'title': page_data.get('title', ''),
            'content': page_data.get('extract', ''),
            'categories': [cat['title'] for cat in page_data.get('categories', [])],
            'url': page_data.get('fullurl', ''),
            'last_modified': page_data.get('touched', ''),
        }
    else:
        raise f"Response has issue: {url}"


In [6]:
page_data = get_page_by_url("https://my.wikipedia.org/wiki/%E1%80%80%E1%80%99%E1%80%B9%E1%80%98%E1%80%AC")
df = pd.DataFrame([page_data])
df

Unnamed: 0,title,content,categories,url,last_modified
0,ကမ္ဘာဂြိုဟ်,""""" ဖြင့် နက္ခတ္တဗေဒတွင် သင်္ကေတပြုရိုးရှိသည့် ...","[ကဏ္ဍ:CS1 maint: multiple names: editors list,...",https://my.wikipedia.org/wiki/%E1%80%80%E1%80%...,2025-03-19T16:53:42Z


# 3. Get the pages and categories for a given category

In [7]:
# import pandas as pd
# import time
#
# session = requests.Session()
#
# def get_category_members(category):
#     # Add Category: prefix if not present
#     if not category.startswith('Category:'):
#         category = f"Category:{category}"
#
#     pages = []
#     subcategories = []
#
#     params = {
#         'action': 'query',
#         'list': 'categorymembers',
#         'cmtitle': category,
#         'cmtype': 'page|subcat',
#         'cmlimit': 500,
#         'format': 'json'
#     }
#
#     continuation = True
#
#     while continuation:
#         response = session.get(url=base_url, params=params).json()
#
#         if 'error' in response:
#             print(f"Error: {response['error']['info']}")
#             break
#
#         if 'query' not in response:
#             print(f"No results found for {category}")
#             break
#
#         members = response['query']['categorymembers']
#
#         for member in members:
#             ns = member['ns']
#             title = member['title']
#
#             if ns == 14:  # Namespace 14 is for categories
#                 subcategories.append(title)
#             else:
#                 pages.append({
#                     'title': title,
#                     'pageid': member['pageid']
#                 })
#
#         if 'continue' in response:
#             params['cmcontinue'] = response['continue']['cmcontinue']
#         else:
#             continuation = False
#
#     return pages, subcategories
#
# def get_page_content(page_id):
#     params = {
#         'action': 'query',
#         'prop': 'extracts|categories|info',
#         'pageids': page_id,
#         'explaintext': True,
#         'exsectionformat': 'plain',
#         'inprop': 'url',
#         'format': 'json'
#     }
#
#     response = session.get(url=base_url, params=params).json()
#
#     if 'query' in response and 'pages' in response['query']:
#         page_data = response['query']['pages'][str(page_id)]
#         return {
#             'title': page_data.get('title', ''),
#             'content': page_data.get('extract', ''),
#             'categories': [cat['title'] for cat in page_data.get('categories', [])],
#             'url': page_data.get('fullurl', ''),
#             'last_modified': page_data.get('touched', '')
#         }
#     else:
#         return None
#
# def fetch_pages_and_subcategories(category):
#     all_pages = []
#
#     pages, subcategories = get_category_members(category[-1])
#
#     print("Pages:", len(pages))
#     print("Subcategories:", len(subcategories))
#
#     # Process pages
#     for page in pages:
#         print(f"Fetching page: {page['title']}")
#         page_data = get_page_content(page['pageid'])
#
#         if page_data and page_data['content']:
#             page_data['source_category'] = category
#             all_pages.append(page_data)
#
#         # Be gentle with the API
#         time.sleep(1)
#
#     subcategories = [subcategory.replace("ကဏ္ဍ:", "") for subcategory in subcategories]
#
#     # Convert to DataFrame
#     if all_pages:
#         return pd.DataFrame(all_pages), subcategories
#     else:
#         return pd.DataFrame(columns=['title', 'content', 'categories', 'url', 'last_modified', 'source_category']), subcategories

In [56]:
import random
from tqdm import tqdm
import pandas as pd
import time
import multiprocessing
from functools import partial
import logging

# Set up logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

session = requests.Session()

def get_category_members(category):
    # Add Category: prefix if not present
    if not category.startswith(('Category:', 'ကဏ္ဍ:')):
        category = f"Category:{category}"

    pages = []
    subcategories = []

    params = {
        'action': 'query',
        'list': 'categorymembers',
        'cmtitle': category,
        'cmtype': 'page|subcat',
        'cmlimit': 500,
        'format': 'json'
    }

    continuation = True



    max_retries = 100
    response = None
    for attempt in range(max_retries):
        if not continuation:
            return pages, subcategories

        try:
            while continuation:
                response = session.get(url=base_url, params=params).json()

                if 'error' in response:
                    print(f"Error: {response['error']['info']}")
                    break

                if response is None:
                    print(f"Timeout: {category}")
                    break

                if 'query' not in response:
                    print(f"No results found for {category}")
                    break

                members = response['query']['categorymembers']

                for member in members:
                    ns = member['ns']
                    title = member['title']

                    if ns == 14:  # Namespace 14 is for categories
                        subcategories.append(title)
                    else:
                        pages.append({
                            'title': title,
                            'pageid': member['pageid']
                        })

                if 'continue' in response:
                    params['cmcontinue'] = response['continue']['cmcontinue']
                else:
                    continuation = False

        except (requests.exceptions.RequestException, ValueError) as e:
            wait_time = (attempt + 1) * 5  # Increasing backoff
            logger.warning(f"Error fetching category {category}. Retrying in {wait_time}s... ({attempt+1}/{max_retries})")
            time.sleep(wait_time)

def get_page_content(page_id, session=None):
    # Create a new session if none is provided
    local_session = session or requests.Session()

    params = {
        'action': 'query',
        'prop': 'extracts|categories|info',
        'pageids': page_id,
        'explaintext': True,
        'exsectionformat': 'plain',
        'inprop': 'url',
        'format': 'json'
    }

    max_retries = 100
    for attempt in range(max_retries):
        try:
            response = local_session.get(url=base_url, params=params).json()
            if 'query' in response and 'pages' in response['query']:
                page_data = response['query']['pages'][str(page_id)]
                return {
                    'title': page_data.get('title', ''),
                    'content': page_data.get('extract', ''),
                    'categories': [cat['title'] for cat in page_data.get('categories', [])],
                    'url': page_data.get('fullurl', ''),
                    'last_modified': page_data.get('touched', '')
                }
            else:
                return None
        except (requests.exceptions.RequestException, ValueError) as e:
            wait_time = (attempt + 1) * 5  # Increasing backoff
            logger.warning(f"Error fetching page {page_id}: {e}. Retrying in {wait_time}s... ({attempt+1}/{max_retries})")
            time.sleep(wait_time)

def process_page(page, category, session=None):
    page_data = get_page_content(page['pageid'], session)

    if page_data and page_data['content']:
        page_data['source_category'] = category
        return page_data
    return None

def process_batch(batch, category):
    """Process a batch of pages"""
    results = []
    for page in batch:
        result = process_page(page, category)
        if result:
            results.append(result)
    return results

def fetch_pages_and_subcategories(category):
    _, categories = get_category_info(category[-1])
    pages, subcategories = get_category_members(category[-1])

    if len(pages) == 0:
        return pd.DataFrame(columns=['title', 'content', 'categories', 'url', 'last_modified', 'source_category']), subcategories + categories

    num_processes = 10
    batch_size = min(num_processes, len(pages))

    # num_cores = multiprocessing.cpu_count()


    logger.info(f"Category: {category[-1]}, Pages: {len(pages)}, Subcategories: {len(subcategories)}, Categories: {len(categories)}")

    all_results = []
    total_batches = (len(pages) + batch_size - 1) // batch_size
    for i in tqdm(range(0, len(pages), batch_size), total=total_batches, desc="Processing batches"):
        batch = pages[i:i+batch_size]

        # Create a pool for each batch to ensure fresh connections
        with multiprocessing.Pool(processes=num_processes) as pool:
            # Create a partial function with fixed arguments
            process_page_partial = partial(process_page, category=category, session=session)

            # Process the batch
            results = pool.map(process_page_partial, batch)
            all_results.extend(results)

        # Filter out None results
    all_pages = [result for result in all_results if result is not None]

    # Convert to DataFrame
    if all_pages:
        return pd.DataFrame(all_pages), subcategories + categories
    else:
        return pd.DataFrame(columns=['title', 'content', 'categories', 'url', 'last_modified', 'source_category']), subcategories + categories


0
([], ['ကဏ္ဍ:ဒဿနိကဗေဒ ပညာရေး'])


In [35]:
def get_page_redirect_title(title):
    # Now use the API to get the page by title
    params = {
        'action': 'query',
        'titles': title,
        'prop': 'extracts|categories|info',
        'explaintext': True,
        'exsectionformat': 'plain',
        'inprop': 'url',
        'format': 'json',
        'redirects': True
    }

    max_retries = 100

    for attempt in range(max_retries):
        try:
            response = requests.get(url=base_url, params=params).json()

            if 'query' in response and 'pages' in response['query']:
                # The API returns a dict with page IDs as keys
                # We don't know the page ID in advance, so we get the first (and only) page
                pages = response['query']['pages']

                page_id = next(iter(pages))
                page_data = pages[page_id]

                return page_data.get('title', '')
        except Exception as e:
            wait_time = (attempt + 1) * 5  # Increasing backoff
            logger.warning(f"Error fetching category {category}. Retrying in {wait_time}s... ({attempt+1}/{max_retries})")
            time.sleep(wait_time)

In [64]:
def get_category_info(category):
    """
    Get details about a category including its normalized name and parent categories
    Returns a tuple of (normalized_name, parent_categories)
    """
    # Add Category: prefix if not present
    if not category.startswith(('Category:', 'ကဏ္ဍ:')):
        category = f"Category:{category}"

    params = {
        'action': 'query',
        'titles': category,
        'prop': 'categories',
        'cllimit': 500,
        'format': 'json'
    }

    try:
        response = session.get(url=base_url, params=params).json()

        # Extract the normalized "to" name
        normalized_name = None
        if 'normalized' in response.get('query', {}):
            for norm in response['query']['normalized']:
                if norm.get('from') == category:
                    normalized_name = norm.get('to')
                    break

        # If no normalization happened, use the original category
        if normalized_name is None:
            normalized_name = category

        # Extract parent categories
        parent_categories = []
        if 'pages' in response.get('query', {}):
            pages = response['query']['pages']

            for page_id, page_info in pages.items():
                if 'categories' in page_info:
                    for cat in page_info['categories']:
                        parent_categories.append(cat['title'])

        return normalized_name, parent_categories

    except Exception as e:
        logger.error(f"Error getting category info: {str(e)}")
        return category, []

# Example usage
to_name, categories = get_category_info('ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း')
print(f"Normalized name: {to_name}")
print(f"Parent categories: {categories}")

Normalized name: ကဏ္ဍ:ဒဿနိကဗေဒနှင့် လူ့အဖွဲ့အစည်း
Parent categories: ['ကဏ္ဍ:ဒဿနိကဗေဒ', 'ကဏ္ဍ:လူ့အဖွဲ့အစည်း']


In [70]:
def get_category_redirect_name(category):
    """
    Get details about a category including its normalized name and parent categories
    Returns a tuple of (normalized_name, parent_categories)
    """
    # Add Category: prefix if not present
    if not category.startswith(('Category:', 'ကဏ္ဍ:')):
        category = f"Category:{category}"

    params = {
        'action': 'query',
        'titles': category,
        'prop': 'categories',
        'cllimit': 500,
        'format': 'json',
        "redirect": True
    }
    max_retries = 100
    for attempt in range(max_retries):
        try:
            response = session.get(url=base_url, params=params).json()

            # Extract the normalized "to" name
            normalized_name = None
            if 'normalized' in response.get('query', {}):
                for norm in response['query']['normalized']:
                    if norm.get('from') == category:
                        normalized_name = norm.get('to')
                        break

            # If no normalization happened, use the original category
            if normalized_name is None:
                normalized_name = category

            return normalized_name
        except Exception as e:
            wait_time = (attempt + 1) * 5  # Increasing backoff
            logger.error(f"Error getting category redirect info: {str(e)}")
            time.sleep(wait_time)

In [71]:
get_page_redirect_title("Category:ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း")

'ကဏ္ဍ:ဒဿနိကဗေဒနှင့် လူ့အဖွဲ့အစည်း'

In [72]:
get_category_redirect_name("Category:ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း")

'ကဏ္ဍ:ဒဿနိကဗေဒနှင့် လူ့အဖွဲ့အစည်း'

In [73]:
get_category_redirect_name("Category:မြန်မာ့ သမိုင်း")

'ကဏ္ဍ:မြန်မာ့ သမိုင်း'

In [60]:
base_url = "https://my.wikipedia.org/w/api.php"

params = {
    'action': 'query',
    'titles': "Category:ဒဿနိကဗေဒ",
    'prop': 'categories',  # This specifically asks for categories
    'cllimit': 500,        # Maximum number of categories to return
    'format': 'json'
}

categories = []
continuation = True
max_retries = 5
response = requests.get(url=base_url, params=params).json()
print(response)


{'batchcomplete': '', 'query': {'normalized': [{'from': 'Category:ဒဿနိကဗေဒ', 'to': 'ကဏ္ဍ:ဒဿနိကဗေဒ'}], 'pages': {'17429': {'pageid': 17429, 'ns': 14, 'title': 'ကဏ္ဍ:ဒဿနိကဗေဒ', 'categories': [{'ns': 14, 'title': 'ကဏ္ဍ:ပင်မ အကြောင်းအရာ ခွဲခြားရေးများ'}]}}}}


In [62]:
response['query']['pages']

{'17429': {'pageid': 17429,
  'ns': 14,
  'title': 'ကဏ္ဍ:ဒဿနိကဗေဒ',
  'categories': [{'ns': 14, 'title': 'ကဏ္ဍ:ပင်မ အကြောင်းအရာ ခွဲခြားရေးများ'}]}}

In [75]:
from functools import partial
import multiprocessing

In [78]:
# Use multiprocessing Pool to process categories in parallel
with multiprocessing.Pool(processes=10) as pool:
    # Use tqdm to show a progress bar
    results = list(tqdm(
        pool.imap(get_page_redirect_title, ['Category:မြန်မာ့ သမိုင်း', "ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း"]),
        total=len(['ကဏ္ဍ:မြန်မာနိုင်ငံ၏ သမိုင်း', 'ကဏ္ဍ:ဒဿနိကဗေဒနှင့် လူ့အဖွဲ့အစည်း']),
        desc="Processing categories"
    ))

Processing categories: 100%|██████████| 2/2 [00:00<00:00,  2.84it/s]


In [79]:
results

['ကဏ္ဍ:မြန်မာနိုင်ငံ၏ သမိုင်း', 'ဒဿနိကဗေဒနှင့် လူ့အဖွဲ့အစည်း']

In [68]:
get_page_redirect_title("Category:မြန်မာ့ သမိုင်း")

'ကဏ္ဍ:မြန်မာနိုင်ငံ၏ သမိုင်း'

In [46]:
get_category_members("ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း")

0
[{'pageid': 82542, 'ns': 14, 'title': 'ကဏ္ဍ:ဒဿနိကဗေဒ ပညာရေး'}]
1
[] ['ကဏ္ဍ:ဒဿနိကဗေဒ ပညာရေး']


([], ['ကဏ္ဍ:ဒဿနိကဗေဒ ပညာရေး'])

In [38]:
df, subcategoreis = fetch_pages_and_subcategories(category=["ဒဿနိကဗေဒနှင့်_လူ့အဖွဲ့အစည်း"])

In [39]:
subcategoreis

['ကဏ္ဍ:ဒဿနိကဗေဒ ပညာရေး']

# 4. Manual Verification of Entry Page Links and Categories Count

In [12]:
manual_verification_links = """
မြန်မာနိုင်ငံ
စီးပွားရေး • နိုင်ငံရေး • ပညာရေး • ပထဝီဝင် • ဘာသာစကားများ • ယဉ်ကျေးမှု • လူမျိုးများ • သမိုင်း • အတ္ထုပ္ပတ္တိ • အုပ်ချုပ်ရေး



ကျန်းမာရေး
ပြည်သူ့ကျန်းမာရေး • ဆေးပညာ • ဆေးများ • ရောဂါ


ဘာသာရေး
ခရစ်ယာန် • ဂျိန်း • ဂျူး • ဆစ်ခ် • ဗုဒ္ဓ • ဟိန္ဒူ • အစ္စလာမ်


သိပ္ပံ
ဇီဝဗေဒ • ဓာတုဗေဒ • နက္ခတ္တဗေဒ • ဘူမိဗေဒ • ရုက္ခဗေဒ • ရူပဗေဒ • သတ္တဗေဒ


ယဉ်ကျေးမှု
ကဗျာ • ဂီတ • ဂိမ်းများ • စာပေ • ဖျော်ဖြေရေး • ရုပ်ရှင် • ပန်းချီ • ပြတိုက်များ • ပွဲတော်များ • ဗိသုကာ • ဘာသာစကား • အနုပညာ • အားကစား


နည်းပညာ
ကွန်ပျူတာ • ဆက်သွယ်ရေး • ဆော့ဖ်ဝဲလ် • သယ်ယူပို့ဆောင်ရေး • အီလက်ထရွန်းနစ် • အင်ဂျင်နီယာ


ပထဝီဝင်
ဂြိုဟ် • ကမ္ဘာ • တိုက် • (တောင်အမေရိက • မြောက်အမေရိက • အာဖရိက • အာရှ • အန္တာတိက • ဥရောပ • ဩစတြေးလျ) • အိုရှန်းနီးယား • တောင်များ • နိုင်ငံများ • ပင်လယ်  • မြစ်များ • မြို့များ • ရေထု • ရေကန်များ


လူ့အဖွဲ့အစည်း
မနုဿဗေဒ • ပညာရေး • (ကျောင်း • ကောလိပ် • တက္ကသိုလ်) • လူ့ဘဝ • မိသားစု • သဘာဝတ္ထဗေဒ • ဒဿနပညာ • ဘာသာဗေဒ • ဥပဒေပညာ • လူမျိုး • လက်ထပ်ထိမ်းမြားခြင်း • နိုင်ငံရေး


သမိုင်း
ရှေးဟောင်းသုတေသနပညာ • ကျောက်ခေတ် • ရေခဲခေတ် • သံခေတ် • ရှေးဟောင်းအီဂျစ် • အမှောင်ခေတ် • အလယ်ခေတ် • ရီနေဆွန်းခေတ် • ဗြိတိသျှအင်ပါယာ • ပထမကမ္ဘာစစ် • ဒုတိယကမ္ဘာစစ် • မြန်မာ့သမိုင်း • စစ်အေးတိုက်ပွဲ

"""

len([link for link in manual_verification_links.split() if link != '•'])

101

101