In [2]:
from requests import get
from bs4 import BeautifulSoup, NavigableString

In [3]:
# Wikipedia article that is downloaded and scraped for MCC/MNC operator tables.
mmcmnc_url_source = "https://en.wikipedia.org/wiki/Mobile_country_code"

In [237]:
def parse_operators(operators_tag):
    """Extract operator records from one operator table.

    Parameters
    ----------
    operators_tag : bs4.element.Tag
        A ``<table>`` element. Its first ``<tr>`` is skipped as the header
        row; every other row that has ``<td>`` cells must hold exactly seven
        of them: MCC, MNC, Brand, Operator, Status, Bands and a trailing
        notes cell.

    Returns
    -------
    list of tuple
        One ``(MCC, MNC, Brand, Operator, Status, Bands)`` tuple of strings
        per data row. The notes cell is discarded, surrounding whitespace is
        stripped, and an empty Brand or Operator is reported as ``'Unknown'``.

    Raises
    ------
    ValueError
        If a row has ``<td>`` cells but not exactly seven of them.
    """
    operators = []
    # The first <tr> is the header row, so it is skipped.
    for row in operators_tag.find_all('tr')[1:]:
        # .strings walks nested markup (links, <sup>, <br/>), so the joined
        # text is the full visible content of the cell.
        cells = [''.join(td.strings).strip() for td in row.find_all('td')]
        if not cells:
            # A row made only of <th> cells carries no operator data.
            continue
        if len(cells) != 7:
            raise ValueError(
                f'expected 7 cells per operator row, got {len(cells)}: {cells!r}'
            )
        MCC, MNC, Brand, Operator, Status, Bands, _ = cells
        # ''.join() always returns a string, never None, so a missing value
        # shows up as an empty string and must be tested for truthiness.
        operators.append(
            (MCC, MNC, Brand or 'Unknown', Operator or 'Unknown', Status, Bands)
        )
    return operators

In [238]:
def parse_table(table_tag):
    """Parse the first ``wikitable`` that follows ``table_tag`` in the document.

    Parameters
    ----------
    table_tag : bs4.element.Tag
        Element to search forward from.

    Returns
    -------
    list
        Whatever ``parse_operators`` returns for the table found, or an empty
        list when no ``<table class="wikitable">`` follows ``table_tag``.
    """
    operators_table = table_tag.find_next('table', {'class': 'wikitable'})
    if operators_table is None:
        # find_next() returns None when nothing matches; without this guard
        # parse_operators() would fail with an AttributeError on None.
        return []
    return parse_operators(operators_table)

In [239]:
def print_operators(nation, opers):
    """Print one comma-separated line per operator record.

    Each record in ``opers`` must unpack into exactly six values, ordered as
    (MCC, MNC, Brand, Operator, Status, Bands). Every printed line starts
    with ``nation`` followed by those six values, separated by ``', '``.
    """
    for record in opers:
        MCC, MNC, Brand, Operator, Status, Bands = record
        print(nation, MCC, MNC, Brand, Operator, Status, Bands, sep=', ')

In [240]:
def parse_nation(nation_tag):
    """Build one dict per operator found for a nation heading.

    The nation name is the ``.string`` of the heading's first ``<a>`` element;
    the operator records come from ``parse_table(nation_tag)``.

    Returns a list of dicts with the keys "Nation", "MCC", "MNC", "Brand",
    "Operator", "Status" and "Bands".
    """
    field_names = ("Nation", "MCC", "MNC", "Brand", "Operator", "Status", "Bands")
    nation_name = nation_tag.a.string
    records = []
    for oper in parse_table(nation_tag):
        # Prepend the nation so the values line up with field_names.
        records.append(dict(zip(field_names, (nation_name, *oper))))
    return records


In [241]:
def parse_intl(intl_tag):
    """Build one dict per operator found for a section heading.

    The value stored under "Nation" is the ``id`` attribute of the heading's
    first ``<span>``; the operator records come from ``parse_table(intl_tag)``.

    Returns a list of dicts with the keys "Nation", "MCC", "MNC", "Brand",
    "Operator", "Status" and "Bands".
    """
    section_id = intl_tag.span['id']
    columns = ["Nation", "MCC", "MNC", "Brand", "Operator", "Status", "Bands"]
    return [
        dict(zip(columns, (section_id, *row)))
        for row in parse_table(intl_tag)
    ]

In [10]:
# Download the article. The timeout keeps the cell from hanging forever if the
# server stalls, and raise_for_status() fails here on an HTTP error response
# instead of letting an error page be parsed further down.
mmcmnc_page = get(mmcmnc_url_source, timeout=30)
mmcmnc_page.raise_for_status()

In [11]:
# Parse the raw response bytes into a navigable tree using the 'html.parser' backend.
html = BeautifulSoup(mmcmnc_page.content, 'html.parser')

In [90]:
# Every <h4> element in the page; each one is later handed to parse_nation().
nations = html.find_all('h4')

In [136]:
# All tables with class "wikitable", dropping the first match.
# NOTE(review): presumably the first wikitable is not an operator table — confirm.
tables = html.find_all('table', {'class': 'wikitable'})[1:]

In [139]:
# Display the collected tables to inspect their markup.
tables

[<table class="wikitable" width="100%">
 <tbody><tr>
 <th width="4%">MCC</th>
 <th width="4%">MNC</th>
 <th width="12%">Brand</th>
 <th width="22%">Operator</th>
 <th width="10%">Status</th>
 <th width="28%">Bands (MHz)</th>
 <th width="20%">References and notes
 </th></tr>
 <tr>
 <td>289</td>
 <td>67</td>
 <td>Aquafon</td>
 <td>Aquafon JSC</td>
 <td>Operational</td>
 <td>GSM 900 / GSM 1800 / UMTS 2100 / LTE 800</td>
 <td>MCC is not listed by ITU;<sup class="reference" id="cite_ref-itu_ob_1117_3-0"><a href="#cite_note-itu_ob_1117-3">[3]</a></sup> LTE band 20<sup class="reference" id="cite_ref-gsa_lte_4-0"><a href="#cite_note-gsa_lte-4">[4]</a></sup>
 </td></tr>
 <tr>
 <td>289</td>
 <td>88</td>
 <td>A-Mobile</td>
 <td>A-Mobile LLSC</td>
 <td>Operational</td>
 <td>GSM 900 / GSM 1800 / UMTS 2100 / LTE 800 / LTE 1800</td>
 <td>MCC is not listed by ITU<sup class="reference" id="cite_ref-itu_ob_1117_3-1"><a href="#cite_note-itu_ob_1117-3">[3]</a></sup>
 </td></tr></tbody></table>, <table cla

In [187]:
# Print the cell texts of every table row that has a cell whose text is exactly 'au'.
for table in tables:
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        # any() prints a matching row once, even if several cells match.
        if any(cell.string == 'au' for cell in cells):
            # Join .strings rather than each child's .string: .string is None
            # for a child that itself has several children, and ''.join()
            # would then raise TypeError.
            print([''.join(cell.strings) for cell in cells])
            

['440', '50', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE 700 / LTE 850 / LTE 1500 / LTE 2100 / TD-LTE 3500', 'bands 1, 11, 26 (18), 28, 42[148]\n']
['440', '51', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE 700 / LTE 850 / LTE 1500 / LTE 2100 / TD-LTE 3500', 'bands 1, 11, 26 (18), 28, 42[148]\n']
['440', '52', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE 700 / LTE 850 / LTE 1500 / LTE 2100 / TD-LTE 3500', 'bands 1, 11, 26 (18), 28, 42[148]\n']
['440', '53', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE 700 / LTE 850 / LTE 1500 / LTE 2100 / TD-LTE 3500', 'bands 1, 11, 26 (18), 28, 42[148]\n']
['440', '54', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE 700 / LTE 850 / LTE 1500 / LTE 2100 / TD-LTE 3500', 'bands 1, 11, 26 (18), 28, 42[148]\n']
['440', '70', 'au', 'KDDI Corporation', 'Operational', 'CDMA2000 850 / CDMA2000 2100 / LTE

In [242]:
# Flatten the per-nation operator dicts returned by parse_nation() into one list.
# After the loop, `nation` still refers to the last heading in `nations`; code
# further down may rely on that, so this stays a plain for loop.
MCCs = []
for nation in nations:
    for operator in parse_nation(nation):
        MCCs.append(operator)

In [226]:
# Sample <td> markup mixing links, plain text and a <br/>, used below to try out text extraction.
tag = '<td><a href="/wiki/Colo-Colo" title="Colo-Colo">Colo-Colo</a> Móvil<br/><a class="extiw" href="https://es.wikipedia.org/wiki/Santiago_Wanderers" title="es:Santiago Wanderers">Wanderers</a> Móvil</td>'

In [231]:
# Parse the sample with the same 'html.parser' backend used for the real page,
# so the experiment matches the scraper and needs no extra parser package.
t = BeautifulSoup(tag, 'html.parser').td

In [236]:
# Show the parsed <td> element.
t

<td><a href="/wiki/Colo-Colo" title="Colo-Colo">Colo-Colo</a> Móvil<br/><a class="extiw" href="https://es.wikipedia.org/wiki/Santiago_Wanderers" title="es:Santiago Wanderers">Wanderers</a> Móvil</td>

In [235]:
# Concatenate every text fragment found under the tag, giving the cell text without markup.
''.join(t.strings)

'Colo-Colo MóvilWanderers Móvil'

In [243]:
# Dump the full list of operator dicts collected so far.
print(MCCs)

[{'Nation': 'Abkhazia', 'MCC': '289', 'MNC': '67', 'Brand': 'Aquafon', 'Operator': 'Aquafon JSC', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 800'}, {'Nation': 'Abkhazia', 'MCC': '289', 'MNC': '88', 'Brand': 'A-Mobile', 'Operator': 'A-Mobile LLSC', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 800 / LTE 1800'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '01', 'Brand': 'AWCC', 'Operator': 'Afghan Wireless Communication Company', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 1800'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '20', 'Brand': 'Roshan', 'Operator': 'Telecom Development Company Afghanistan Ltd.', 'Status': 'Operational', 'Bands': 'GSM 900 / UMTS'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '40', 'Brand': 'MTN', 'Operator': 'MTN Group Afghanistan', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '50', 'Brand': 'Etisala

In [130]:
# Find the first <h2> after the last nation heading and add the operators that
# parse_intl() extracts for it. nations[-1] is used instead of the `nation`
# loop variable left over from an earlier cell, so this cell does not depend on
# leaked state. find_next() returns the same element as find_all_next(...)[0].
# NOTE: MCCs is extended in place, so re-running this cell appends the same
# records again.
intl_operators = nations[-1].find_next('h2')
MCCs.extend(parse_intl(intl_operators))
print(MCCs)

[{'Nation': 'Abkhazia', 'MCC': '289', 'MNC': '67', 'Brand': 'Aquafon', 'Operator': 'Aquafon JSC', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 800'}, {'Nation': 'Abkhazia', 'MCC': '289', 'MNC': '88', 'Brand': 'A-Mobile', 'Operator': 'A-Mobile LLSC', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 800 / LTE 1800'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '01', 'Brand': 'AWCC', 'Operator': 'Afghan Wireless Communication Company', 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 1800'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '20', 'Brand': 'Roshan', 'Operator': 'Telecom Development Company Afghanistan Ltd.', 'Status': 'Operational', 'Bands': 'GSM 900 / UMTS'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '40', 'Brand': 'MTN', 'Operator': None, 'Status': 'Operational', 'Bands': 'GSM 900 / GSM 1800 / UMTS 2100'}, {'Nation': 'Afghanistan', 'MCC': '412', 'MNC': '50', 'Brand': 'Etisalat', 'Operator': 'Et

In [129]:
# (MCC, MNC, Bands) of the AT&T entries for the United States of America whose
# status is either 'Unknown' or 'Operational'.
[
    (entry['MCC'], entry['MNC'], entry['Bands'])
    for entry in MCCs
    if entry['Nation'] == 'United States of America'
    if entry['Brand'] == 'AT&T'
    if entry['Status'] in ('Unknown', 'Operational')
]

[('310', '030', 'GSM 850'),
 ('310', '070', 'GSM 850'),
 ('310', '080', 'GSM 1900'),
 ('310', '090', 'GSM 1900'),
 ('310', '150', 'GSM 850 / UMTS 850 / UMTS 1900'),
 ('310', '170', 'GSM 1900'),
 ('310', '410', 'GSM 850 / GSM 1900 / UMTS 850 / UMTS 1900'),
 ('310', '670', 'Unknown'),
 ('310', '680', 'GSM 850 / GSM 1900'),
 ('310', '950', 'GSM 850'),
 ('311', '070', 'GSM 850'),
 ('311', '090', 'GSM 1900'),
 ('311', '190', 'Unknown'),
 ('312', '090', 'Unknown'),
 ('312', '680', 'Unknown'),
 ('313', '210', 'Unknown')]

In [None]:
# (Operator, MCC, MNC, Bands) of every United States of America entry whose
# Bands text contains 'LTE'.
[
    (record['Operator'], record['MCC'], record['MNC'], record['Bands'])
    for record in MCCs
    if record['Nation'] == 'United States of America' and 'LTE' in record['Bands']
]

In [97]:
# Distinct "Nation" values present in the collected records.
{entry['Nation'] for entry in MCCs}

{'Abkhazia',
 'Afghanistan',
 'Albania',
 'Algeria',
 'American Samoa',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Ascension Island',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bonaire, Saba, Sint Eustatius',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'British Indian Ocean Territory',
 'British Virgin Islands',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Christmas Island',
 'Cocos Islands',
 'Colombia',
 'Comoros',
 'Congo',
 'Cook Islands',
 'Costa Rica',
 'Croatia',
 'Cuba',
 'Curaçao',
 'Cyprus',
 'Czech Republic',
 'Democratic Republic of the Congo',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'East Timor',
 'Ecuador',
 'Egypt',
 'El 

In [245]:
# Every collected record whose "Nation" is Slovakia.
[record for record in MCCs if record['Nation'] == 'Slovakia']

[{'Nation': 'Slovakia',
  'MCC': '231',
  'MNC': '01',
  'Brand': 'Orange',
  'Operator': 'Orange Slovensko',
  'Status': 'Operational',
  'Bands': 'GSM 900 / GSM 1800 / UMTS 900 / UMTS 2100 / LTE 800 / LTE 2600'},
 {'Nation': 'Slovakia',
  'MCC': '231',
  'MNC': '02',
  'Brand': 'Telekom',
  'Operator': 'Slovak Telekom',
  'Status': 'Operational',
  'Bands': 'GSM 900 / GSM 1800 / UMTS 2100 / LTE 800 / LTE 1800 / LTE 2600 / TD-LTE 3700'},
 {'Nation': 'Slovakia',
  'MCC': '231',
  'MNC': '03',
  'Brand': '4ka',
  'Operator': 'SWAN Mobile, a.s.',
  'Status': 'Operational',
  'Bands': 'LTE 1800 / TD-LTE 3500 / TD-LTE 3700'},
 {'Nation': 'Slovakia',
  'MCC': '231',
  'MNC': '04',
  'Brand': 'Telekom',
  'Operator': 'Slovak Telekom',
  'Status': 'Operational',
  'Bands': 'GSM 900 / GSM 1800 / UMTS 2100'},
 {'Nation': 'Slovakia',
  'MCC': '231',
  'MNC': '05',
  'Brand': 'Orange',
  'Operator': 'Orange Slovensko',
  'Status': 'Operational',
  'Bands': 'GSM 900 / GSM 1800 / UMTS 900 / UMTS 21