<a href="https://colab.research.google.com/github/cinegemadar/colab/blob/main/phonebook_sync.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from lxml import html
import requests
from requests.auth import HTTPBasicAuth
from getpass import getpass
import csv


In [None]:
#@title  { run: "auto" }
# Credentials cell: defines the user name and password that are passed to
# HTTPBasicAuth when the wiki page is requested.

# Default user name. NOTE(review): the trailing `#@param` annotation looks like
# Colab form markup exposing this value as a text field - confirm before editing.
user = 'dbalogh' #@param {type:"string"}
# Prompt for the password interactively via getpass.
passwd = getpass("password:")

password:··········


In [None]:
# Download the WhoIsWho wiki page using HTTP basic authentication.
# An explicit timeout is passed because requests otherwise waits indefinitely
# for an unresponsive server.
page = requests.get(
    "https://trac.cae-engineering.hu/misc/wiki/WhoIsWho",
    auth=HTTPBasicAuth(user, passwd),
    timeout=30,
)
# Explicit check instead of `assert`, which is removed when Python runs with -O.
if page.status_code != 200:
  raise RuntimeError("Cannot connect to WhoIsWho page.")


In [16]:
def getNames(page):
  '''
  Returns the list of employee names found on the parsed page.

  The names are the text of the links inside <strong> elements in the second
  cell of each row of the first table under the element with id "wikipage".
  The result of the XPath query is returned unchanged.
  '''
  names_query = '//*[@id="wikipage"]/table[1]/tr/td[2]/strong/a/text()'
  names = page.xpath(names_query)
  return names


In [15]:
def getMobiles(page):
  '''
  Returns a list of employee mobile numbers, one entry per employee cell.

  Each entry is the first mobile number listed for that employee, reduced to
  digits and '+' characters. Where no mobile number is available the entry
  is None.

  page: parsed document exposing an xpath() method; every element it returns
        for the employee-cell query must expose xpath('text()') as well.
  '''
  # Employee details is a subset of the following information:
  # mobile: ...
  # e-mail address
  # extension, etc.
  # None of these are mandatory.
  mobile_prefix = "mobile:"
  employee_cells = page.xpath('//*[@id="wikipage"]/table[1]/tr/td[2]')

  mobiles = []
  for cell in employee_cells:
    for detail in cell.xpath('text()'):
      # Text nodes may carry surrounding whitespace/newlines; strip it so the
      # prefix test does not miss the line.
      data = str(detail).strip()
      if data.startswith(mobile_prefix):
        # Slice by the real prefix length so a number written directly after
        # the colon ("mobile:+36...") keeps its first character.
        # In case multiple mobile numbers are available, keep the first one.
        first_mobile, *_ = data[len(mobile_prefix):].split(",")
        # Keep only digits and '+'; this also drops spaces and separators.
        mobiles.append("".join(c for c in first_mobile if c.isdigit() or c == '+'))
        break
    else: # nobreak: no detail line carried a mobile number.
      mobiles.append(None)
  return mobiles

In [29]:
def isValidLength(mobileNo, expectedLength = 8):
  '''
  Returns True if the mobile number is present and has more than
  expectedLength characters, False otherwise.

  A missing number (None or an empty string) yields False.
  '''
  # bool() keeps the result a real boolean instead of leaking None or "".
  return bool(mobileNo) and len(mobileNo) > expectedLength


def setMobilePrefix(mobileNo):
  '''
  Returns the given mobile number with a country prefix.

  The number is not modified in place (strings are immutable); the
  prefixed value is returned:
    - starts with "0036" or "+36": returned unchanged,
    - starts with "20", "30" or "70": "+36" is prepended,
    - starts with "36": "+" is prepended.

  Raises ValueError if the number matches none of the known prefixes.
  '''
  # str.startswith accepts a tuple of alternatives. The checks run in the
  # order above, so "30..." is treated as a bare operator code before the
  # "36" country-code test is reached.
  if mobileNo.startswith(("0036", "+36")):
    return mobileNo               # No change, already prefixed.
  if mobileNo.startswith(("20", "30", "70")):
    return f"+36{mobileNo}"       # Add '+36' prefix.
  if mobileNo.startswith("36"):
    return f"+{mobileNo}"         # Add '+' prefix.

  # If the prefix cannot be fixed: raise an error.
  raise ValueError(f"Unknown mobile number format: {mobileNo}")


In [19]:
# Parse the downloaded HTML, then map each extracted name to the mobile entry
# found at the same position.
# NOTE(review): zip pairs purely by position and stops at the shorter list;
# this assumes both queries yield exactly one entry per table row - confirm.
tree = html.fromstring(page.content)
full_phonebook = {
    name: mobile for name, mobile in zip(getNames(tree), getMobiles(tree))
}

In [30]:
# Keep only the entries whose number passes the length check and can be
# given a country prefix.
valid_phonebook = {}
for key, value in full_phonebook.items():
  if not isValidLength(value):
    continue
  try:
    valid_phonebook[key] = setMobilePrefix(value)
  except ValueError:
    # Unrecognised number format: the entry is deliberately left out.
    continue

# newline="" is required by the csv module so it can control line endings
# itself (otherwise extra blank rows can appear on some platforms).
# The explicit UTF-8 encoding keeps non-ASCII names independent of the
# platform default encoding.
with open("CAEPhonebook.csv", "w", newline="", encoding="utf-8") as phonebook:
  writer = csv.writer(phonebook)
  writer.writerow(["Name","Phone"])
  writer.writerows(valid_phonebook.items())