In [1]:
import numpy as np
import pandas as pd
import math 
import re

## Philippine Phone Numbers

You receive a messy DataFrame of phone numbers in a column called "raw_num". 
Your task: create a clean "mobile" column containing 11-digit PH mobile numbers using these rules:

**Valid formats allowed in input:**
1. "0917-123-4567"
2. "+63 917 123 4567"
3. "(+63)9171234567"
4. "9171234567"
5. "random text 09171234567 more text"

**Cleaning rules:**
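1. Remove the country code (`+63`, `(+63)`, or a bare `63` prefix) if present, replacing it with a leading 0. A bare 10-digit number starting with 9 (format 4) also gets a leading 0.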
2. Strip all non-digits using regex.
3. After cleaning, final string must be exactly 11 digits and start with '09'.
4. Otherwise, set it to NaN.

In [2]:
# this code was generated with ChatGPT
# Sample inputs: each entry is one messy representation of a phone number
# (or junk that should be rejected by the cleaner).
df = pd.DataFrame(
    [
        "0917-123-4567",          # dashed local format
        "+63 995 444 3322",       # spaced international format
        "hello 09181234567 bye",  # number embedded in text
        "639171234567",           # country code without '+'
        "(+63)9171234567",        # parenthesised country code
        "+++63-928-111-2233",     # stray '+' characters
        "917 777 8899",           # missing leading zero
        "random text",            # no digits at all
        "2024",                   # too short to be a mobile number
        "+63(910)555-6677",       # parenthesised network prefix
    ],
    columns=["raw_num"],
)

print(df)

                 raw_num
0          0917-123-4567
1       +63 995 444 3322
2  hello 09181234567 bye
3           639171234567
4        (+63)9171234567
5     +++63-928-111-2233
6           917 777 8899
7            random text
8                   2024
9       +63(910)555-6677


In [3]:
def get_phone_number(raw_num):
    """Normalize one raw entry to an 11-digit PH mobile number.

    Parameters
    ----------
    raw_num : object
        A single raw value (typically a string). Missing values
        (None / NaN / pd.NA) are passed through as NaN.

    Returns
    -------
    str or float
        The cleaned number as an 11-character string of ASCII digits
        starting with '09', or ``np.nan`` when the input cannot be
        normalized to that form.
    """
    if pd.isna(raw_num):
        return np.nan

    # An integer-valued float such as 9171234567.0 would stringify as
    # "9171234567.0"; the fractional zero would then be counted as an
    # extra digit and the number would be rejected.
    if isinstance(raw_num, (float, np.floating)) and float(raw_num).is_integer():
        raw_num = int(raw_num)

    text = str(raw_num)

    # Keep ASCII digits only. r'\D' on a str pattern is Unicode-aware and
    # would let digits from other scripts through, producing a result that
    # is not made of the characters 0-9.
    digits = re.sub(r'[^0-9]', '', text)

    # Country code: 63 + 10-digit subscriber number -> 0 + subscriber number.
    if digits.startswith('63') and len(digits) == 12:
        digits = '0' + digits[2:]

    # Bare 10-digit subscriber number: restore the missing leading zero.
    if digits.startswith('9') and len(digits) == 10:
        digits = '0' + digits

    # Final validation: exactly 11 digits and starts with 09.
    if len(digits) == 11 and digits.startswith('09'):
        return digits

    return np.nan

In [4]:
# Clean every raw entry element-wise; entries that cannot be normalized
# come back as NaN in the new "mobile" column.
df["mobile"] = df["raw_num"].map(get_phone_number)
df

Unnamed: 0,raw_num,mobile
0,0917-123-4567,9171234567.0
1,+63 995 444 3322,9954443322.0
2,hello 09181234567 bye,9181234567.0
3,639171234567,9171234567.0
4,(+63)9171234567,9171234567.0
5,+++63-928-111-2233,9281112233.0
6,917 777 8899,9177778899.0
7,random text,
8,2024,
9,+63(910)555-6677,9105556677.0
