In [None]:
# Import some libraries
import numpy as np  # useful for many scientific computing in Python
import pandas as pd # primary data structure library

In [None]:
# Function to clean up the chat string
def cleanup(input: str) -> str:
  """Normalize a chat fragment that is expected to start with a phone number.

  The fragment is accepted only when its 2nd character is "+". The 1st
  character is presumably the U+202A marker that precedes the number in the
  chat export (it is one of the characters removed below) -- TODO confirm.

  Args:
    input: The text to clean. The name shadows the builtin ``input`` but is
      kept so existing keyword callers keep working.

  Returns:
    "" when the fragment has fewer than two characters or its 2nd character
    is not "+". Otherwise the fragment with every U+00A0 (no-break space)
    replaced by a regular space and every U+202A / U+202C removed.
  """
  # Guard the length first: indexing [1] on an empty or 1-character string
  # would raise IndexError and abort the caller's whole read loop.
  if len(input) < 2 or input[1] != "+":
    return ""
  return "".join(
    " " if char == "\xa0" else char           # No-break space -> regular space
    for char in input
    if char not in ("\u202a", "\u202c")       # Drop the directional markers
  )

In [None]:
# Function to return the country code or US/Canada area code
def getCode(phone: str) -> str:
  """Extract the parenthesized area code, or else the leading country code.

  Args:
    phone: Cleaned text that starts with a phone number, e.g.
      "+1 (703) 555-0100 ..." or "+52 1 55 ...".

  Returns:
    The text between the first "(" and the next ")" after it, when both are
    present. Otherwise everything before the first space (including the
    "+"), or the whole string when it contains no space.

  Note:
    The first "(" anywhere in ``phone`` is used, even if it belongs to text
    that follows the number rather than to the number itself.
  """
  open_at = phone.find("(")
  if open_at != -1:
    # Only a ")" that comes after the "(" counts; an unmatched "(" falls
    # through to the country-code branch instead of raising ValueError.
    close_at = phone.find(")", open_at + 1)
    if close_at != -1:
      return phone[open_at + 1:close_at]
  # split() never raises: without a space the whole string is returned.
  return phone.split(" ", 1)[0]

In [None]:
# Load the chat logs and process it line by line
filename='chat.txt'

phones = []                                 # Collected area/country codes, one per parsed line (a list, so duplicates are kept)

# Now open the file and read it.
# The encoding is explicit because cleanup() looks for non-ASCII characters
# (U+00A0, U+202A, U+202C); the platform default encoding could mis-decode them.
# Assumes the export is UTF-8 -- TODO confirm.
with open(filename, 'r', encoding='utf-8') as file:
  for line in file:                         # Iterate over the file line by line
    if 'joined' not in line:                # Only lines containing "joined" are of interest
      continue
    extract = line.split(":", 4)            # Split on at most 4 ":" characters
    if len(extract) < 3:                    # Need at least 3 pieces to reach the 3rd one
      continue
    # The 3rd piece is expected to hold a "]" followed by the phone number
    # (assumes a "[date, hh:mm:ss] text" line layout -- TODO confirm).
    phone = extract[2].split("]")
    if len(phone) < 2:                      # No "]" found, nothing to extract
      continue
    stripped = phone[1].strip()             # Strip whitespaces from left and right
    cleaned = cleanup(stripped)             # "" when the text does not look like a phone number
    if cleaned:
      area_code = getCode(cleaned)          # Extract the area/country code
      phones.append(area_code)              # Add the code to the list

In [None]:
# Output the number of codes collected (duplicates are kept because `phones` is a list)
len(phones)

122

In [None]:
# Build a one-column DataFrame ("Code") from the list of codes, sort it, and
# renumber the index from 0.
# The column is named in the constructor so this also works when `phones` is
# empty; assigning df.columns afterwards raises a length mismatch on a frame
# that has no columns.
df = (
  pd.DataFrame({'Code': phones})
  .sort_values('Code')                    # Sort the codes
  .reset_index(drop=True)                 # Discard the pre-sort index
)

In [None]:
# Display partial results: the first 10 rows of the sorted frame
df.head(10)

Unnamed: 0,Code
0,223
1,233
2,31
3,33
4,34
5,34
6,39
7,39
8,41
9,41


In [None]:
# (rows, columns) of the frame
df.shape

(122, 1)

In [None]:
# Data type of each column
df.dtypes

Code    object
dtype: object

In [None]:
# Number of occurrences of each distinct code
df['Code'].value_counts()

+52     13
+971     7
703      7
+57      5
+966     3
        ..
407      1
+233     1
438      1
440      1
949      1
Name: Code, Length: 74, dtype: int64

In [None]:
# Save the codes to a CSV file; index=False leaves the row index out of the file
df.to_csv("chat_area_codes_from_members.csv", index=False)