# Chapter 08 IPython Sessions - Part IV

## Pandas, Regex and Data Munging 

In [1]:
import pandas as pd

In [3]:
# Data Validation: ZIP code strings labelled by city (Miami's entry has only four digits)
zips = pd.Series(['02215', '3310'], index=['Boston', 'Miami'])

In [4]:
zips

Boston    02215
Miami      3310
dtype: object

In [5]:
# str.match only anchors at the start of each string, so a value that merely
# begins with five digits (e.g. '022150000') would pass; fullmatch requires
# the entire string to be exactly five digits.
zips.str.fullmatch(r'\d{5}')

Boston     True
Miami     False
dtype: bool

In [6]:
# Check whether each address string includes a two-letter state abbreviation
cities = pd.Series(
    data=['Boston, MA 02215', 'Miami FL 33101'],
)

In [7]:
# contains searches anywhere in each string for a space, two capitals, and a space
cities.str.contains(pat=r' [A-Z]{2} ')

0    True
1    True
dtype: bool

In [8]:
# match tests the pattern only at the start of each string; both entries begin
# with a city name rather than ' XX ', so every result is False
cities.str.match(pat=r' [A-Z]{2} ')

0    False
1    False
dtype: bool

In [9]:
# Reformatting Data: each row is [name, email, unformatted 10-digit phone string]
contacts = [
    ['Mike Green', 'demo1@deitel.com', '5555555555'],
    ['Sue Brown', 'demo2@deitel.com', '5555551234'],
]

In [10]:
# Tabulate the raw contact rows under named columns
contacts_df = pd.DataFrame(
    data=contacts,
    columns=['Name', 'Email', 'Phone'],
)

In [11]:
contacts_df

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,5555555555
1,Sue Brown,demo2@deitel.com,5555551234


In [12]:
import re

In [18]:
def get_formatted_phone(value):
    """Format a 10-digit phone string as ###-###-####.

    Parameters
    ----------
    value : object
        Candidate phone number. Only a str made of exactly ten digits
        is reformatted.

    Returns
    -------
    object
        The dash-separated phone string when value is exactly ten digits;
        otherwise value is returned unchanged.
    """
    # Missing entries in a column show up as NaN/None; re.fullmatch raises
    # TypeError on non-strings, so pass them through untouched instead.
    if not isinstance(value, str):
        return value
    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    return '-'.join(result.groups()) if result else value

In [19]:
# Apply the formatter to every phone entry; contacts_df itself is not modified here
formatted_phone = (
    contacts_df['Phone']
    .map(get_formatted_phone)
)

In [20]:
formatted_phone

0    555-555-5555
1    555-555-1234
Name: Phone, dtype: object

In [21]:
# Use bracket assignment to replace the column: attribute assignment only
# works for a column that already exists and would otherwise silently create
# a plain attribute on the DataFrame instead of a column.
contacts_df['Phone'] = formatted_phone

In [22]:
contacts_df

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,555-555-5555
1,Sue Brown,demo2@deitel.com,555-555-1234


In [24]:
# Rebuild the frame from the raw rows so the Phone column is unformatted again
contacts_df = pd.DataFrame(
    data=contacts,
    columns=['Name', 'Email', 'Phone'],
)

In [30]:
def get_formatted_phone(value):
    """Format a 10-digit phone string as (###) ###-####.

    Parameters
    ----------
    value : object
        Candidate phone number. Only a str made of exactly ten digits
        is reformatted.

    Returns
    -------
    object
        The formatted phone string when value is exactly ten digits;
        otherwise value is returned unchanged.
    """
    # Missing entries in a column show up as NaN/None; re.fullmatch raises
    # TypeError on non-strings, so pass them through untouched instead.
    if not isinstance(value, str):
        return value

    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    if result is None:
        return value

    area_code, exchange, line_number = result.groups()
    return f'({area_code}) {exchange}-{line_number}'

In [31]:
# Apply the formatter to every phone entry; contacts_df itself is not modified here
formatted_phone = (
    contacts_df['Phone']
    .map(get_formatted_phone)
)

In [32]:
formatted_phone

0    (555) 555-5555
1    (555) 555-1234
Name: Phone, dtype: object