# 5.13 Intro to Data Science: Pandas, Regular Expressions and Data Munging 
### Cleaning Your Data 
### Data Validation

In [1]:
import pandas as pd

In [2]:
# Sample ZIP codes labelled by city; the Miami entry has only four digits.
zips = pd.Series(
    data=['02215', '3310'],
    index=['Boston', 'Miami'],
)

In [3]:
zips

Boston    02215
Miami      3310
dtype: object

In [4]:
# Series.str.match only anchors at the start of each string, so a bare
# r'\d{5}' would also accept longer values such as '022155'. The trailing \Z
# requires the string to end right after the fifth digit.
zips.str.match(r'\d{5}\Z')

Boston     True
Miami     False
dtype: bool

In [5]:
# "City, ST 12345" strings used to compare contains() and match() below.
cities = pd.Series([
    'Boston, MA 02215',
    'Miami, FL 33101',
])

In [6]:
cities

0    Boston, MA 02215
1     Miami, FL 33101
dtype: object

In [7]:
# contains() searches anywhere in each string for: space, two capitals, space.
cities.str.contains(pat=r' [A-Z]{2} ')

0    True
1    True
dtype: bool

In [8]:
# match() tests only at the start of each string, so the same pattern that
# contains() finds mid-string does not match here.
cities.str.match(pat=r' [A-Z]{2} ')

0    False
1    False
dtype: bool

### Reformatting Your Data

In [9]:
# One row per contact: name, email, and a phone number as ten bare digits.
contacts = [
    ['Mike Green', 'demo1@deitel.com', '5555555555'],
    ['Sue Brown', 'demo2@deitel.com', '5555551234'],
]

In [10]:
# Wrap the raw contact rows in a DataFrame with labelled columns.
contactsdf = pd.DataFrame(
    data=contacts,
    columns=['Name', 'Email', 'Phone'],
)

In [11]:
contactsdf

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,5555555555
1,Sue Brown,demo2@deitel.com,5555551234


In [12]:
import re

In [13]:
def get_formatted_phone(value):
    """Return a 10-digit phone string reformatted as ###-###-####.

    Parameters:
        value: the cell value to reformat.

    Returns:
        'ddd-ddd-dddd' when value is a string of exactly ten digits;
        otherwise value itself, unchanged. Non-string values (for example
        None or NaN from a missing cell) are also returned unchanged.
    """
    # re.fullmatch raises TypeError on non-strings, which would abort an
    # entire Series.map() call if a single cell were missing or numeric.
    if not isinstance(value, str):
        return value

    result = re.fullmatch(r'(\d{3})(\d{3})(\d{4})', value)
    return '-'.join(result.groups()) if result else value

In [14]:
# Apply the formatter to every entry of the Phone column.
formatted_phone = (
    contactsdf['Phone']
    .map(get_formatted_phone)
)

In [15]:
formatted_phone

0    555-555-5555
1    555-555-1234
Name: Phone, dtype: object

In [16]:
# Replace the Phone column with the reformatted values computed above in
# formatted_phone; this overwrites the unformatted digits in contactsdf.
contactsdf['Phone'] = formatted_phone

In [17]:
contactsdf

Unnamed: 0,Name,Email,Phone
0,Mike Green,demo1@deitel.com,555-555-5555
1,Sue Brown,demo2@deitel.com,555-555-1234


Elsa Ghirmazion

In [None]:
##########################################################################
# (C) Copyright 2019 by Deitel & Associates, Inc. and                    #
# Pearson Education, Inc. All Rights Reserved.                           #
#                                                                        #
# DISCLAIMER: The authors and publisher of this book have used their     #
# best efforts in preparing the book. These efforts include the          #
# development, research, and testing of the theories and programs        #
# to determine their effectiveness. The authors and publisher make       #
# no warranty of any kind, expressed or implied, with regard to these    #
# programs or to the documentation contained in these books. The authors #
# and publisher shall not be liable in any event for incidental or       #
# consequential damages in connection with, or arising out of, the       #
# furnishing, performance, or use of these programs.                     #
##########################################################################
