In [1]:
import numpy as np
import pandas as pd

## 6.1 Case conversion and spaces

In [2]:
# Load the inspections CSV (path is relative to the working directory)
# and display the resulting DataFrame.
inspections = pd.read_csv("chicago_food_inspections.csv")
inspections

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)
...,...,...
153805,WOLCOTT'S,Risk 1 (High)
153806,DUNKIN DONUTS/BASKIN-ROBBINS,Risk 2 (Medium)
153807,Cafe 608,Risk 1 (High)
153808,mr.daniel's,Risk 1 (High)


In [3]:
inspections["Name"].head()

0     MARRIOT MARQUIS CHICAGO   
1                    JETS PIZZA 
2                     ROOM 1520 
3      MARRIOT MARQUIS CHICAGO  
4                  CHARTWELLS   
Name: Name, dtype: object

In [4]:
# .values exposes the raw strings, which makes the padding around each name visible.
inspections["Name"].head().values

array([' MARRIOT MARQUIS CHICAGO   ', ' JETS PIZZA ', '   ROOM 1520 ',
       '  MARRIOT MARQUIS CHICAGO  ', ' CHARTWELLS   '], dtype=object)

In [5]:
# Trim leading and trailing whitespace from every name, then store the
# cleaned Series back in the Name column.
trimmed_names = inspections["Name"].str.strip()
inspections["Name"] = trimmed_names

In [7]:
inspections.head()

Unnamed: 0,Name,Risk
0,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
1,JETS PIZZA,Risk 2 (Medium)
2,ROOM 1520,Risk 3 (Low)
3,MARRIOT MARQUIS CHICAGO,Risk 1 (High)
4,CHARTWELLS,Risk 1 (High)


In [9]:
inspections.columns

Index(['Name', 'Risk'], dtype='object')

In [12]:
# Apply the same whitespace trim to every column of the frame.
# Each column is accessed through .str, so all columns must hold strings.
for label in inspections.columns:
    stripped = inspections[label].str.strip()
    inspections[label] = stripped
    

In [13]:
inspections["Name"].str.lower().head()

0    marriot marquis chicago
1                 jets pizza
2                  room 1520
3    marriot marquis chicago
4                 chartwells
Name: Name, dtype: object

In [14]:
inspections['Name'].str.upper()

0              MARRIOT MARQUIS CHICAGO
1                           JETS PIZZA
2                            ROOM 1520
3              MARRIOT MARQUIS CHICAGO
4                           CHARTWELLS
                      ...             
153805                       WOLCOTT'S
153806    DUNKIN DONUTS/BASKIN-ROBBINS
153807                        CAFE 608
153808                     MR.DANIEL'S
153809                      TEMPO CAFE
Name: Name, Length: 153810, dtype: object

In [15]:
# Uppercase only the first character of each string and lowercase the rest
# (e.g. "Marriot marquis chicago").
inspections['Name'].str.capitalize()

0              Marriot marquis chicago
1                           Jets pizza
2                            Room 1520
3              Marriot marquis chicago
4                           Chartwells
                      ...             
153805                       Wolcott's
153806    Dunkin donuts/baskin-robbins
153807                        Cafe 608
153808                     Mr.daniel's
153809                      Tempo cafe
Name: Name, Length: 153810, dtype: object

In [17]:
# Title-case every word. Letters following an apostrophe or a period are also
# capitalized, as the output shows ("Wolcott'S", "Mr.Daniel'S").
inspections['Name'].str.title()

0              Marriot Marquis Chicago
1                           Jets Pizza
2                            Room 1520
3              Marriot Marquis Chicago
4                           Chartwells
                      ...             
153805                       Wolcott'S
153806    Dunkin Donuts/Baskin-Robbins
153807                        Cafe 608
153808                     Mr.Daniel'S
153809                      Tempo Cafe
Name: Name, Length: 153810, dtype: object

## 6.2 String Slicing

In [19]:
inspections['Risk'].head()

0      Risk 1 (High)
1    Risk 2 (Medium)
2       Risk 3 (Low)
3      Risk 1 (High)
4      Risk 1 (High)
Name: Risk, dtype: object

In [20]:
len(inspections)

153810

In [22]:
# List the distinct Risk values to spot irregular categories ('All') and missing data (nan).
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)', 'All', nan],
      dtype=object)

In [25]:
# Discard rows whose Risk is missing; only the Risk column is inspected
# when deciding which rows to drop.
inspections = inspections.dropna(subset = ['Risk'])

In [26]:
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)', 'All'],
      dtype=object)

In [28]:
# Relabel the irregular 'All' category so every Risk value follows the
# "Risk <n> (<label>)" pattern that the slicing cells below rely on.
# The nested dict limits the replacement to the Risk column; a frame-wide
# replace would also rewrite any business whose Name is exactly "All".
inspections = inspections.replace(to_replace = {'Risk': {'All': 'Risk 4 (Extreme)'}})

In [29]:
inspections['Risk'].unique()

array(['Risk 1 (High)', 'Risk 2 (Medium)', 'Risk 3 (Low)',
       'Risk 4 (Extreme)'], dtype=object)

## 6.3 String Slicing and Character replacement

In [30]:
# Extract the single character at index 5: the risk-level digit in values
# such as 'Risk 1 (High)'.
inspections['Risk'].str.slice(5,6).head()

0    1
1    2
2    3
3    1
4    1
Name: Risk, dtype: object

In [31]:
# Same extraction written with Python slice syntax on the .str accessor.
inspections['Risk'].str[5:6].head()

0    1
1    2
2    3
3    1
4    1
Name: Risk, dtype: object

In [32]:
# Take everything from index 8 onward: the risk label plus the closing parenthesis.
inspections['Risk'].str.slice(8).head()

0      High)
1    Medium)
2       Low)
3      High)
4      High)
Name: Risk, dtype: object

In [33]:
# Bracket-syntax equivalent of slicing from index 8 to the end.
inspections['Risk'].str[8:].head()

0      High)
1    Medium)
2       Low)
3      High)
4      High)
Name: Risk, dtype: object

In [34]:
# A stop of -1 excludes the final character, dropping the closing parenthesis.
inspections['Risk'].str.slice(8, -1).head()

0      High
1    Medium
2       Low
3      High
4      High
Name: Risk, dtype: object

In [35]:
# Bracket-syntax equivalent: from index 8 up to (not including) the last character.
inspections['Risk'].str[8:-1].head()

0      High
1    Medium
2       Low
3      High
4      High
Name: Risk, dtype: object

In [36]:
# Alternative to the negative stop index: slice from index 8, then delete the
# closing parenthesis. ")" is a regex metacharacter, so regex=False requests a
# literal match explicitly instead of relying on the installed pandas default.
inspections['Risk'].str.slice(8).str.replace(")", "", regex=False).head()

0      High
1    Medium
2       Low
3      High
4      High
Name: Risk, dtype: object

## 6.4 Boolean methods

In [37]:
# Lowercase the names first so the substring test matches regardless of the
# original casing; the result is one boolean per row.
inspections['Name'].str.lower().str.contains('pizza').head()

0    False
1     True
2    False
3    False
4    False
Name: Name, dtype: bool

In [40]:
# Build a case-insensitive boolean mask for names containing "pizza",
# then use it to keep only the matching rows.
lowercase_names = inspections['Name'].str.lower()
has_pizza = lowercase_names.str.contains('pizza')
inspections[has_pizza]

Unnamed: 0,Name,Risk
1,JETS PIZZA,Risk 2 (Medium)
19,NANCY'S HOME OF STUFFED PIZZA,Risk 1 (High)
27,"NARY'S GRILL & PIZZA ,INC.",Risk 1 (High)
29,NARYS GRILL & PIZZA,Risk 1 (High)
68,COLUTAS PIZZA,Risk 1 (High)
...,...,...
153756,ANGELO'S STUFFED PIZZA CORP,Risk 1 (High)
153764,COCHIAROS PIZZA #2,Risk 1 (High)
153772,FERNANDO'S MEXICAN GRILL & PIZZA,Risk 1 (High)
153788,REGGIO'S PIZZA EXPRESS,Risk 1 (High)


In [41]:
# True only where the lowercased name begins with "tacos".
inspections['Name'].str.lower().str.startswith('tacos').head()

0    False
1    False
2    False
3    False
4    False
Name: Name, dtype: bool

In [42]:
# Mask of rows whose lowercased name begins with "tacos", followed by the
# filtered view of the frame.
names_lowered = inspections['Name'].str.lower()
starts_with_tacos = names_lowered.str.startswith('tacos')
inspections[starts_with_tacos]

Unnamed: 0,Name,Risk
69,TACOS NIETOS,Risk 1 (High)
556,TACOS EL TIO 2 INC.,Risk 1 (High)
675,TACOS DON GABINO,Risk 1 (High)
958,TACOS EL TIO 2 INC.,Risk 1 (High)
1036,TACOS EL TIO 2 INC.,Risk 1 (High)
...,...,...
143587,TACOS DE LUNA,Risk 1 (High)
144026,TACOS GARCIA,Risk 1 (High)
146174,Tacos Place's 1,Risk 1 (High)
147810,TACOS MARIO'S LIMITED,Risk 1 (High)


In [43]:
# Boolean mask: True where the lowercased name ends with "tacos".
ends_with_tacos = inspections['Name'].str.lower().str.endswith('tacos')

In [44]:
inspections[ends_with_tacos]

Unnamed: 0,Name,Risk
382,LAZO'S TACOS,Risk 1 (High)
569,LAZO'S TACOS,Risk 1 (High)
2652,FLYING TACOS,Risk 3 (Low)
3250,JONY'S TACOS,Risk 1 (High)
3812,PACO'S TACOS,Risk 1 (High)
...,...,...
151121,REYES TACOS,Risk 1 (High)
151318,EL MACHO TACOS,Risk 1 (High)
151801,EL MACHO TACOS,Risk 1 (High)
153087,RAYMOND'S TACOS,Risk 1 (High)


In [47]:
# Load the customers CSV (path is relative to the working directory).
customers = pd.read_csv('customers.csv')

In [48]:
customers.head()

Unnamed: 0,Name,Address
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire..."
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,..."
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495"
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991"
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7..."


In [50]:
# Character count of each name, spaces included ("Frank Manning" -> 13).
customers['Name'].str.len().head()

0    13
1    17
2    15
3    19
4    14
Name: Name, dtype: int64

In [51]:
# Plain-Python analogue of Series.str.split: break one string into a list
# of pieces at every occurrence of the delimiter.
phone_number = "555-123-4567"
phone_number.split(sep="-")

['555', '123', '4567']

In [52]:
# Split each name on single spaces; every element becomes a list of words.
customers['Name'].str.split(pat = ' ').head()

0           [Frank, Manning]
1       [Elizabeth, Johnson]
2         [Donald, Stephens]
3    [Michael, Vincent, III]
4          [Jasmine, Zamora]
Name: Name, dtype: object

In [53]:
# Count the words in each name by taking the length of each split list.
customers['Name'].str.split(' ').str.len().head()

0    2
1    2
2    2
3    3
4    2
Name: Name, dtype: int64

In [54]:
# n=1 permits at most one split, so everything after the first space stays
# together (e.g. "Vincent III").
customers['Name'].str.split(' ', n = 1).head()

0          [Frank, Manning]
1      [Elizabeth, Johnson]
2        [Donald, Stephens]
3    [Michael, Vincent III]
4         [Jasmine, Zamora]
Name: Name, dtype: object

In [59]:
# .str.get(0) pulls the first element of each list: the first name.
customers['Name'].str.split(' ', n = 1).str.get(0).head()

0        Frank
1    Elizabeth
2       Donald
3      Michael
4      Jasmine
Name: Name, dtype: object

In [60]:
# .str.get(1) pulls the second element: everything after the first space.
customers['Name'].str.split(' ', n = 1).str.get(1).head()

0        Manning
1        Johnson
2       Stephens
3    Vincent III
4         Zamora
Name: Name, dtype: object

In [61]:
# Index -1 selects the last element of each list; for the rows shown this
# is the same value that index 1 returns.
customers['Name'].str.split(' ', n = 1).str.get(-1).head()

0        Manning
1        Johnson
2       Stephens
3    Vincent III
4         Zamora
Name: Name, dtype: object

In [66]:
# expand=True returns the pieces as DataFrame columns (labelled 0 and 1)
# instead of a Series of lists.
customers['Name'].str.split(' ', n = 1, expand = True).head()

Unnamed: 0,0,1
0,Frank,Manning
1,Elizabeth,Johnson
2,Donald,Stephens
3,Michael,Vincent III
4,Jasmine,Zamora


In [67]:
# Split each full name once, at the first space, and attach the two pieces
# to the frame as new First Name / Last Name columns.
name_parts = customers['Name'].str.split(' ', n=1, expand=True)
customers[['First Name', 'Last Name']] = name_parts

In [68]:
customers

Unnamed: 0,Name,Address,First Name,Last Name
0,Frank Manning,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning
1,Elizabeth Johnson,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson
2,Donald Stephens,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens
3,Michael Vincent III,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III
4,Jasmine Zamora,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora
...,...,...,...,...
9956,Dana Browning,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning
9957,Amanda Anderson,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson
9958,Eric Davis,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis
9959,Taylor Hernandez,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez


In [70]:
# Preview the frame without the Name column. The result is not assigned here;
# a later cell rebinds `customers` to make the removal stick.
customers.drop(labels = 'Name', axis = 'columns')

Unnamed: 0,Address,First Name,Last Name
0,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning
1,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson
2,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens
3,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III
4,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora
...,...,...,...
9956,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning
9957,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson
9958,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis
9959,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez


In [72]:
# Rebind customers to the frame without the Name column, which the
# First Name / Last Name columns have replaced.
customers = customers.drop(labels = 'Name', axis = 'columns')

## 6.6 Coding Challenge

In [73]:
customers

Unnamed: 0,Address,First Name,Last Name
0,"6461 Quinn Groves, East Matthew, New Hampshire...",Frank,Manning
1,"1360 Tracey Ports Apt. 419, Kyleport, Vermont,...",Elizabeth,Johnson
2,"19120 Fleming Manors, Prestonstad, Montana, 23495",Donald,Stephens
3,"441 Olivia Creek, Jimmymouth, Georgia, 82991",Michael,Vincent III
4,"4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...",Jasmine,Zamora
...,...,...,...
9956,"762 Andrew Views Apt. 254, North Paul, New Mex...",Dana,Browning
9957,"44188 Day Crest Apt. 901, Lake Marcia, Maine, ...",Amanda,Anderson
9958,"73015 Michelle Squares, Watsonville, West Virg...",Eric,Davis
9959,"129 Keith Greens, Haleyfurt, Oklahoma, 98916",Taylor,Hernandez


In [74]:
customers['Address']

0       6461 Quinn Groves, East Matthew, New Hampshire...
1       1360 Tracey Ports Apt. 419, Kyleport, Vermont,...
2       19120 Fleming Manors, Prestonstad, Montana, 23495
3            441 Olivia Creek, Jimmymouth, Georgia, 82991
4       4246 Chelsey Ford Apt. 310, Karamouth, Utah, 7...
                              ...                        
9956    762 Andrew Views Apt. 254, North Paul, New Mex...
9957    44188 Day Crest Apt. 901, Lake Marcia, Maine, ...
9958    73015 Michelle Squares, Watsonville, West Virg...
9959         129 Keith Greens, Haleyfurt, Oklahoma, 98916
9960     355 Griffin Valley, Davidtown, New Mexico, 17581
Name: Address, Length: 9961, dtype: object

In [79]:
# Sanity check before splitting into fixed columns: count the comma-separated
# parts of each address and confirm only one distinct count exists.
customers['Address'].str.split(',').str.len().nunique()

1

In [80]:
# Preview the four pieces produced by splitting each address on commas.
customers['Address'].str.split(',', expand = True)

Unnamed: 0,0,1,2,3
0,6461 Quinn Groves,East Matthew,New Hampshire,16656
1,1360 Tracey Ports Apt. 419,Kyleport,Vermont,31924
2,19120 Fleming Manors,Prestonstad,Montana,23495
3,441 Olivia Creek,Jimmymouth,Georgia,82991
4,4246 Chelsey Ford Apt. 310,Karamouth,Utah,76252
...,...,...,...,...
9956,762 Andrew Views Apt. 254,North Paul,New Mexico,28889
9957,44188 Day Crest Apt. 901,Lake Marcia,Maine,37378
9958,73015 Michelle Squares,Watsonville,West Virginia,03933
9959,129 Keith Greens,Haleyfurt,Oklahoma,98916


In [81]:
# Split each address on commas and attach the four pieces to the frame
# as Street, City, State and Zip columns.
address_parts = customers['Address'].str.split(',', expand=True)
customers[['Street', 'City', 'State', 'Zip']] = address_parts

In [84]:
# Address now lives in the Street / City / State / Zip columns, so drop the
# combined column.
customers = customers.drop(labels = 'Address', axis = 'columns')

In [85]:
customers

Unnamed: 0,First Name,Last Name,Street,City,State,Zip
0,Frank,Manning,6461 Quinn Groves,East Matthew,New Hampshire,16656
1,Elizabeth,Johnson,1360 Tracey Ports Apt. 419,Kyleport,Vermont,31924
2,Donald,Stephens,19120 Fleming Manors,Prestonstad,Montana,23495
3,Michael,Vincent III,441 Olivia Creek,Jimmymouth,Georgia,82991
4,Jasmine,Zamora,4246 Chelsey Ford Apt. 310,Karamouth,Utah,76252
...,...,...,...,...,...,...
9956,Dana,Browning,762 Andrew Views Apt. 254,North Paul,New Mexico,28889
9957,Amanda,Anderson,44188 Day Crest Apt. 901,Lake Marcia,Maine,37378
9958,Eric,Davis,73015 Michelle Squares,Watsonville,West Virginia,03933
9959,Taylor,Hernandez,129 Keith Greens,Haleyfurt,Oklahoma,98916


In [87]:
# The addresses were split on "," alone, so City, State and Zip each begin
# with the space that followed the comma. Series.str.strip returns a new
# Series rather than modifying the column, so the result has to be assigned
# back for the cleanup to take effect.
for column in customers.columns:
    customers[column] = customers[column].str.strip()

In [88]:
customers

Unnamed: 0,First Name,Last Name,Street,City,State,Zip
0,Frank,Manning,6461 Quinn Groves,East Matthew,New Hampshire,16656
1,Elizabeth,Johnson,1360 Tracey Ports Apt. 419,Kyleport,Vermont,31924
2,Donald,Stephens,19120 Fleming Manors,Prestonstad,Montana,23495
3,Michael,Vincent III,441 Olivia Creek,Jimmymouth,Georgia,82991
4,Jasmine,Zamora,4246 Chelsey Ford Apt. 310,Karamouth,Utah,76252
...,...,...,...,...,...,...
9956,Dana,Browning,762 Andrew Views Apt. 254,North Paul,New Mexico,28889
9957,Amanda,Anderson,44188 Day Crest Apt. 901,Lake Marcia,Maine,37378
9958,Eric,Davis,73015 Michelle Squares,Watsonville,West Virginia,03933
9959,Taylor,Hernandez,129 Keith Greens,Haleyfurt,Oklahoma,98916
