In [1]:
import pandas as pd

In [3]:
chicago = pd.read_csv("chicago.csv")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [4]:
chicago.info() # Salary is stored as objec (=string)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null object
Employee Annual Salary    32062 non-null object
dtypes: object(4)
memory usage: 1002.0+ KB


In [7]:
chicago["Department"].nunique() # The number of unique departments in this list 

35

In [8]:
chicago["Department"].count() #We can optimize memory usage by convert string to category

32062

In [9]:
chicago["Department"] = chicago["Department"].astype("category")

In [10]:
chicago.info() # reduced the memory usage by 25%

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null category
Employee Annual Salary    32062 non-null object
dtypes: category(1), object(3)
memory usage: 784.4+ KB


## Common String Method- .lower(), .upper(), .title(), and len()

In [12]:
chicago = pd.read_csv("chicago.csv")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [14]:
"HELLO World".lower()

'hello world'

In [18]:
"Hello World".upper()

'HELLO WORLD'

In [19]:
"hello world".title()

'Hello World'

In [20]:
len("Hello World") #spaces count

11

In [24]:
chicago["Name"].str.lower().head()  
#When using string method on pandas Series, put .str prefex before the string method

0        aaron,  elvia j
1      aaron,  jeffery m
2         aaron,  karina
3    aaron,  kimberlei r
4    abad jr,  vicente m
Name: Name, dtype: object

In [26]:
chicago["Name"].str.title().head() # capitalize the first word of every word

0        Aaron,  Elvia J
1      Aaron,  Jeffery M
2         Aaron,  Karina
3    Aaron,  Kimberlei R
4    Abad Jr,  Vicente M
Name: Name, dtype: object

In [30]:
chicago["Position Title"].str.title().head()

0            Water Rate Taker
1              Police Officer
2              Police Officer
3    Chief Contract Expediter
4           Civil Engineer Iv
Name: Position Title, dtype: object

In [31]:
chicago["Position Title"] = chicago["Position Title"].str.title()

In [32]:
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",Water Rate Taker,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",Police Officer,POLICE,$84450.00
2,"AARON, KARINA",Police Officer,POLICE,$84450.00
3,"AARON, KIMBERLEI R",Chief Contract Expediter,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",Civil Engineer Iv,WATER MGMNT,$106836.00


In [36]:
chicago["Department"].str.len().head() # this len is not the python built-in method 

0    11.0
1     6.0
2     6.0
3    16.0
4    11.0
Name: Department, dtype: float64

## The .str.replace() Method

In [37]:
chicago = pd.read_csv("chicago.csv")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [38]:
"Hello World".replace("l", "!") # replace method on a regular string

'He!!o Wor!d'

In [39]:
chicago.tail() 

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00
32062,,,,


In [41]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all") # Use dropna(how = "all") to drop the final row
chicago["Department"] = chicago["Department"].astype("category")
chicago.tail(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00


In [12]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [13]:
chicago["Department"] = chicago["Department"].str.replace("MGMNT", "MANAGEMENT")
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MANAGEMENT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MANAGEMENT,$106836.00


#### Turn string number to float number

In [14]:
chicago["Employee Annual Salary"].str.replace("$", "").astype(float).head() #replace $ with empty string

0     90744.0
1     84450.0
2     84450.0
3     89880.0
4    106836.0
Name: Employee Annual Salary, dtype: float64

In [16]:
chicago["Employee Annual Salary"] = chicago["Employee Annual Salary"].str.replace("$", "").astype(float) 
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MANAGEMENT,90744.0
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,84450.0
2,"AARON, KARINA",POLICE OFFICER,POLICE,84450.0
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,89880.0
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MANAGEMENT,106836.0


In [17]:
chicago.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 32062 entries, 0 to 32061
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null object
Employee Annual Salary    32062 non-null float64
dtypes: float64(1), object(3)
memory usage: 1.2+ MB


In [18]:
chicago["Employee Annual Salary"].mean()

80204.17863389682

In [19]:
chicago["Employee Annual Salary"].std()

25098.32986750994

In [21]:
chicago["Employee Annual Salary"].nlargest(10)

8184     300000.0
7954     216210.0
25532    202728.0
8924     197736.0
8042     197724.0
19208    195000.0
3706     187680.0
18556    187680.0
29466    187680.0
13754    185364.0
Name: Employee Annual Salary, dtype: float64

## Filtering wiht String Methods

In [22]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [26]:
chicago["Position Title"].str.lower().head() # Normalize before filtering. Pandas string match is case sensitive

0            water rate taker
1              police officer
2              police officer
3    chief contract expediter
4           civil engineer iv
Name: Position Title, dtype: object

### .contains() method

In [30]:
mask = chicago["Position Title"].str.lower().str.contains("water")  # .contains() returen boolean
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
554,"ALUISE, VINCENT G",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,$82044.00
685,"ANDERSON, ANDREW J",DISTRICT SUPERINTENDENT OF WATER DISTRIBUTION,WATER MGMNT,$109272.00
702,"ANDERSON, DONALD",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00


### .startwith() method

In [31]:
mask = chicago["Position Title"].str.lower().str.startswith("water")
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,$82044.00
1054,"ASHLEY, KARMA T",WATER CHEMIST II,WATER MGMNT,$82044.00
1079,"ATKINS, JOANNA M",WATER CHEMIST II,WATER MGMNT,$82044.00
1181,"AZEEM, MOHAMMED A",WATER CHEMIST II,WATER MGMNT,$53172.00


### .endswith() method

In [33]:
mask = chicago["Position Title"].str.lower().str.endswith("ist")
chicago[mask].head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
184,"AFROZ, NAYYAR",PSYCHIATRIST,HEALTH,$99840.00
308,"ALARCON, LUIS J",LOAN PROCESSING SPECIALIST,COMMUNITY DEVELOPMENT,$81948.00
422,"ALLAIN, CAROLYN",SENIOR TELECOMMUNICATIONS SPECIALIST,DoIT,$89880.00
472,"ALLEN, ROBERT",MACHINIST,WATER MGMNT,$94328.00
705,"ANDERSON, EDWARD M",SR PROCUREMENT SPECIALIST,PROCUREMENT,$91476.00


## String Method to remove white space - .strip(), .lstrip(), and .rstrip()

In [34]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [36]:
"     Hello World  ".lstrip() #remove spaces from the left

'Hello World  '

In [37]:
"     Hello World  ".rstrip() #remove spaces from the right

'     Hello World'

In [39]:
"     Hello World  ".strip() #remove spaces from the both ends

'Hello World'

In [42]:
chicago["Name"].str.lstrip().head()

0        AARON,  ELVIA J
1      AARON,  JEFFERY M
2         AARON,  KARINA
3    AARON,  KIMBERLEI R
4    ABAD JR,  VICENTE M
Name: Name, dtype: object

In [45]:
chicago["Name"] = chicago["Name"].str.strip()
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [46]:
chicago["Position Title"] = chicago["Position Title"].str.lstrip().str.rstrip() # same as strip() method
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


## String Methods on Index and Columns

In [48]:
import pandas as pd
chicago = pd.read_csv("chicago.csv", index_col = "Name").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [51]:
chicago.index # .index is an atribute of DataFrame

Index(['AARON,  ELVIA J', 'AARON,  JEFFERY M', 'AARON,  KARINA',
       'AARON,  KIMBERLEI R', 'ABAD JR,  VICENTE M', 'ABARCA,  ANABEL',
       'ABARCA,  EMMANUEL', 'ABASCAL,  REECE E', 'ABBASI,  CHRISTOPHER',
       'ABBATACOLA,  ROBERT J',
       ...
       'ZWIT,  JEFFREY J', 'ZWOLFER,  MATTHEW W', 'ZYCH,  MATEUSZ',
       'ZYDEK,  BRYAN', 'ZYGADLO,  JOHN P', 'ZYGADLO,  MICHAEL J',
       'ZYGOWICZ,  PETER J', 'ZYMANTAS,  MARK E', 'ZYRKOWSKI,  CARLO E',
       'ZYSKOWSKI,  DARIUSZ'],
      dtype='object', name='Name', length=32062)

In [52]:
chicago.index.str.strip().str.title()

Index(['Aaron,  Elvia J', 'Aaron,  Jeffery M', 'Aaron,  Karina',
       'Aaron,  Kimberlei R', 'Abad Jr,  Vicente M', 'Abarca,  Anabel',
       'Abarca,  Emmanuel', 'Abascal,  Reece E', 'Abbasi,  Christopher',
       'Abbatacola,  Robert J',
       ...
       'Zwit,  Jeffrey J', 'Zwolfer,  Matthew W', 'Zych,  Mateusz',
       'Zydek,  Bryan', 'Zygadlo,  John P', 'Zygadlo,  Michael J',
       'Zygowicz,  Peter J', 'Zymantas,  Mark E', 'Zyrkowski,  Carlo E',
       'Zyskowski,  Dariusz'],
      dtype='object', name='Name', length=32062)

In [56]:
chicago.index = chicago.index.str.strip().str.title() #reassign
chicago.head()

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [57]:
chicago.columns

Index(['Position Title', 'Department', 'Employee Annual Salary'], dtype='object')

In [58]:
chicago.columns.str.upper()

Index(['POSITION TITLE', 'DEPARTMENT', 'EMPLOYEE ANNUAL SALARY'], dtype='object')

In [59]:
chicago.columns = chicago.columns.str.upper() #reassign
chicago.head()

Unnamed: 0_level_0,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


## Split Strings by Characters with .str.split() Method

In [4]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [5]:
"Hello my name is Boris".split() # split method takes a single argument, sepalateor

['Hello', 'my', 'name', 'is', 'Boris']

In [6]:
"Hello my name is Boris".split(" ") # split method takes a single argument, sepalateor

['Hello', 'my', 'name', 'is', 'Boris']

In [10]:
chicago["Name"].str.split(",").head()

0        [AARON,   ELVIA J]
1      [AARON,   JEFFERY M]
2         [AARON,   KARINA]
3    [AARON,   KIMBERLEI R]
4    [ABAD JR,   VICENTE M]
Name: Name, dtype: object

In [13]:
chicago["Name"].str.split(",").str.get(0).head()  # str.get() method pulls elements from the list

0      AARON
1      AARON
2      AARON
3      AARON
4    ABAD JR
Name: Name, dtype: object

In [15]:
chicago["Name"].str.split(",").str.get(0).str.title().head()

0      Aaron
1      Aaron
2      Aaron
3      Aaron
4    Abad Jr
Name: Name, dtype: object

In [18]:
chicago["Name"].str.split(",").str.get(0).str.title().value_counts().head(10)

Williams     293
Johnson      244
Smith        241
Brown        185
Jones        183
Rodriguez    171
Jackson      136
Garcia       130
Davis        127
Hernandez    110
Name: Name, dtype: int64

In [24]:
chicago["Position Title"].str.split(" ").str.get(0).value_counts().head(10) # Index position of the first item is 0

POLICE             10856
FIREFIGHTER-EMT     1509
SERGEANT            1186
POOL                 918
FIREFIGHTER          810
CROSSING             775
MOTOR                721
SANITATION           715
PARAMEDIC            641
ASST                 606
Name: Position Title, dtype: int64

## More practice wiht Splits

In [1]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [4]:
chicago["Name"].str.split(",").str.get(0).value_counts().head()

WILLIAMS    293
JOHNSON     244
SMITH       241
BROWN       185
JONES       183
Name: Name, dtype: int64

In [5]:
chicago["Name"].str.split(",").head(10)

0          [AARON,   ELVIA J]
1        [AARON,   JEFFERY M]
2           [AARON,   KARINA]
3      [AARON,   KIMBERLEI R]
4      [ABAD JR,   VICENTE M]
5          [ABARCA,   ANABEL]
6        [ABARCA,   EMMANUEL]
7        [ABASCAL,   REECE E]
8     [ABBASI,   CHRISTOPHER]
9    [ABBATACOLA,   ROBERT J]
Name: Name, dtype: object

In [33]:
chicago["Name"].str.split(",").str.get(1).head(10)

0          ELVIA J
1        JEFFERY M
2           KARINA
3      KIMBERLEI R
4        VICENTE M
5           ANABEL
6         EMMANUEL
7          REECE E
8      CHRISTOPHER
9         ROBERT J
Name: Name, dtype: object

In [36]:
chicago["Name"].str.split(",").str.get(1).str.split(" ").head(10) # There are spaces before the first name

0        [, , ELVIA, J]
1      [, , JEFFERY, M]
2          [, , KARINA]
3    [, , KIMBERLEI, R]
4      [, , VICENTE, M]
5          [, , ANABEL]
6        [, , EMMANUEL]
7        [, , REECE, E]
8     [, , CHRISTOPHER]
9       [, , ROBERT, J]
Name: Name, dtype: object

In [6]:
chicago["Name"].str.split(",").str.get(1).str.strip().str.split(" ").head(10) #strip method delete the spaces from both sides 

0        [ELVIA, J]
1      [JEFFERY, M]
2          [KARINA]
3    [KIMBERLEI, R]
4      [VICENTE, M]
5          [ANABEL]
6        [EMMANUEL]
7        [REECE, E]
8     [CHRISTOPHER]
9       [ROBERT, J]
Name: Name, dtype: object

In [7]:
chicago["Name"].str.split(",").str.get(1).str.strip().str.split(" ").str.get(0).head(10)

0          ELVIA
1        JEFFERY
2         KARINA
3      KIMBERLEI
4        VICENTE
5         ANABEL
6       EMMANUEL
7          REECE
8    CHRISTOPHER
9         ROBERT
Name: Name, dtype: object

In [8]:
import pandas as pd
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.head(3)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00


In [13]:
chicago["Name"].str.split(",",  expand = True).head(10) # "expand = True" returns a DataFrame rather than a python list

Unnamed: 0,0,1
0,AARON,ELVIA J
1,AARON,JEFFERY M
2,AARON,KARINA
3,AARON,KIMBERLEI R
4,ABAD JR,VICENTE M
5,ABARCA,ANABEL
6,ABARCA,EMMANUEL
7,ABASCAL,REECE E
8,ABBASI,CHRISTOPHER
9,ABBATACOLA,ROBERT J


In [14]:
chicago[["First Name", "Last Name"]] = chicago["Name"].str.split(",",  expand = True).head(10) # store the results in a new columns

In [15]:
chicago.head(10)

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,First Name,Last Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,AARON,KARINA
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,AARON,KIMBERLEI R
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,ABAD JR,VICENTE M
5,"ABARCA, ANABEL",ASST TO THE ALDERMAN,CITY COUNCIL,$70764.00,ABARCA,ANABEL
6,"ABARCA, EMMANUEL",GENERAL LABORER - DSS,STREETS & SAN,$41849.60,ABARCA,EMMANUEL
7,"ABASCAL, REECE E",TRAFFIC CONTROL AIDE-HOURLY,OEMC,$20051.20,ABASCAL,REECE E
8,"ABBASI, CHRISTOPHER",STAFF ASST TO THE ALDERMAN,CITY COUNCIL,$49452.00,ABBASI,CHRISTOPHER
9,"ABBATACOLA, ROBERT J",ELECTRICAL MECHANIC,AVIATION,$93600.00,ABBATACOLA,ROBERT J


In [17]:
chicago["Position Title"].str.split(" ", expand = True).head(10)
# There is different number of spaces in every single department value. 
# We have None in a column when the number of words in that row is relatively small.

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,WATER,RATE,TAKER,,,,,,
1,POLICE,OFFICER,,,,,,,
2,POLICE,OFFICER,,,,,,,
3,CHIEF,CONTRACT,EXPEDITER,,,,,,
4,CIVIL,ENGINEER,IV,,,,,,
5,ASST,TO,THE,ALDERMAN,,,,,
6,GENERAL,LABORER,-,DSS,,,,,
7,TRAFFIC,CONTROL,AIDE-HOURLY,,,,,,
8,STAFF,ASST,TO,THE,ALDERMAN,,,,
9,ELECTRICAL,MECHANIC,,,,,,,


In [19]:
chicago["Position Title"].str.split(" ", expand = True, n = 1).head(10) 
# n = 1 means 1 split for every row
# everything comes out after the fist space is in the second column

Unnamed: 0,0,1
0,WATER,RATE TAKER
1,POLICE,OFFICER
2,POLICE,OFFICER
3,CHIEF,CONTRACT EXPEDITER
4,CIVIL,ENGINEER IV
5,ASST,TO THE ALDERMAN
6,GENERAL,LABORER - DSS
7,TRAFFIC,CONTROL AIDE-HOURLY
8,STAFF,ASST TO THE ALDERMAN
9,ELECTRICAL,MECHANIC
