## Working with cleaning up text data (splitting columns, changing dtypes, etc.)

In [2]:
import pandas as pd

In [3]:
df = pd.read_csv("chicago.csv")
df.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null object
Employee Annual Salary    32062 non-null object
dtypes: object(4)
memory usage: 1002.0+ KB


In [5]:
df.describe()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
count,32062,32062,32062,32062
unique,31776,1093,35,1156
top,"HERNANDEZ, JUAN C",POLICE OFFICER,POLICE,$87384.00
freq,4,9184,12618,2394


In [6]:
df["Department"].nunique()

35

In [7]:
df["Department"].count()

32062

In [8]:
# Good reason to convert to category. .astype() doesn't have inplace so have 
# to reset to original. Notice the mem usage drops.
df["Department"].astype(dtype = "category")
df["Department"] = df["Department"].astype(dtype = "category")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32063 entries, 0 to 32062
Data columns (total 4 columns):
Name                      32062 non-null object
Position Title            32062 non-null object
Department                32062 non-null category
Employee Annual Salary    32062 non-null object
dtypes: category(1), object(3)
memory usage: 784.4+ KB


## Common string methods: .lower(), .upper(), .title(), and .len()
You must put .str before you add the .method()

In [9]:
chicago = pd.read_csv("chicago.csv")
chicago["Department"] = chicago["Department"].astype(dtype = "category")
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00


In [10]:
"HELLO WORLD".lower()

'hello world'

In [11]:
"hello world".title()

'Hello World'

In [12]:
len("hellow world")

12

In [13]:
chicago["Name"].str.lower()

0                 aaron,  elvia j
1               aaron,  jeffery m
2                  aaron,  karina
3             aaron,  kimberlei r
4             abad jr,  vicente m
5                 abarca,  anabel
6               abarca,  emmanuel
7               abascal,  reece e
8            abbasi,  christopher
9           abbatacola,  robert j
10          abbatemarco,  james j
11               abbate,  terry m
12               abbott,  betty l
13              abbott,  lynise m
14         abbruzzese,  william j
15                abdallah,  zaid
16          abdelhadi,  abdalmahd
17            abdellatif,  aref r
18             abdelmajeid,  aziz
19            abdollahzadeh,  ali
20       abdul-karim,  muhammad a
21            abdullah,  daniel n
22               abdullah,  kevin
23           abdullah,  lakenya n
24            abdullah,  rashad j
25           abdulsattar,  mudhar
26           abdul-shakur,  tahir
27         abdulwahab,  abuubaida
28              abejero,  jason v
29        aber

In [14]:
chicago["Name"] = chicago["Name"].str.title()
chicago["Position Title"] = chicago["Position Title"].str.title()
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary
0,"Aaron, Elvia J",Water Rate Taker,WATER MGMNT,$90744.00
1,"Aaron, Jeffery M",Police Officer,POLICE,$84450.00
2,"Aaron, Karina",Police Officer,POLICE,$84450.00
3,"Aaron, Kimberlei R",Chief Contract Expediter,GENERAL SERVICES,$89880.00
4,"Abad Jr, Vicente M",Civil Engineer Iv,WATER MGMNT,$106836.00


In [15]:
chicago["Department"].str.len()

0        11.0
1         6.0
2         6.0
3        16.0
4        11.0
5        12.0
6        13.0
7         4.0
8        12.0
9         8.0
10        4.0
11        6.0
12       16.0
13        6.0
14        4.0
15        6.0
16        6.0
17        4.0
18        6.0
19        4.0
20       11.0
21        4.0
22        4.0
23        4.0
24       16.0
25       11.0
26       13.0
27       16.0
28        6.0
29        4.0
         ... 
32033     6.0
32034     6.0
32035     6.0
32036    13.0
32037     4.0
32038    11.0
32039     4.0
32040     6.0
32041     4.0
32042    16.0
32043    13.0
32044     6.0
32045     4.0
32046     7.0
32047     6.0
32048     3.0
32049     4.0
32050    11.0
32051     8.0
32052     6.0
32053     4.0
32054     6.0
32055     6.0
32056    16.0
32057    16.0
32058     6.0
32059     6.0
32060     6.0
32061     4.0
32062     NaN
Name: Department, Length: 32063, dtype: float64

## The .str.replace() Method

In [16]:
# It has some null rows so we need to add .dropna(how = "all") to remove
# rows that have ALL null values
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [17]:
"Hello World".replace("l", "!")

'He!!o Wor!d'

In [18]:
chicago["Department"] = chicago["Department"].str.replace("MGMNT", "MANAGEMENT")

In [19]:
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MANAGEMENT,$90744.00,WATER MGMNT
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,POLICE
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,POLICE
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,GENERAL SERVICES
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MANAGEMENT,$106836.00,WATER MGMNT


In [20]:
chicago["Employee Annual Salary"] = chicago["Employee Annual Salary"].str.replace("$", "").astype(float)

In [21]:
chicago["Employee Annual Salary"].sum()
chicago["Employee Annual Salary"].mean()
chicago["Employee Annual Salary"].std()
chicago["Employee Annual Salary"].nlargest(10)
chicago["Employee Annual Salary"].nsmallest(5)

15102       0.96
12       2756.00
27       2756.00
47       2756.00
295      2756.00
Name: Employee Annual Salary, dtype: float64

## Filtering with string methods
Use .contain() 

In [22]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [23]:
# Extract the word "water" if contained anywhere in the row
mask = chicago["Position Title"].str.lower().str.contains("water")
chicago[mask]

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT
554,"ALUISE, VINCENT G",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00,WATER MGMNT
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
685,"ANDERSON, ANDREW J",DISTRICT SUPERINTENDENT OF WATER DISTRIBUTION,WATER MGMNT,$109272.00,WATER MGMNT
702,"ANDERSON, DONALD",FOREMAN OF WATER PIPE CONSTRUCTION,WATER MGMNT,$102440.00,WATER MGMNT
1054,"ASHLEY, KARMA T",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
1079,"ATKINS, JOANNA M",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
1181,"AZEEM, MOHAMMED A",WATER CHEMIST II,WATER MGMNT,$53172.00,WATER MGMNT
1285,"BAJIC, JOHN A",WATER METER MACHINIST,WATER MGMNT,$82576.00,WATER MGMNT
2400,"BOLTON, BRIAN E",WATER RATE TAKER,WATER MGMNT,$78948.00,WATER MGMNT


In [24]:
# ONly where "water" starts at the very beginning
chicago[chicago["Position Title"].str.lower().str.startswith("water")]

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT
671,"ANDER, PERRY A",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
1054,"ASHLEY, KARMA T",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
1079,"ATKINS, JOANNA M",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
1181,"AZEEM, MOHAMMED A",WATER CHEMIST II,WATER MGMNT,$53172.00,WATER MGMNT
1285,"BAJIC, JOHN A",WATER METER MACHINIST,WATER MGMNT,$82576.00,WATER MGMNT
2400,"BOLTON, BRIAN E",WATER RATE TAKER,WATER MGMNT,$78948.00,WATER MGMNT
2586,"BOYCE, ADNER L",WATER CHEMIST II,WATER MGMNT,$82044.00,WATER MGMNT
2745,"BRANDYS, DANIEL",WATER CHEMIST II,WATER MGMNT,$53172.00,WATER MGMNT
3143,"BROWN, SHARON L",WATER RATE TAKER,WATER MGMNT,$82728.00,WATER MGMNT


In [25]:
# Extract all rows where value in position title ends with "ist"
mask2 = chicago["Position Title"].str.lower().str.endswith("ist")
chicago[mask2]

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
184,"AFROZ, NAYYAR",PSYCHIATRIST,HEALTH,$99840.00,HEALTH
308,"ALARCON, LUIS J",LOAN PROCESSING SPECIALIST,COMMUNITY DEVELOPMENT,$81948.00,COMMUNITY DEVELOPMENT
422,"ALLAIN, CAROLYN",SENIOR TELECOMMUNICATIONS SPECIALIST,DoIT,$89880.00,DoIT
472,"ALLEN, ROBERT",MACHINIST,WATER MGMNT,$94328.00,WATER MGMNT
705,"ANDERSON, EDWARD M",SR PROCUREMENT SPECIALIST,PROCUREMENT,$91476.00,PROCUREMENT
1022,"ARTEAGA, PAUL",MACHINIST,TRANSPORTN,$94328.00,TRANSPORTN
1163,"AYALA JR, JUAN",FIELD SANITATION SPECIALIST,STREETS & SAN,$78948.00,STREETS & SAN
1285,"BAJIC, JOHN A",WATER METER MACHINIST,WATER MGMNT,$82576.00,WATER MGMNT
1558,"BARRETT, BARBARA J",TECHNICAL TRAINING SPECIALIST,POLICE,$94200.00,POLICE
1869,"BELTRAN, MAURICIO",PROCUREMENT SPECIALIST,PROCUREMENT,$79596.00,PROCUREMENT


## More string methods - .strip(), .lstrip(), and .rstrip()

In [26]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [27]:
"      Hello World  ".lstrip().rstrip()
"     Hello World    ".strip()

'Hello World'

In [28]:
chicago["Name"] = chicago["Name"].str.rstrip().str.lstrip()

In [29]:
chicago["Position Title"].str.strip()

0                                     WATER RATE TAKER
1                                       POLICE OFFICER
2                                       POLICE OFFICER
3                             CHIEF CONTRACT EXPEDITER
4                                    CIVIL ENGINEER IV
5                                 ASST TO THE ALDERMAN
6                                GENERAL LABORER - DSS
7                          TRAFFIC CONTROL AIDE-HOURLY
8                           STAFF ASST TO THE ALDERMAN
9                                  ELECTRICAL MECHANIC
10                                   FIRE ENGINEER-EMT
11                                      POLICE OFFICER
12                                  FOSTER GRANDPARENT
13                                           CLERK III
14                              INVESTIGATOR - IPRA II
15                                      POLICE OFFICER
16                                      POLICE OFFICER
17       FIREFIGHTER (PER ARBITRATORS AWARD)-PARAMEDIC
18        

## String methods on index labels and columns labels

In [1]:
chicago = pd.read_csv("chicago.csv", index_col = "Name").dropna(how = "all")
chicago["Department"] = chicago["Department"].astype("category")
chicago.tail()

NameError: name 'pd' is not defined

In [31]:
chicago.index  # Can call all the regular string methods on this
chicago.index = chicago.index.str.strip().str.title() # right-side performed first

In [32]:
chicago.head()

Unnamed: 0_level_0,Position Title,Department,Employee Annual Salary,Deparment
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00,POLICE
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00,POLICE
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,GENERAL SERVICES
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,WATER MGMNT


In [33]:
chicago.columns
chicago.columns = chicago.columns.str.upper()

In [34]:
chicago.head()

Unnamed: 0_level_0,POSITION TITLE,DEPARTMENT,EMPLOYEE ANNUAL SALARY,DEPARMENT
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Aaron, Elvia J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT
"Aaron, Jeffery M",POLICE OFFICER,POLICE,$84450.00,POLICE
"Aaron, Karina",POLICE OFFICER,POLICE,$84450.00,POLICE
"Aaron, Kimberlei R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,GENERAL SERVICES
"Abad Jr, Vicente M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,WATER MGMNT


## Split strings by characters with str.split() method
Also covers .str.get(), .value_counts()

In [35]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [36]:
# Python example
"Hello my name is Gaylon".split()

['Hello', 'my', 'name', 'is', 'Gaylon']

In [37]:
chicago["Name"].str.split(",").str.get(0).str.title().value_counts()

Williams          293
Johnson           244
Smith             241
Brown             185
Jones             183
Rodriguez         171
Jackson           136
Garcia            130
Davis             127
Hernandez         110
Martinez          108
Lopez             106
Gonzalez          104
Perez             100
Wilson             94
Rivera             90
Thomas             89
Anderson           82
Torres             81
Murphy             80
Robinson           79
Moore              78
Harris             76
Sanchez            76
Miller             75
Lewis              74
Taylor             73
Martin             72
Clark              66
White              66
                 ... 
Werth               1
Lafata              1
Hardge              1
Mietka              1
Pulkownik           1
Mustafa             1
Outley              1
Krikava             1
Huante              1
Mc Miller           1
Omosikeji           1
Mc Leod             1
Yockey              1
Poklacki Jr         1
Mason Jr  

In [38]:
# Let's get the most common first word
chicago["Position Title"].str.split().str.get(0).str.title().value_counts()

Police                   10856
Firefighter-Emt           1509
Sergeant                  1186
Pool                       918
Firefighter                810
Crossing                   775
Motor                      721
Sanitation                 715
Paramedic                  641
Asst                       606
Traffic                    512
Fire                       512
Senior                     470
Construction               452
Lieutenant-Emt             394
Administrative             375
Library                    365
Librarian                  335
Lieutenant                 332
Operating                  324
Electrical                 313
Aviation                   309
Firefighter/Paramedic      259
General                    257
Staff                      250
Clerk                      242
Foreman                    237
Hoisting                   214
Deputy                     213
Laborer                    210
                         ...  
Portfolio                    1
Supervis

## More practice with splits

In [39]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [40]:
chicago["Name"].str.split(",").str.get(0).value_counts().head(3)

WILLIAMS    293
JOHNSON     244
SMITH       241
Name: Name, dtype: int64

In [41]:
# Let's get the most common first names
chicago["Name"].str.split(",")
chicago["Name"].str.split(",").str.get(1)
chicago["Name"].str.split(",").str.get(1).str.split()
chicago["Name"].str.split(",").str.get(1).str.split().str.get(0)
chicago["Name"].str.split(",").str.get(1).str.split().str.get(0).value_counts().head()

MICHAEL    1153
JOHN        899
JAMES       676
ROBERT      622
JOSEPH      537
Name: Name, dtype: int64

## The EXPAND and N parameters of the str.split() Method
str.split(expand = True) returns a new dataframe, rather than a python list of lists. Also can use (n = 1) parameter to limit the number of splits.

In [42]:
chicago = pd.read_csv("chicago.csv").dropna(how = "all")
chicago["Deparment"] = chicago["Department"].astype("category")
chicago.tail()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment
32057,"ZYGADLO, MICHAEL J",FRM OF MACHINISTS - AUTOMOTIVE,GENERAL SERVICES,$99528.00,GENERAL SERVICES
32058,"ZYGOWICZ, PETER J",POLICE OFFICER,POLICE,$87384.00,POLICE
32059,"ZYMANTAS, MARK E",POLICE OFFICER,POLICE,$84450.00,POLICE
32060,"ZYRKOWSKI, CARLO E",POLICE OFFICER,POLICE,$87384.00,POLICE
32061,"ZYSKOWSKI, DARIUSZ",CHIEF DATA BASE ANALYST,DoIT,$113664.00,DoIT


In [47]:
chicago["Name"].str.split(",", expand = True)
chicago[["First Name", "Last Name"]] = chicago["Name"].str.split(",", expand = True)

In [48]:
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment,First Name,Last Name
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT,AARON,ELVIA J
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,POLICE,AARON,JEFFERY M
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,POLICE,AARON,KARINA
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,GENERAL SERVICES,AARON,KIMBERLEI R
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,WATER MGMNT,ABAD JR,VICENTE M


In [52]:
# Let's make new columns split from position title
chicago["Position Title"].str.split(expand = True)

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,WATER,RATE,TAKER,,,,,,
1,POLICE,OFFICER,,,,,,,
2,POLICE,OFFICER,,,,,,,
3,CHIEF,CONTRACT,EXPEDITER,,,,,,
4,CIVIL,ENGINEER,IV,,,,,,
5,ASST,TO,THE,ALDERMAN,,,,,
6,GENERAL,LABORER,-,DSS,,,,,
7,TRAFFIC,CONTROL,AIDE-HOURLY,,,,,,
8,STAFF,ASST,TO,THE,ALDERMAN,,,,
9,ELECTRICAL,MECHANIC,,,,,,,


In [56]:
# But what if some splits + expand = True would create multiple
# columns in the DF with "None"? Use the n= parameter.
chicago["Position Title"].str.split(" ", expand = True, n = 1)
chicago[["First Title Word", "Remaining Words"]] = chicago["Position Title"].str.split(" ", expand = True, n = 1)

In [57]:
chicago.head()

Unnamed: 0,Name,Position Title,Department,Employee Annual Salary,Deparment,First Name,Last Name,First Title Word,Remaining Words
0,"AARON, ELVIA J",WATER RATE TAKER,WATER MGMNT,$90744.00,WATER MGMNT,AARON,ELVIA J,WATER,RATE TAKER
1,"AARON, JEFFERY M",POLICE OFFICER,POLICE,$84450.00,POLICE,AARON,JEFFERY M,POLICE,OFFICER
2,"AARON, KARINA",POLICE OFFICER,POLICE,$84450.00,POLICE,AARON,KARINA,POLICE,OFFICER
3,"AARON, KIMBERLEI R",CHIEF CONTRACT EXPEDITER,GENERAL SERVICES,$89880.00,GENERAL SERVICES,AARON,KIMBERLEI R,CHIEF,CONTRACT EXPEDITER
4,"ABAD JR, VICENTE M",CIVIL ENGINEER IV,WATER MGMNT,$106836.00,WATER MGMNT,ABAD JR,VICENTE M,CIVIL,ENGINEER IV
