- 将字段 Name 分割为两个字段：firstName 和 lastName

In [1]:
import datetime
import pandas as pd

In [2]:
# Load the fake personal-info dataset from the working directory and preview
# its first five rows.
info = pd.read_csv(filepath_or_buffer='fake_info.csv')
info.head(n=5)

Unnamed: 0,Name,Telephone,Birth,Company,Position,Address,Email
0,James Cortez,001-063-413-7593x55230,1973-01-17,"Smith, Sandoval and Kirk","Doctor, general practice","97659 Robin Highway Apt. 942\nFryland, KY 66604",stephaniehutchinson@rios-hanson.org
1,Summer Velez,624.477.9653x5953,2006-03-30,Hines PLC,Publishing rights manager,5813 John Canyon Apt. 990\nWest Nathanielmouth...,harrisbradley@yahoo.com
2,Daniel Mason,001-826-022-1771x091,1975-05-13,Harris and Sons,Product/process development scientist,"1369 Dennis Summit\nPort Erikberg, DE 92989",teresaperez@yahoo.com
3,Lori Henderson,263.163.4209x83443,2021-04-16,Martin LLC,"Psychologist, clinical","107 John Mills\nSouth Tamara, NJ 15639",turnerdonna@lewis.net
4,Samantha Nunez,+1-314-769-6447,2015-08-07,Waters-Osborne,Health promotion specialist,2130 Natasha Ridges Apt. 972\nWest Laurenshire...,yphillips@hotmail.com


In [3]:
# Series.str.split is called with three arguments:
#   pat    - the separator to split on (whitespace when omitted)
#   n      - the maximum number of splits to perform
#   expand - False (the default) returns a Series of lists; True returns a DataFrame
# With n=1 only the first space is used, so everything after it lands in
# column 1 (e.g. 'Mr. Joshua Lee Jr.' becomes 'Mr.' and 'Joshua Lee Jr.').
name_column = info.Name
names = name_column.str.split(pat=' ', n=1, expand=True)
names.head()

Unnamed: 0,0,1
0,James,Cortez
1,Summer,Velez
2,Daniel,Mason
3,Lori,Henderson
4,Samantha,Nunez


- 将 firstName 和 lastName 字段插入到指定位置，并删除原 Name 字段

In [4]:
# Replace 'Name' with the two split columns, placed at positions 0 and 1.
# The frame is rebuilt in one step and guarded on 'Name' still being present,
# so re-running this cell is a no-op. An insert()/drop(inplace=True) sequence
# is not re-runnable: DataFrame.insert raises ValueError when the column
# already exists, and drop raises KeyError once 'Name' is gone.
if 'Name' in info.columns:
    # names[[0, 1]] raises KeyError if the split did not yield two columns.
    split_columns = names[[0, 1]].rename(columns={0: 'firstName', 1: 'lastName'})
    # Both frames share the same row index, so concat aligns them row by row.
    info = pd.concat([split_columns, info.drop(columns='Name')], axis=1)
info.head()

Unnamed: 0,firstName,lastName,Telephone,Birth,Company,Position,Address,Email
0,James,Cortez,001-063-413-7593x55230,1973-01-17,"Smith, Sandoval and Kirk","Doctor, general practice","97659 Robin Highway Apt. 942\nFryland, KY 66604",stephaniehutchinson@rios-hanson.org
1,Summer,Velez,624.477.9653x5953,2006-03-30,Hines PLC,Publishing rights manager,5813 John Canyon Apt. 990\nWest Nathanielmouth...,harrisbradley@yahoo.com
2,Daniel,Mason,001-826-022-1771x091,1975-05-13,Harris and Sons,Product/process development scientist,"1369 Dennis Summit\nPort Erikberg, DE 92989",teresaperez@yahoo.com
3,Lori,Henderson,263.163.4209x83443,2021-04-16,Martin LLC,"Psychologist, clinical","107 John Mills\nSouth Tamara, NJ 15639",turnerdonna@lewis.net
4,Samantha,Nunez,+1-314-769-6447,2015-08-07,Waters-Osborne,Health promotion specialist,2130 Natasha Ridges Apt. 972\nWest Laurenshire...,yphillips@hotmail.com


- 从Email中提取域名（Series字符串操作支持正则表达式）

In [5]:
# Capture everything after the first '@' of each address as the e-mail domain.
# expand=False makes extract return a Series for the single capture group.
domain = info.Email.str.extract('@(.*)', expand=False)
info['Domain'] = domain
info

Unnamed: 0,firstName,lastName,Telephone,Birth,Company,Position,Address,Email,Domain
0,James,Cortez,001-063-413-7593x55230,1973-01-17,"Smith, Sandoval and Kirk","Doctor, general practice","97659 Robin Highway Apt. 942\nFryland, KY 66604",stephaniehutchinson@rios-hanson.org,rios-hanson.org
1,Summer,Velez,624.477.9653x5953,2006-03-30,Hines PLC,Publishing rights manager,5813 John Canyon Apt. 990\nWest Nathanielmouth...,harrisbradley@yahoo.com,yahoo.com
2,Daniel,Mason,001-826-022-1771x091,1975-05-13,Harris and Sons,Product/process development scientist,"1369 Dennis Summit\nPort Erikberg, DE 92989",teresaperez@yahoo.com,yahoo.com
3,Lori,Henderson,263.163.4209x83443,2021-04-16,Martin LLC,"Psychologist, clinical","107 John Mills\nSouth Tamara, NJ 15639",turnerdonna@lewis.net,lewis.net
4,Samantha,Nunez,+1-314-769-6447,2015-08-07,Waters-Osborne,Health promotion specialist,2130 Natasha Ridges Apt. 972\nWest Laurenshire...,yphillips@hotmail.com,hotmail.com
...,...,...,...,...,...,...,...,...,...
99995,Julie,Williams,001-460-209-6358x986,2017-12-05,Schroeder-Young,Stage manager,32740 Anderson Junction Suite 346\nJohnsonfurt...,nmiller@hotmail.com,hotmail.com
99996,Rebecca,Willis,+1-287-065-1465x355,2018-09-06,"Williams, Hodge and Weaver",Systems analyst,"5302 Alison Throughway\nNorth Erin, TX 26478",kbrock@wood-walter.info,wood-walter.info
99997,Janet,Gregory,+1-031-438-3129,1979-04-26,"Mitchell, Pena and Williams",Lawyer,"625 Wade Route Suite 688\nLake Barbara, MI 65061",thorton@gmail.com,gmail.com
99998,Danny,Burns,216.960.0254x922,1996-10-26,Wagner Group,Forensic psychologist,"791 John Ports\nEast Jeremiahbury, DE 49247",sarahmassey@hotmail.com,hotmail.com


- 计算每个人的年龄，并筛选出年龄在35-45岁，职位包含manager的数据
    - 先将Birth转换为日期格式
    - 再用当前年份减去 Birth 的年份并加 1，得出年龄（按年份计算，不考虑当年生日是否已过）
    - 根据年龄区间和职位包含的关键字，筛选数据

In [6]:
# Parse Birth into datetime values so the .dt accessor can be used on it.
info['Birth'] = pd.to_datetime(info['Birth'])
# Age is counted by calendar year: current year minus birth year, plus one.
# It does not check whether this year's birthday has already passed, and the
# result depends on the date the cell is run.
this_year = datetime.date.today().year
info['Age'] = this_year - info['Birth'].dt.year + 1
info.head()

Unnamed: 0,firstName,lastName,Telephone,Birth,Company,Position,Address,Email,Domain,Age
0,James,Cortez,001-063-413-7593x55230,1973-01-17,"Smith, Sandoval and Kirk","Doctor, general practice","97659 Robin Highway Apt. 942\nFryland, KY 66604",stephaniehutchinson@rios-hanson.org,rios-hanson.org,50
1,Summer,Velez,624.477.9653x5953,2006-03-30,Hines PLC,Publishing rights manager,5813 John Canyon Apt. 990\nWest Nathanielmouth...,harrisbradley@yahoo.com,yahoo.com,17
2,Daniel,Mason,001-826-022-1771x091,1975-05-13,Harris and Sons,Product/process development scientist,"1369 Dennis Summit\nPort Erikberg, DE 92989",teresaperez@yahoo.com,yahoo.com,48
3,Lori,Henderson,263.163.4209x83443,2021-04-16,Martin LLC,"Psychologist, clinical","107 John Mills\nSouth Tamara, NJ 15639",turnerdonna@lewis.net,lewis.net,2
4,Samantha,Nunez,+1-314-769-6447,2015-08-07,Waters-Osborne,Health promotion specialist,2130 Natasha Ridges Apt. 972\nWest Laurenshire...,yphillips@hotmail.com,hotmail.com,8


In [7]:
# Keep rows aged 35 to 45 (both ends included) whose Position contains the
# substring 'manager'. The match is case-sensitive.
in_age_range = info['Age'].between(35, 45)
is_manager = info['Position'].str.contains('manager')
filter_info = info.loc[in_age_range & is_manager]
filter_info.head()

Unnamed: 0,firstName,lastName,Telephone,Birth,Company,Position,Address,Email,Domain,Age
15,Jessica,Chambers,(282)584-7521x47057,1987-05-06,Doyle Inc,Quarry manager,Unit 2762 Box 3166\nDPO AP 16099,thomas50@garcia.info,garcia.info,36
67,Sherri,Harris,3259346512,1983-04-21,Lopez-Holder,Hotel manager,"6284 Joseph Orchard\nMatthewhaven, AZ 13843",rebeccawinters@hotmail.com,hotmail.com,40
158,Austin,Mcknight,894-857-4376x0470,1981-04-11,Chavez-Adkins,Heritage manager,"2674 Hill Vista\nPort Mallory, VT 27163",lkrause@harmon-robinson.biz,harmon-robinson.biz,42
197,Kimberly,Nunez,001-166-070-2536x642,1978-08-18,"Walker, Williams and Johnson",Insurance account manager,"0273 Logan Squares\nLake Richard, CO 31083",brownkent@rios.org,rios.org,45
233,Mr.,Joshua Lee Jr.,800-430-1906x3417,1985-04-20,"Ortiz, Carpenter and Roberts",Purchasing manager,"684 Paul Springs\nWest Gabrielmouth, NY 69442",monica29@yahoo.com,yahoo.com,38
