In [16]:
import numpy as np
import pandas as pd

# Load the BL-Flickr-Images-Book CSV directly from the realpython GitHub
# repository (requires network access).
flickrData = pd.read_csv('https://github.com/realpython/python-data-cleaning/raw/master/Datasets/BL-Flickr-Images-Book.csv')

# Columns to remove; per the original author they contain a high share of NaN.
to_drop = [
    'Edition Statement',
    'Corporate Author',
    'Corporate Contributors',
    'Former owner',
    'Engraver',
]

# Share of NaN vs. non-NaN values in one of those columns. This has to run
# before the drop below, while 'Engraver' still exists. As a bare expression
# it is only rendered by a notebook when it is the last statement of a cell.
flickrData['Engraver'].isna().value_counts(dropna=False) / len(flickrData)

# Remove the sparse columns (not individual NaN values). `columns=` already
# selects the axis, so the redundant `axis=1` is no longer passed.
flickrData.drop(columns=to_drop, inplace=True)

# Use the 'Identifier' column as the row index.
flickrData.set_index('Identifier', inplace=True)

# Capture exactly four digits anchored at the start of the string.
regex = r'^(\d{4})'

# Extract the leading four-digit year from each publication date; entries
# that do not start with four digits come back as NaN.
flickrDataExtr = flickrData['Date of Publication'].str.extract(regex, expand=False)

# Store the extracted year as a numeric value instead of an object/string.
flickrData['Date of Publication'] = pd.to_numeric(flickrDataExtr)

# Fraction of rows whose publication year is missing after extraction.
flickrData['Date of Publication'].isnull().sum() / len(flickrData)

# Summary statistics of the numeric publication year.
flickrData['Date of Publication'].describe()

flickrDataPop = flickrData['Place of Publication']

# na=False makes missing places evaluate to False. Without it the mask holds
# NaN for those rows, and np.where treats NaN as truthy, which would silently
# relabel a missing place as 'London'.
london = flickrDataPop.str.contains('London', na=False)
oxford = flickrDataPop.str.contains('Oxford', na=False)

# Collapse any place mentioning London/Oxford to the bare city name; for all
# other rows replace hyphens with spaces (literal match, hence regex=False).
# London takes precedence when both names occur.
flickrData['Place of Publication'] = np.where(
    london,
    'London',
    np.where(
        oxford,
        'Oxford',
        flickrDataPop.str.replace('-', ' ', regex=False),
    ),
)


# Preview the first ten cleaned rows (displayed as the cell's output).
flickrData.head(10)

Unnamed: 0_level_0,Place of Publication,Date of Publication,Publisher,Title,Author,Contributors,Issuance type,Flickr URL,Shelfmarks
Identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
206,London,1879.0,S. Tinsley & Co.,Walter Forbes. [A novel.] By A. A,A. A.,"FORBES, Walter.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12641.b.30.
216,London,1868.0,Virtue & Co.,All for Greed. [A novel. The dedication signed...,"A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12626.cc.2.
218,London,1869.0,"Bradbury, Evans & Co.",Love the Avenger. By the author of “All for Gr...,"A., A. A.","BLAZE DE BURY, Marie Pauline Rose - Baroness",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12625.dd.1.
472,London,1851.0,James Darling,"Welsh Sketches, chiefly ecclesiastical, to the...","A., E. S.","Appleyard, Ernest Silvanus.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 10369.bbb.15.
480,London,1857.0,Wertheim & Macintosh,"[The World in which I live, and my place in it...","A., E. S.","BROOME, John Henry.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 9007.d.28.
481,London,1875.0,William Macintosh,"[The World in which I live, and my place in it...","A., E. S.","BROOME, John Henry.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 9006.ee.10.
519,London,1872.0,The Author,Lagonells. By the author of Darmayne (F. E. A....,"A., F. E.","ASHLEY, Florence Emily.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 12637.e.3.
667,Oxford,,,"The Coming of Spring, and other poems. By J. A...","A., J.|A., J.","ANDREWS, J. - Writer of Verse",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 011652.g.73.
874,London,1676.0,,"A Warning to the inhabitants of England, and L...",Remaʿ.,"ADAMS, Mary.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 11645.bb.42.
1143,London,1679.0,,A Satyr against Vertue. (A poem: supposed to b...,"A., T.","OLDHAM, John.",monographic,http://www.flickr.com/photos/britishlibrary/ta...,British Library HMNTS 11602.ee.10.(2.)
