In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# collections data: relative path to the raw objects CSV that the next cell reads
csv_file = "resources/Mia_objects_raw.csv"

In [3]:
# load the raw CSV; the 'Unnamed: 0' column in the file becomes the dataframe index
df = pd.read_csv(csv_file, index_col='Unnamed: 0')
# preview the first rows
df.head()

Unnamed: 0,accession_number,artist,classification,continent,country,creditline,culture,dated,department,id,life_date,medium,nationality,object_name,provenance,room,style,title
0,10.1,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c.1888-89,Prints and Drawings,0,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Air, from the series The Four Elements"
1,10.2,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,1,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Earth, from the series The Four Elements"
2,10.3,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,2,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",G352,19th century,"Fire, from the series The Four Elements"
3,10.4,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,3,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Water, from the series The Four Elements"
4,13.29,Walter Shirlaw,Drawings,North America,United States,Gift of Mrs. Florence M. Shirlaw,,19th century,Prints and Drawings,4,"American, 1838 - 1909",Watercolor,American,Drawing,,Not on View,19th century,Montana Indian Reservation I


In [4]:
# remove the leading "P."/"p." marker used on prints & drawings.
# note: lstrip removes any leading run of the listed characters, not a literal prefix
df['accession_number'] = (
    df['accession_number']
    .str.lstrip('P.')
    .str.lstrip('p.')
)

# everything before the first '.' is taken as the accession year
df['accession_year'] = df['accession_number'].str.split('.', n=1).str[0]

# Accession Number Cleaning

### RBL/RB/X
Adding columns for unknown accession letters

In [5]:
# one column per accession-letter pattern: each holds the matched text
# ('RBL', 'RB', or a leading 'X') and NaN where the pattern does not occur
prefix_patterns = [
    ('RBL', r'(RBL)'),
    ('RB', r'(RB)'),
    ('X', r'(^X)'),
]
for column_name, pattern in prefix_patterns:
    df[column_name] = df['accession_year'].str.extract(pattern, expand=True)

### L
Adding column for loaned objects

In [6]:
# loan flag: 'L' where accession_year starts with an L, NaN otherwise
loan_pattern = r'(^L)'
df['loan'] = df['accession_year'].str.extract(loan_pattern, expand=True)

Stripping letters from accession_year

In [7]:
# drop any leading run of R/B/L characters (covers the L, RB and RBL prefixes),
# then drop any leading X characters
df['accession_year'] = (
    df['accession_year']
    .str.lstrip('RBL')
    .str.lstrip('X')
)

### Filter out invalid years

Filter dataframe three ways:
- Filter out accession years longer than 4 characters (years go up to 4 digits, e.g. 2019)
- Filter out accession numbers shorter than 4 characters (standard accession numbers are in the format YY.#)
- Use groupby to filter out any accession years with 30 or fewer entries

In [8]:
# replace all empty cells with a missing-value marker (NaN).
# np.nan is used instead of None because replace('', None) is version-dependent:
# before pandas 1.4 the explicit None was ignored and empty cells were
# forward-filled from the row above rather than blanked out.
df = df.replace('', np.nan)

In [9]:
# split the dataframe on accession_year length: at most 4 characters is valid.
# .str.len() yields NaN for missing values instead of raising like len() does;
# NaN compares False, so rows without a year land in df_2 with the invalid ones.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]

In [10]:
# split df_1 on accession_number length: fewer than 4 characters is invalid.
# .str.len() yields NaN for missing values instead of raising like len() does;
# NaN compares False, so rows without a number land in df_4 with the short ones.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]

In [11]:
# number of df_3 rows sharing each row's accession_year
rows_per_year = df_3['accession_year'].map(df_3['accession_year'].value_counts())

# years with more than 30 rows are kept; years with 30 or fewer are set aside
df_grouped = df_3[rows_per_year > 30]
df_remainder = df_3[rows_per_year <= 30]

In [12]:
# combine df_2, df_4, and df_remainder into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same frames
# in the same order and keeps their original index labels.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# ten most frequent creditlines among the rejected rows
df_remainder.creditline.value_counts().head(10)

The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund, 1966       7573
The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916        4962
Gift of Mrs. Charles C. Bovey, 1924                                      3133
Gift of George A. Goddard, 1919                                           614
Gift of H. V. Jones                                                       518
The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916\r\n     325
Gift of Mrs. George P. Douglas, 1946                                      267
Gift of Herschel V. Jones, 1926                                           231
Gift of Mrs. George P. Douglas, 1929                                      212
Gift of Mrs.C.C.Bovey, 1924                                               163
Name: creditline, dtype: int64

### Pull Accession Year from Creditline
Using creditline info, update the accession year where possible for objects that were filtered out of the dataframe because of an invalid accession year

- Pull unique entries from remainder dataframe
- Update accession year using last 4 digits of creditlines
- Re-run data filtering code and check for any manual updates

In [13]:
# distinct creditline values among the rejected rows, as a one-column dataframe
creditlines = df_remainder['creditline'].unique()
creditlines_df = pd.DataFrame({'creditline': creditlines})

In [14]:
# cast every creditline to str so that non-string entries can be sliced below
creditlines_df['creditline'] = creditlines_df['creditline'].map(str)

In [15]:
# last four characters of each creditline: the candidate year
creditlines_df['year'] = creditlines_df['creditline'].str[-4:]

# True where those characters are all digits
creditlines_df['alpha'] = creditlines_df['year'].str.isdigit()
creditlines_df.head()

Unnamed: 0,creditline,year,alpha
0,"Gift of Mrs. C. J. Martin, in memory of Charle...",rtin,False
1,The William Hood Dunwoody Fund,Fund,False
2,Gift of Mr. and Mrs. E.D. Brooks,ooks,False
3,Lent by E. H. Hewitt,witt,False
4,Gift of M. Knoedler & Co.,Co.,False


In [16]:
# pull out only creditlines with years at the end.
# .copy() gives an independent dataframe, so the column assignment below no
# longer writes to a slice of creditlines_df (the SettingWithCopyWarning source).
creditlines_year = creditlines_df[creditlines_df['alpha'].eq(True)].copy()

# keep only the last two characters of the four-character year (e.g. '1924' -> '24')
creditlines_year['year'] = creditlines_year['year'].str[-2:]

# drop alpha column from dataframe
creditlines_year = creditlines_year.drop('alpha', axis=1)

creditlines_year.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """


Unnamed: 0,creditline,year
16,The William M. Ladd Collection\r\nGift of Hers...,16
26,"Gift of Mrs. Charles C. Bovey, 1924",24
29,The Minnich Collection\r\nThe Ethel Morrison V...,66
30,The Minnich Collection\r\n\nThe Ethel Morrison...,66
31,"Gift of Herschel V. Jones, 1925",25


In [17]:
# left-join the creditline-derived 'year' onto every row with a matching creditline
df = pd.merge(df, creditlines_year, on='creditline', how='left')

In [22]:
# keep the creditline-derived year where present, otherwise fall back to accession_year
df['year'] = df['year'].where(df['year'].notna(), df['accession_year'])

# leave a single 'accession_year' column: drop the old one, then rename 'year'
df = (
    df
    .drop('accession_year', axis=1)
    .rename(columns={'year': 'accession_year'})
)
df.head()

Unnamed: 0,accession_number,artist,classification,continent,country,creditline,culture,dated,department,id,...,object_name,provenance,room,style,title,RBL,RB,X,loan,accession_year
0,10.1,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c.1888-89,Prints and Drawings,0,...,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Air, from the series The Four Elements",,,,,10
1,10.2,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,1,...,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Earth, from the series The Four Elements",,,,,10
2,10.3,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,2,...,Drawing,"[Art dealer, London, acquired from ""an old hou...",G352,19th century,"Fire, from the series The Four Elements",,,,,10
3,10.4,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,3,...,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Water, from the series The Four Elements",,,,,10
4,13.29,Walter Shirlaw,Drawings,North America,United States,Gift of Mrs. Florence M. Shirlaw,,19th century,Prints and Drawings,4,...,Drawing,,Not on View,19th century,Montana Indian Reservation I,,,,,13


In [19]:
# row count per accession year. The merged 'year' column is renamed to
# 'accession_year' in the preceding cell, so df.year does not exist when the
# notebook is run top to bottom; read the renamed column instead.
df['accession_year'].value_counts()

66         8197
99         7717
16         5127
2013       4005
2003       3731
98         3550
24         3499
2007       3121
2004       3066
2002       2864
96         2650
2015       2625
2001       2490
2010       2356
97         2306
2005       2180
74         2018
95         1960
2000       1957
2017       1828
2014       1771
2018       1771
2012       1771
82         1681
77         1676
90         1671
2016       1651
2006       1625
81         1580
94         1468
           ... 
3,456         1
No #2         1
12,377        1
11,628        1
5,918         1
10,844        1
13,606        1
No #148       1
8             1
13,621        1
18,052        1
2667          1
3             1
4,749         1
4,636         1
4,673         1
No #75        1
5,399         1
2207          1
18,037        1
13,710        1
4,771         1
6,947         1
No #36        1
11,791        1
10,804        1
2835          1
4,670         1
13,608        1
59a-d         1
Name: year, Length: 2081

In [23]:
# row count per accession_year value, most frequent first
df['accession_year'].value_counts()

66         8197
99         7717
16         5127
2013       4005
2003       3731
98         3550
24         3499
2007       3121
2004       3066
2002       2864
96         2650
2015       2625
2001       2490
2010       2356
97         2306
2005       2180
74         2018
95         1960
2000       1957
2017       1828
2014       1771
2018       1771
2012       1771
82         1681
77         1676
90         1671
2016       1651
2006       1625
81         1580
94         1468
           ... 
3,456         1
No #2         1
12,377        1
11,628        1
5,918         1
10,844        1
13,606        1
No #148       1
8             1
13,621        1
18,052        1
2667          1
3             1
4,749         1
4,636         1
4,673         1
No #75        1
5,399         1
2207          1
18,037        1
13,710        1
4,771         1
6,947         1
No #36        1
11,791        1
10,804        1
2835          1
4,670         1
13,608        1
59a-d         1
Name: accession_year, Le

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund, 1966': '66',
    'The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Gift of Mrs. Charles C. Bovey, 1924': '24',
    'Gift of George A. Goddard, 1919': '19',
    'The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916\r\n': '16',
    'Gift of Mrs. George P. Douglas, 1946': '46',
    'Gift of Herschel V. Jones, 1926': '26',
    'Gift of Mrs. George P. Douglas, 1929': '29',
    'Gift of Mrs.C.C.Bovey, 1924': '24',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(10)

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of Mr. R. E. Lewis, 1965': '65',
    'Gift of Herschel V. Jones, 1925': '25',
    'Gift of Miss A. G. Latham, 1943': '43',
    'Bequest of Mrs. Charles S. Pillsbury, 1958': '58',
    'Gift of Philip Little in memory of Annie Jeannette Jackson, 1931': '31',
    'Gift of Mrs. C.C. Bovey, 1941': '41',
    'Gift of Mr. George A. Goddard, 1919': '19',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(10)

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of Mrs. C.C. Bovey, 1924': '24',
    'Gift of Mrs. Philip Little, Jr., 1963': '63',
    'Gift of Mrs. Philip Little, Jr., 1962': '62',
    'Gift of Miss Perrie Jones, 1961': '61',
    'Gift of Mrs. B.J.O Nordfeldt, 1955': '55',
    'Gift of Howard Mansfield, 1926': '26',
    'Gift of Ruth Lathrop Sikes in memory of her brother Bruce Sikes, 1967': '67',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(12)

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of Miss Eileen Bigelow and Mrs. O. H. Ingram, in memory of their mother, Mrs. Alice F. Bigelow, 1965': '65',
    'Gift of the Artist, 1923': '23',
    'The Minnich Collection\r\nThe Ethel Morrison Van DerLip Fund, 1966': '66',
    'Gift of Miss Katherine Bullard, 1918': '18',
    'Gift of Funds from Mrs. Franklin M. Crosby Jr. and the William Hood Dunwoody Fund, 1947': '47',
    'The Ethel Morrison Van Derlip Fund, 1967': '67',
    'Gift of Mrs. C. C. Bovey, 1941': '41',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(15)

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Pillsbury Bequest, 1958': '58',
    'The Ethel Morrison Van Derlip Fund, 1965': '65',
    'Gift of Mrs. George P. Douglas, 1955': '55',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1964': '64',
    'Gift of Mrs. Hiram C. Truesdale, 1927': '27',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[5:19]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'The William Hood Dunwoody Fund, 1915': '15',
    'Gift of Mrs. George P. Douglas, 1929\r\n': '29',
    'Gift of Friends of Art in Minneapolis, 1917': '17',
    'Gift of Miss Tessie Jones in memory of her parents, Mr. and Mrs. Herschel V. Jones, 1966': '66',
    'Gift of Mrs. Childe Hassam, 1940': '40',
    'The Miscellaneous Works of Art Purchase Fund, 1956': '56',
    'The Ethel Morrison Van Derlip Fund, 1966': '66',
    'Gift of Ethel Morrison Van DerLip, 1916': '16',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[5:20]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of Mr. and Mrs. Edward A. Foster, 1965': '65',
    'Gift of Mr. and Mrs. Edward A. Foster, 1966': '66',
    'Goddard, 1919': '19',
    'Gift of the Estate of Wanda Gag, 1956': '56',
    'Gift of Mrs. George Resler, 1965': '65',
    'Gift of J. B. Neumann, 1962': '62',
    'Purchase: July, 1922': '22',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[5:19]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of The John L. Smith Fund, 1927': '27',
    'Gift of the Artist, 1930': '30',
    'The Martha T. Wallace Memorial Fund, 1930': '30',
    'The William Hood Dunwoody Fund, 1946': '46',
    'Purchase: 1938': '38',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[10:24]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of Mrs. C.C. Bovey, 1929': '29',
    'Gift of Mrs. Leo B. Harris, 1965': '65',
    'Gift of the Artist, 1957': '57',
    'Gift of Herschel V. Jones, 1927': '27',
    'Gift of Mr. & Mrs. E. D. Brooks, 1919': '19',
    'The William Hood Dunwoody Fund, 1961': '61',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[10:28]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'The David Draper Dayton Fund, 1958': '58',
    'Bequest of Mrs. Charles S. PIllsbury, 1958': '58',
    'Purchase: 1922': '22',
    'Gift of Mrs. Keith Merrill, 1940': '40',
    'Gift of Clement Haupers, 1965': '65',
    'The Miscellaneous Works of Art Purchase Fund, 1957': '57',
    'Gift of Mr. and Mrs. E. D. Brooks, 1919': '19',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[15:33]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Anonymous Gift, 1920': '20',
    'Gift of Philip W. Pillsbury, 1959': '59',
    'Gift of Mr. and Mrs. Edmund D. Brooks, May 1919': '19',
    'Gift of Ethel Morrison Van Derlip, 1916': '16',
    'Gift of Mrs. Grace B. Wells, 1954': '54',
    'Ethel Morrison Van Derlip Fund, 1965': '65',
    'Gift of Elmer E. Tafflinger, 1927': '27',
    'Gift of Mr. and Mrs. E.D. Brooks, May 1919': '19',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[20:34]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Estate of Mrs. John Washburn, 1942': '42',
    'The Ethel Morrison Van Derlip Fund, 1961': '61',
    'Gift of Mrs. Ridgely Hunt, 1940': '40',
    'Gift of Mr. Marvin Small, 1961': '61',
    'The William Hood Dundwoody Fund, 1915': '15',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1966': '66',
    'The Ethel Morrison Van Derlip Fund, 1964': '64',
    'Gift of Richard P. Gale, 1956': '56',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Re-run the filters on the updated dataframe.
# Split on accession_year length: at most 4 characters is valid. .str.len()
# yields NaN for missing values (len() would raise); NaN compares False, so
# rows without a year land in df_2.
year_len = df['accession_year'].str.len()
has_valid_year = year_len < 5
df_1 = df[has_valid_year]
df_2 = df[~has_valid_year]
# Split on accession_number length: fewer than 4 characters (or missing) is invalid.
number_len = df_1['accession_number'].str.len()
is_long_enough = number_len > 3
df_3 = df_1[is_long_enough]
df_4 = df_1[~is_long_enough]
# Keep accession years with more than 30 rows; set aside those with 30 or fewer.
df_grouped = df_3.groupby('accession_year').filter(lambda x: len(x) > 30)
df_remainder = df_3.groupby('accession_year').filter(lambda x: len(x) <= 30)
# Pool every rejected row. DataFrame.append was removed in pandas 2.0;
# pd.concat stacks the same frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts()[25:44]

In [None]:
# Manual accession-year overrides, keyed by exact creditline text.
# Each value is the last two digits of the year written in that creditline.
manual_years = {
    'Gift of M. H. Taylor, 1927': '27',
    'Anonymous Gift, 1938': '38',
    'The Ethel Morrison Van Derlip Fund, 1963': '63',
    'Gift of Mr. Richard P. Gale, 1956': '56',
    'The William Hood Dunwoody Fund, 1941': '41',
    'The John R. Van Derlip Fund, 1966': '66',
    'Gift of Alexander Masley, 1932': '32',
    'The Discretionary Purchase Fund, 1965': '65',
    'Gift of Mr. H. J. L. Wright, 1921': '21',
    'Gift of Mrs. Charles C. Bovey, 1941': '41',
    'Gift of Timothy Cole, August 1919\r\n': '19',
    'The William Hood Dunwoody Fund, 1947': '47',
    'Gift of the artist, 1966': '66',
    'Gift of Mrs. C. C. Bovey, 1924': '24',
    'Purchase: Dunwoody Fund, 1915': '15',
}
for credit_text, two_digit_year in manual_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = two_digit_year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[25:43]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Gift of Mrs. Edmund D. Brooks, May 1919': '19',
    'Gift of Mr. Gardner Teall, 1917': '17',
    'Gift of Gardner Teall, 1917': '17',
    'Gift of Herschel V. Jones, 1916': '16',
    'Gift of Mr. and Mrs. E. D. Brooks, May 1919': '19',
    'Gift of Mr. & Mrs. E. D. Brooks, May 1919': '19',
    'Gift of Mr. Edmund D. Brooks, 1915': '15',
    'Gift of General Mills, Inc., 1960': '60',
    'Gift of Margaret McMillan Webber, 1951': '51',
    'Gift of Benjamin Miller, 1929': '29',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[30:53]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Gift of Mrs. C. C. Bovey, 1940': '40',
    'Gift of Gardner C. Teall, 1916': '16',
    'Gift of Mrs. B.J.O. Nordfeldt, 1955': '55',
    'Gift of Carl Jones, 1952': '52',
    'Gift of Mrs. C.C. Bovey, 1940': '40',
    'Gift of the Estate of George W. Morgan, 1958': '58',
    'Gift of the Friends of the Institute, 1931': '31',
    'Gift of funds from Mr. and Mrs. D. Thomas Bergen, 1967': '67',
    'Trufant and Wyman Fund, 1958': '58',
    'Gift of Mrs.Charles C.Bovey, 1924': '24',
    'Mr. S. C. Burton, June 5, 1925': '25',
    'Gift of funds from Mr. and Mrs. Hall Peterson, 1966': '66',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[40:55]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below (one entry contains an embedded '\r\n').
year_by_creditline = {
    'The William Hood Dunwoody Fund, 1919': '19',
    'Gift of the Artist, 1933': '33',
    'Gift of Mrs. Charles S. Pillsbury, 1938': '38',
    'Gift of Mrs. Fred Wells, 1954': '54',
    'Purchase: Discretionary Fund, 1964': '64',
    'Gift of Vera Andrus, 1938': '38',
    'Gift of Wheelwright, 1937': '37',
    'Gift of Mr. and Mrs. Arthur Weisenberger, 1961': '61',
    'The Willaim M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Gift of Truesdale, 1927': '27',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[40:55]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Mrs. C.C. Bovey, 1941': '41',
    'Gift of Theodore W. Bennett, 1964': '64',
    'Bequest of George W. Morgan, 1958': '58',
    'Gift of Mrs. George Resler, 1925': '25',
    'Bequest of Putnam Dana McMillan, 1961': '61',
    'Gift of Mrs. C.A. Reed, 1922': '22',
    'Gift of Eugene Larkin, 1963': '63',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[40:55]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below (one entry contains embedded '\r\n').
year_by_creditline = {
    'The William Hood Dunwoody Fund, 1950': '50',
    'Mr. S. C. Burton, March 31, 1924': '24',
    'Anonymous gift\r\nThe William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Purchase: July 1922': '22',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[45:65]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Gift of the artist, 1941': '41',
    'Gift of Mr. and Mrs. George Halpin, 1958': '58',
    'Gift of Mrs. George W. P. Heffelfinger, 1965': '65',
    'The Mrs. Charles S. Pillsbury Memorial Fund, 1962': '62',
    'The David M. Daniels Fund, 1967': '67',
    'Gift of Nesto Jacometti, 1957': '57',
    'Gift of Mr. Carl O. Schniewind, 1957': '57',
    'Gift of Miss Leona Prasse, 1957': '57',
    'Gift of Mr. W. L. Tenney and Alice Tenney Mitchell, 1962': '62',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[45:65]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'The F.S. Winston Fund, 1957': '57',
    'GIft of Mr. George A. Goddard, 1919': '19',
    'William Hood Dunwoody Fund Purchase, 1947': '47',
    'The William M. Ladd Collection, Gift of Herschel V. Jones, 1916': '16',
    'Gift of Mr. and Mrs. Winton Jones, 1967': '67',
    'Gift of Herschel V. Jones, 1919': '19',
    'Gift of Richard S. and Phyllis Davis, 1957': '57',
    'Gift of Mrs. Ridgley Hunt, 1940': '40',
    'Gift of Dr. and Mrs. Markle Karlen, 1966': '66',
    'Gift of Herschel V. Jones, 1920': '20',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[45:65]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Gift of Mary H. Taylor, 1925': '25',
    'Gift of John L. Smith Fund, 1927': '27',
    'The Miscellaneous Works of Art Purchase Fund, 1952': '52',
    'Purchase: July 1919': '19',
    'Museum Purchase, 1922': '22',
    'Gift of Dr. and Mrs. Markle Karlen, 1967': '67',
    'Gift of the artist, 1961': '61',
    'The Miscellaneous Works of Art Purchase Fund, 1929': '29',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# Rerun the filters on the updated dataframe.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# Combine the small-year rows with df_2 and df_4 into one larger dataframe.
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the same
# frames in the same order.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the creditline frequency table.
df_remainder.creditline.value_counts().iloc[45:65]

In [None]:
# Assign a two-digit accession year to every row whose creditline exactly
# matches one of the strings below.
year_by_creditline = {
    'Gift of Mrs. Paul Morand, 1932': '32',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1962': '62',
    'Bequest of Margaret McMillan Webber, 1951': '51',
    'Gift of Mrs. Charles S. Pillsbury, 1924': '24',
    'Gift of Arthur L. Jenks, August 1922': '22',
    'Gift of Gardner Teall, 1920': '20',
    'The Miscellaneous Works of Art Purchase Fund, 1952 (2nd Biennial)': '52',
}
for creditline, year in year_by_creditline.items():
    df.loc[df['creditline'] == creditline, 'accession_year'] = year

In [None]:
# df was modified by the preceding update cell, so rebuild the filtered
# frames first; otherwise the table below would still list creditlines that
# were just assigned an accession year.
# df_1 keeps accession years of at most 4 characters; df_2 keeps longer ones.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Within df_1, df_3 keeps accession numbers of 4+ characters and df_4 keeps
# accession numbers shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows go to df_remainder.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda g: len(g) > 30)
df_remainder = by_year.filter(lambda g: len(g) <= 30)
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Full frequency table of the creditlines still in the remainder.
df_remainder.creditline.value_counts()

In [None]:
# Row count per accession year in df_grouped.
# NOTE(review): df_grouped holds whatever the most recent filter run produced;
# rerun the filters first if df has been updated since.
df_grouped['accession_year'].value_counts()