In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Relative path to the raw Mia collection objects export (CSV) read by the next cell.
csv_file = "resources/Mia_objects_raw.csv"

In [3]:
# Load the raw export, using the saved 'Unnamed: 0' column as the row index.
df = pd.read_csv(
    csv_file,
    index_col='Unnamed: 0',
)
# Preview the first five rows.
df.head(5)

Unnamed: 0,accession_number,artist,classification,continent,country,creditline,culture,dated,department,id,life_date,medium,nationality,object_name,provenance,room,style,title
0,10.1,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c.1888-89,Prints and Drawings,0,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Air, from the series The Four Elements"
1,10.2,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,1,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Earth, from the series The Four Elements"
2,10.3,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,2,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",G352,19th century,"Fire, from the series The Four Elements"
3,10.4,Frederick G. Smith; Artist: Formerly attribute...,Drawings,Europe,England,"Gift of Mrs. C. J. Martin, in memory of Charle...",,c. 1888-89,Prints and Drawings,3,"English, 1833 - 1911","Pen and ink, brush and wash over graphite",English,Drawing,"[Art dealer, London, acquired from ""an old hou...",Not on View,19th century,"Water, from the series The Four Elements"
4,13.29,Walter Shirlaw,Drawings,North America,United States,Gift of Mrs. Florence M. Shirlaw,,19th century,Prints and Drawings,4,"American, 1838 - 1909",Watercolor,American,Drawing,,Not on View,19th century,Montana Indian Reservation I


In [4]:
# Remove the prints & drawings prefix from the accession number.
# str.lstrip treats its argument as a SET of characters, not a literal prefix:
# the first pass drops any leading 'P'/'.' characters, the second any leading 'p'/'.'.
accession_clean = df['accession_number'].str.lstrip('P.').str.lstrip('p.')
df['accession_number'] = accession_clean

# The accession year is whatever precedes the first '.' in the cleaned number.
df['accession_year'] = accession_clean.str.split('.', n=1).str[0]

# Accession Number Cleaning

### RBL/RB/X
Adding columns for unknown accession letters

In [5]:
# Add one flag column per letter code found in accession_year.
# Each column holds the matched text, or NaN where the pattern does not occur.
# Note: 'RBL' and 'RB' are matched anywhere in the value (so 'RB' also matches
# inside 'RBL'), while 'X' is only matched at the very start.
for flag_column, flag_pattern in (
    ('RBL', r'(RBL)'),
    ('RB', r'(RB)'),
    ('X', r'(^X)'),
):
    df[flag_column] = df['accession_year'].str.extract(flag_pattern, expand=False)

### L
Adding column for loaned objects

In [6]:
# Flag loaned objects: 'L' when accession_year starts with 'L', NaN otherwise.
loan_flag = df['accession_year'].str.extract(r'(^L)', expand=False)
df['loan'] = loan_flag

Stripping letters from accession_year

In [7]:
# Remove the letter codes from the front of accession_year in two ordered passes:
#   1. any run of leading 'R', 'B' or 'L' characters (covers L, RB and RBL)
#   2. any run of leading 'X' characters
# lstrip works on a character set, so the passes are kept separate and in this order.
df['accession_year'] = (
    df['accession_year']
    .str.lstrip('RBL')
    .str.lstrip('X')
)

### Filter out invalid years

Filter dataframe three ways:
- Filter out accession years longer than 4 characters (years have at most 4 digits, e.g. 2019)
- Filter out accession numbers shorter than 4 characters (standard accession numbers are in the format YY.#)
- Use groupby to filter out any accession years with 30 or fewer entries

In [8]:
# replace all empty cells with None
# NOTE(review): the result of replace(<scalar>, None) appears to depend on the pandas
# version - older releases presumably treat value=None as "not given" and forward-fill
# the matches from the previous row, newer ones write a literal None. The filter cells
# below call len() on every value, so neither outcome may be what was intended.
# TODO confirm against the installed pandas version.
df = df.replace('', None)

In [9]:
# Split on accession-year length: at most 4 characters is plausible (e.g. '99', '2019').
# .str.len() is vectorized and yields NaN for missing values, so a row without a
# year lands in df_2 (invalid) instead of raising TypeError inside len().
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]    # plausible accession year
df_2 = df[~year_ok]   # accession year too long (or missing)

In [10]:
# Split df_1 on accession-number length: a standard number (YY.#) has at least
# 4 characters. .str.len() is vectorized and yields NaN for missing values, so a
# row without a number lands in df_4 (invalid) instead of raising TypeError.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]    # plausible accession number
df_4 = df_1[~number_ok]   # accession number too short (or missing)

In [11]:
# Keep accession years that occur more than 30 times; years with 30 or fewer
# entries are treated as suspect and go to the remainder.
# The group size is computed once and broadcast back to every row, replacing
# two separate groupby(...).filter(lambda ...) passes over the same data.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]

### Manually Updating Using Creditline info
Using creditline info, update accession year when possible for objects filtered out of dataframe with invalid accession years

- Pull top entries in remainder dataframes
- Update accession year with information from creditline
- Re-run data filtering code and start again

In [12]:
# Baseline for later comparison: number of objects per accepted accession year.
df_grouped['accession_year'].value_counts()

99      7717
2013    4006
2003    3731
98      3550
2007    3121
2004    3066
2002    2864
96      2650
2015    2625
2001    2490
2010    2356
97      2306
2005    2180
74      2018
95      1960
2000    1957
2017    1828
2014    1771
2018    1771
2012    1771
82      1680
77      1676
90      1671
2016    1651
2006    1625
81      1580
94      1468
75      1360
2008    1323
47      1295
        ... 
37       167
28       139
14       129
56       127
45       124
44       122
24       108
09       105
46       105
26       105
16       103
20       103
29       100
32        98
43        91
13        77
22        62
55        61
36        59
33        56
59        56
39        54
49        47
60        46
57        45
38        44
53        36
48        36
18        36
52        33
Name: accession_year, Length: 107, dtype: int64

In [13]:
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# row order is unchanged: df_remainder, then df_2, then df_4)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Ten most frequent creditlines among the rejected rows.
df_remainder.creditline.value_counts().head(10)

The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund, 1966       7573
The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916        4962
Gift of Mrs. Charles C. Bovey, 1924                                      3133
Gift of George A. Goddard, 1919                                           614
Gift of H. V. Jones                                                       518
The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916\r\n     325
Gift of Mrs. George P. Douglas, 1946                                      267
Gift of Herschel V. Jones, 1926                                           231
Gift of Mrs. George P. Douglas, 1929                                      212
Gift of Mrs.C.C.Bovey, 1924                                               163
Name: creditline, dtype: int64

In [14]:
# Update top entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund, 1966': '66',
    'The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Gift of Mrs. Charles C. Bovey, 1924': '24',
    'Gift of George A. Goddard, 1919': '19',
    'The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916\r\n': '16',
    'Gift of Mrs. George P. Douglas, 1946': '46',
    'Gift of Herschel V. Jones, 1926': '26',
    'Gift of Mrs. George P. Douglas, 1929': '29',
    'Gift of Mrs.C.C.Bovey, 1924': '24',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [15]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(10)

The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916    872
Gift of H. V. Jones                                                  518
Gift of Mr. R. E. Lewis, 1965                                        153
Gift of Herschel V. Jones, 1925                                      137
The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund         114
Gift of Miss A. G. Latham, 1943                                       99
Bequest of Mrs. Charles S. Pillsbury, 1958                            91
Gift of Philip Little in memory of Annie Jeannette Jackson, 1931      84
Gift of Mrs. C.C. Bovey, 1941                                         79
Gift of Mr. George A. Goddard, 1919                                   77
Name: creditline, dtype: int64

In [16]:
# Update top entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of Mr. R. E. Lewis, 1965': '65',
    'Gift of Herschel V. Jones, 1925': '25',
    'Gift of Miss A. G. Latham, 1943': '43',
    'Bequest of Mrs. Charles S. Pillsbury, 1958': '58',
    'Gift of Philip Little in memory of Annie Jeannette Jackson, 1931': '31',
    'Gift of Mrs. C.C. Bovey, 1941': '41',
    'Gift of Mr. George A. Goddard, 1919': '19',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [17]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(10)

The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916        872
Gift of H. V. Jones                                                      518
The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund             114
Gift of Mrs. C.C. Bovey, 1924                                             74
Gift of Mrs. Philip Little, Jr., 1963                                     69
Gift of Mrs. Philip Little, Jr., 1962                                     65
Gift of Miss Perrie Jones, 1961                                           65
Gift of Mrs. B.J.O Nordfeldt, 1955                                        63
Gift of Howard Mansfield, 1926                                            63
Gift of Ruth Lathrop Sikes in memory of her brother Bruce Sikes, 1967     61
Name: creditline, dtype: int64

In [18]:
# Update top entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of Mrs. C.C. Bovey, 1924': '24',
    'Gift of Mrs. Philip Little, Jr., 1963': '63',
    'Gift of Mrs. Philip Little, Jr., 1962': '62',
    'Gift of Miss Perrie Jones, 1961': '61',
    'Gift of Mrs. B.J.O Nordfeldt, 1955': '55',
    'Gift of Howard Mansfield, 1926': '26',
    'Gift of Ruth Lathrop Sikes in memory of her brother Bruce Sikes, 1967': '67',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [19]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(12)

The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916                                            872
Gift of H. V. Jones                                                                                          518
The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund                                                 114
Gift of Miss Eileen Bigelow and Mrs. O. H. Ingram, in memory of their mother, Mrs. Alice F. Bigelow, 1965     60
Gift of the Artist, 1923                                                                                      58
The Ethel Morrison Van Derlip Fund                                                                            55
The Minnich Collection\r\nThe Ethel Morrison Van DerLip Fund, 1966                                            53
Gift of F.N. Edmonds                                                                                          52
Gift of Miss Katherine Bullard, 1918                                                            

In [21]:
# Update top entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of Miss Eileen Bigelow and Mrs. O. H. Ingram, in memory of their mother, Mrs. Alice F. Bigelow, 1965': '65',
    'Gift of the Artist, 1923': '23',
    'The Minnich Collection\r\nThe Ethel Morrison Van DerLip Fund, 1966': '66',
    'Gift of Miss Katherine Bullard, 1918': '18',
    'Gift of Funds from Mrs. Franklin M. Crosby Jr. and the William Hood Dunwoody Fund, 1947': '47',
    'The Ethel Morrison Van Derlip Fund, 1967': '67',
    'Gift of Mrs. C. C. Bovey, 1941': '41',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [22]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
df_remainder.creditline.value_counts().head(15)

The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916    872
Gift of H. V. Jones                                                  518
The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund         114
The Ethel Morrison Van Derlip Fund                                    55
Gift of F.N. Edmonds                                                  52
Pillsbury Bequest, 1958                                               40
Gift of Mrs. Carl W. Jones in Memory of Her Husband                   38
Gift of Frederick B. Wells                                            38
Gift of Mrs. Darwin R. Martin                                         38
The Ethel Morrison Van Derlip Fund, 1965                              36
Gift of Mrs. George P. Douglas, 1955                                  35
The Christina N. and Swan J. Turnblad Memorial Fund, 1964             35
The William Hood Dunwoody Fund                                        31
Gift of the Estate of Dorothy Millett Lindeke      

In [23]:
# Update top entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Pillsbury Bequest, 1958': '58',
    'The Ethel Morrison Van Derlip Fund, 1965': '65',
    'Gift of Mrs. George P. Douglas, 1955': '55',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1964': '64',
    'Gift of Mrs. Hiram C. Truesdale, 1927': '27',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [24]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 5 through 18).
df_remainder.creditline.value_counts().iloc[5:19]

Gift of Mrs. Darwin R. Martin                                                               38
Gift of Frederick B. Wells                                                                  38
Gift of Mrs. Carl W. Jones in Memory of Her Husband                                         38
The William Hood Dunwoody Fund                                                              31
Gift of the Estate of Dorothy Millett Lindeke                                               30
The William Hood Dunwoody Fund, 1915                                                        28
Gift of Friends of Art in Minneapolis, 1917                                                 27
Gift of Miss Tessie Jones in memory of her parents, Mr. and Mrs. Herschel V. Jones, 1966    27
Gift of Mrs. George P. Douglas, 1929\r\n                                                    27
Gift of Mrs. Childe Hassam, 1940                                                            24
Gift of Bruce B. Dayton                           

In [25]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'The William Hood Dunwoody Fund, 1915': '15',
    'Gift of Mrs. George P. Douglas, 1929\r\n': '29',
    'Gift of Friends of Art in Minneapolis, 1917': '17',
    'Gift of Miss Tessie Jones in memory of her parents, Mr. and Mrs. Herschel V. Jones, 1966': '66',
    'Gift of Mrs. Childe Hassam, 1940': '40',
    'The Miscellaneous Works of Art Purchase Fund, 1956': '56',
    'The Ethel Morrison Van Derlip Fund, 1966': '66',
    'Gift of Ethel Morrison Van DerLip, 1916': '16',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [28]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 5 through 19).
df_remainder.creditline.value_counts().iloc[5:20]

Gift of Frederick B. Wells                             38
Gift of Mrs. Darwin R. Martin                          38
Gift of Mrs. Carl W. Jones in Memory of Her Husband    38
The William Hood Dunwoody Fund                         31
Gift of the Estate of Dorothy Millett Lindeke          30
The William Hood Dunwoody Fund, 1915                   28
Gift of Bruce B. Dayton                                24
Gift of Ethel Morrison Van DerLip, 1916                22
Gift of Mr. and Mrs. Edward A. Foster, 1966            21
Gift of Mr. and Mrs. Edward A. Foster, 1965            21
Goddard, 1919                                          21
Gift of the Estate of Wanda Gag, 1956                  19
Gift of Mrs. George Resler, 1965                       19
Gift of J. B. Neumann, 1962                            19
Purchase: July, 1922                                   19
Name: creditline, dtype: int64

In [29]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of Mr. and Mrs. Edward A. Foster, 1965': '65',
    'Gift of Mr. and Mrs. Edward A. Foster, 1966': '66',
    'Goddard, 1919': '19',
    'Gift of the Estate of Wanda Gag, 1956': '56',
    'Gift of Mrs. George Resler, 1965': '65',
    'Gift of J. B. Neumann, 1962': '62',
    'Purchase: July, 1922': '22',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [32]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 5 through 18).
df_remainder.creditline.value_counts().iloc[5:19]

Gift of Frederick B. Wells                             38
Gift of Mrs. Carl W. Jones in Memory of Her Husband    38
Gift of Mrs. Darwin R. Martin                          38
The William Hood Dunwoody Fund                         31
Gift of the Estate of Dorothy Millett Lindeke          30
The William Hood Dunwoody Fund, 1915                   28
Gift of Bruce B. Dayton                                24
Gift of Ethel Morrison Van DerLip, 1916                22
Gift of The John L. Smith Fund, 1927                   17
Gift of the Artist, 1930                               17
The Martha T. Wallace Memorial Fund, 1930              16
The William Hood Dunwoody Fund, 1946                   16
The John R. Van Derlip Print Fund                      16
Purchase: 1938                                         16
Name: creditline, dtype: int64

In [33]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of The John L. Smith Fund, 1927': '27',
    'Gift of the Artist, 1930': '30',
    'The Martha T. Wallace Memorial Fund, 1930': '30',
    'The William Hood Dunwoody Fund, 1946': '46',
    'Purchase: 1938': '38',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [37]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 10 through 23).
df_remainder.creditline.value_counts().iloc[10:24]

The William Hood Dunwoody Fund, 1915                         28
Gift of Bruce B. Dayton                                      24
Gift of Ethel Morrison Van DerLip, 1916                      22
The John R. Van Derlip Print Fund                            16
Gift of Mrs. C.C. Bovey, 1929                                15
The William M. Ladd Collection, Gift of Herschel V. Jones    15
Gift of the Artist, 1957                                     15
Gift of Mrs. Leo B. Harris, 1965                             15
Gift of J. B. Neumann                                        14
Gift of Herschel V. Jones, 1927                              13
Gift of George A. Goddard                                    13
On Permanent Loan from the State Arts Society                13
Gift of Mr. & Mrs. E. D. Brooks, 1919                        13
The William Hood Dunwoody Fund, 1961                         12
Name: creditline, dtype: int64

In [38]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Gift of Mrs. C.C. Bovey, 1929': '29',
    'Gift of Mrs. Leo B. Harris, 1965': '65',
    'Gift of the Artist, 1957': '57',
    'Gift of Herschel V. Jones, 1927': '27',
    'Gift of Mr. & Mrs. E. D. Brooks, 1919': '19',
    'The William Hood Dunwoody Fund, 1961': '61',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [41]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 10 through 27).
df_remainder.creditline.value_counts().iloc[10:28]

The William Hood Dunwoody Fund, 1915                         28
Gift of Bruce B. Dayton                                      24
Gift of Ethel Morrison Van DerLip, 1916                      22
The John R. Van Derlip Print Fund                            16
The William M. Ladd Collection, Gift of Herschel V. Jones    15
Gift of J. B. Neumann                                        14
On Permanent Loan from the State Arts Society                13
Gift of George A. Goddard                                    13
The David Draper Dayton Fund, 1958                           11
Bequest of Mrs. Charles S. PIllsbury, 1958                   11
The Miscellaneous Works of Art Purchase Fund                 11
Purchase: 1922                                               11
Anonymous Gift                                               11
Gift of Dr. Angus Morrison                                   11
Gift of Mrs. Keith Merrill, 1940                             11
Gift of Clement Haupers, 1965           

In [42]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'The David Draper Dayton Fund, 1958': '58',
    'Bequest of Mrs. Charles S. PIllsbury, 1958': '58',
    'Purchase: 1922': '22',
    'Gift of Mrs. Keith Merrill, 1940': '40',
    'Gift of Clement Haupers, 1965': '65',
    'The Miscellaneous Works of Art Purchase Fund, 1957': '57',
    'Gift of Mr. and Mrs. E. D. Brooks, 1919': '19',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [48]:
# rerun filters on updated dataframe
# Accession year: at most 4 characters is plausible. .str.len() is vectorized and
# yields NaN for missing values, so such rows go to df_2 instead of raising TypeError.
year_ok = df['accession_year'].str.len() < 5
df_1 = df[year_ok]
df_2 = df[~year_ok]
# Accession number: a standard number (YY.#) has at least 4 characters.
number_ok = df_1['accession_number'].str.len() > 3
df_3 = df_1[number_ok]
df_4 = df_1[~number_ok]
# Keep accession years with more than 30 entries; 30 or fewer go to the remainder.
# The group size is computed once instead of running groupby.filter twice.
year_size = df_3.groupby('accession_year')['accession_year'].transform('size')
df_grouped = df_3[year_size > 30]
df_remainder = df_3[year_size <= 30]
# combine df_2, df_4, and df_remainder into one larger dataframe
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
df_remainder = pd.concat([df_remainder, df_2, df_4])
# Positional slice of the frequency table (rows 15 through 32).
df_remainder.creditline.value_counts().iloc[15:33]

Gift of J. B. Neumann                              14
Gift of George A. Goddard                          13
On Permanent Loan from the State Arts Society      13
The Miscellaneous Works of Art Purchase Fund       11
Gift of Dr. Angus Morrison                         11
Anonymous Gift                                     11
Anonymous Gift, 1920                               10
Gift of J.B. Neumann                               10
Gift of Philip W. Pillsbury, 1959                  10
Gift of Mr. and Mrs. Edmund D. Brooks, May 1919    10
Gift of Ethel Morrison Van Derlip, 1916             9
Gift of Mrs. Grace B. Wells, 1954                   9
Gift of Herschel V. Jones                           9
Ethel Morrison Van Derlip Fund, 1965                9
The John R. Van Derlip Fund                         9
Gift of Elmer E. Tafflinger, 1927                   9
Gift of Mr. and Mrs. E.D. Brooks, May 1919          9
Gift of Mrs. C.C. Bovey                             9
Name: creditline, dtype: int

In [49]:
# Update next entries: exact creditline text -> two-digit accession year
# (the year is the one written at the end of each creditline).
creditline_years = {
    'Anonymous Gift, 1920': '20',
    'Gift of Philip W. Pillsbury, 1959': '59',
    'Gift of Mr. and Mrs. Edmund D. Brooks, May 1919': '19',
    'Gift of Ethel Morrison Van Derlip, 1916': '16',
    'Gift of Mrs. Grace B. Wells, 1954': '54',
    'Ethel Morrison Van Derlip Fund, 1965': '65',
    'Gift of Elmer E. Tafflinger, 1927': '27',
    'Gift of Mr. and Mrs. E.D. Brooks, May 1919': '19',
}
# Matching is exact, so spelling/whitespace variants need their own entry.
for credit_text, yy in creditline_years.items():
    df.loc[df['creditline'] == credit_text, 'accession_year'] = yy

In [52]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[20:34]

Gift of Dr. Angus Morrison                                   11
Gift of J.B. Neumann                                         10
The John R. Van Derlip Fund                                   9
Gift of Ethel Morrison Van Derlip, 1916                       9
Gift of Mrs. C.C. Bovey                                       9
Gift of Herschel V. Jones                                     9
Estate of Mrs. John Washburn, 1942                            8
The Ethel Morrison Van Derlip Fund, 1961                      8
Gift of Mrs. Ridgely Hunt, 1940                               8
Gift of Mr. Marvin Small, 1961                                8
The William Hood Dundwoody Fund, 1915                         8
The Christina N. and Swan J. Turnblad Memorial Fund, 1966     8
The Ethel Morrison Van Derlip Fund, 1964                      8
Gift of Richard P. Gale, 1956                                 8
Name: creditline, dtype: int64

In [53]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality.
year_by_creditline = {
    'Estate of Mrs. John Washburn, 1942': '42',
    'The Ethel Morrison Van Derlip Fund, 1961': '61',
    'Gift of Mrs. Ridgely Hunt, 1940': '40',
    'Gift of Mr. Marvin Small, 1961': '61',
    'The William Hood Dundwoody Fund, 1915': '15',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1966': '66',
    'The Ethel Morrison Van Derlip Fund, 1964': '64',
    'Gift of Richard P. Gale, 1956': '56',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [58]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[25:44]

The John R. Van Derlip Fund                                 9
The William Hood Dundwoody Fund, 1915                       7
Gift of M. H. Taylor, 1927                                  7
Anonymous Gift, 1938                                        7
The Ethel Morrison Van Derlip Fund, 1963                    7
Gift of Mr. Richard P. Gale, 1956                           7
The William Hood Dunwoody Fund, 1941                        7
The John R. Van Derlip Fund, 1966                           6
Gift of Alexander Masley, 1932                              6
The Discretionary Purchase Fund, 1965                       6
Gift of Mr. H. J. L. Wright, 1921                           6
Gift of Mrs. Charles C. Bovey, 1941                         6
Gift of Timothy Cole, August 1919\r\n                       6
Gift of the Artist                                          6
The William Hood Dunwoody Fund, 1947                        6
Gift of the artist, 1966                                    6
The Mode

In [59]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, so the
# trailing '\r\n' in one of the keys below is significant.
year_by_creditline = {
    'Gift of M. H. Taylor, 1927': '27',
    'Anonymous Gift, 1938': '38',
    'The Ethel Morrison Van Derlip Fund, 1963': '63',
    'Gift of Mr. Richard P. Gale, 1956': '56',
    'The William Hood Dunwoody Fund, 1941': '41',
    'The John R. Van Derlip Fund, 1966': '66',
    'Gift of Alexander Masley, 1932': '32',
    'The Discretionary Purchase Fund, 1965': '65',
    'Gift of Mr. H. J. L. Wright, 1921': '21',
    'Gift of Mrs. Charles C. Bovey, 1941': '41',
    'Gift of Timothy Cole, August 1919\r\n': '19',
    'The William Hood Dunwoody Fund, 1947': '47',
    'Gift of the artist, 1966': '66',
    'Gift of Mrs. C. C. Bovey, 1924': '24',
    'Purchase: Dunwoody Fund, 1915': '15',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [62]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[25:43]

Gift of Mrs. C.C. Bovey                                        9
The William Hood Dundwoody Fund, 1915                          7
The Modernism Collection, gift of Norwest Bank Minnesota       6
Gift of the Artist                                             6
Purchase: Dunwoody Fund, 1915                                  6
Gift of Mrs. Edmund D. Brooks, May 1919                        5
Gift of Mr. Gardner Teall, 1917                                5
The William M. Ladd Collection\r\nGift of Herschel V. Jones    5
Gift of Gardner Teall, 1917                                    5
Gift of Mary C. Wheelwright                                    5
Gift of Herschel V. Jones, 1916                                5
Gift of Mr. and Mrs. E. D. Brooks, May 1919                    5
Gift of Mr. & Mrs. E. D. Brooks, May 1919                      5
Van Derlip Funds                                               5
Gift of Mr. Edmund D. Brooks, 1915                             5
Gift of General Mills, In

In [63]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, which
# is why near-duplicate spellings are listed separately.
year_by_creditline = {
    'Gift of Mrs. Edmund D. Brooks, May 1919': '19',
    'Gift of Mr. Gardner Teall, 1917': '17',
    'Gift of Gardner Teall, 1917': '17',
    'Gift of Herschel V. Jones, 1916': '16',
    'Gift of Mr. and Mrs. E. D. Brooks, May 1919': '19',
    'Gift of Mr. & Mrs. E. D. Brooks, May 1919': '19',
    'Gift of Mr. Edmund D. Brooks, 1915': '15',
    'Gift of General Mills, Inc., 1960': '60',
    'Gift of Margaret McMillan Webber, 1951': '51',
    'Gift of Benjamin Miller, 1929': '29',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [68]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[30:53]

Gift of Mary C. Wheelwright                                      5
Van Derlip Funds                                                 5
The William M. Ladd Collection\r\nGift of Herschel V. Jones      5
Gift of Mr. Edmund D. Brooks, 1915                               5
Gift of Herschel V. Jones, 1916                                  5
Gift of Mrs. C. C. Bovey, 1940                                   4
Gift of Mrs. Teall                                               4
Gift of Gardner C. Teall, 1916                                   4
Bequest of Mrs. Charles S. Pillsbury                             4
Gift of Mrs. B.J.O. Nordfeldt, 1955                              4
Gift of Carl Jones, 1952                                         4
Gift of Mr. George A. Goddard                                    4
Gift of Mrs. C.C. Bovey, 1940                                    4
Gift of the Estate of George W. Morgan, 1958                     4
Gift of the Friends of the Institute, 1931                    

In [70]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, which
# is why near-duplicate spellings are listed separately.
year_by_creditline = {
    'Gift of Mrs. C. C. Bovey, 1940': '40',
    'Gift of Gardner C. Teall, 1916': '16',
    'Gift of Mrs. B.J.O. Nordfeldt, 1955': '55',
    'Gift of Carl Jones, 1952': '52',
    'Gift of Mrs. C.C. Bovey, 1940': '40',
    'Gift of the Estate of George W. Morgan, 1958': '58',
    'Gift of the Friends of the Institute, 1931': '31',
    'Gift of funds from Mr. and Mrs. D. Thomas Bergen, 1967': '67',
    'Trufant and Wyman Fund, 1958': '58',
    'Gift of Mrs.Charles C.Bovey, 1924': '24',
    'Mr. S. C. Burton, June 5, 1925': '25',
    'Gift of funds from Mr. and Mrs. Hall Peterson, 1966': '66',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [75]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[40:55]

Gift of Mrs. C. J. Martin, in memory of Charles Jairus Martin        4
The William Hood Dunwoody Fund, 1919                                 3
Book Funds (?)                                                       3
Gift of the Artist, 1933                                             3
Gift of Mrs. Charles S. Pillsbury, 1938                              3
Van Derlip: Funds                                                    3
Gift of Mrs. Fred Wells, 1954                                        3
Purchase: Discretionary Fund, 1964                                   3
Gift of Mr. and Mrs. John E. Andrus III                              3
Gift of Vera Andrus, 1938                                            3
Gift of Wheelwright, 1937                                            3
Gift of Mr. and Mrs. Arthur Weisenberger, 1961                       3
Gift of Mrs. C. C. Bovey                                             3
The Willaim M. Ladd Collection\r\nGift of Herschel V. Jones, 1916    3
Gift o

In [76]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, so the
# embedded '\r\n' and the source spelling in the keys below are significant.
year_by_creditline = {
    'The William Hood Dunwoody Fund, 1919': '19',
    'Gift of the Artist, 1933': '33',
    'Gift of Mrs. Charles S. Pillsbury, 1938': '38',
    'Gift of Mrs. Fred Wells, 1954': '54',
    'Purchase: Discretionary Fund, 1964': '64',
    'Gift of Vera Andrus, 1938': '38',
    'Gift of Wheelwright, 1937': '37',
    'Gift of Mr. and Mrs. Arthur Weisenberger, 1961': '61',
    'The Willaim M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Gift of Truesdale, 1927': '27',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [78]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[40:55]

Gift of Mr. George A. Goddard              4
Mrs. C.C. Bovey, 1941                      3
Gift of Theodore W. Bennett, 1964          3
The William M. Ladd Collection             3
Gift of Mr. and Mrs. John E. Andrus III    3
Gift of Mrs. B.J.O Nordfeldt               3
Bequest of George W. Morgan, 1958          3
Gift of Mrs. George Resler, 1925           3
Book Funds (?)                             3
Bequest of Dorothy Millett Lindeke         3
Bequest of Putnam Dana McMillan, 1961      3
Gift of Mrs. C.A. Reed, 1922               3
Gift of Eugene Larkin, 1963                3
Bequest of Herschel V. Jones               3
Gift of Mrs. George P. Douglas             3
Name: creditline, dtype: int64

In [79]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality.
year_by_creditline = {
    'Mrs. C.C. Bovey, 1941': '41',
    'Gift of Theodore W. Bennett, 1964': '64',
    'Bequest of George W. Morgan, 1958': '58',
    'Gift of Mrs. George Resler, 1925': '25',
    'Bequest of Putnam Dana McMillan, 1961': '61',
    'Gift of Mrs. C.A. Reed, 1922': '22',
    'Gift of Eugene Larkin, 1963': '63',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [85]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[40:55]

Gift of Mrs. Teall                                    4
Gift of Mrs. B.J.O Nordfeldt                          3
Bequest of Herschel V. Jones                          3
Gift of Mr. and Mrs. John E. Andrus III               3
Book Funds (?)                                        3
The William M. Ladd Collection                        3
Van Derlip: Funds                                     3
Bequest of Dorothy Millett Lindeke                    3
Gift of Mrs. George P. Douglas                        3
Gift of Mrs. C. C. Bovey                              3
Gift of the artist, 1941                              2
Gift of Mr. and Mrs. George Halpin, 1958              2
Gift of Mrs. George W. P. Heffelfinger, 1965          2
Research collection, Minneapolis Institute of Arts    2
The John R. Van Derlip Library Fund                   2
Name: creditline, dtype: int64

In [81]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, so the
# embedded '\r\n' sequences in the keys below are significant.
year_by_creditline = {
    'The William Hood Dunwoody Fund, 1950': '50',
    'Mr. S. C. Burton, March 31, 1924': '24',
    'Anonymous gift\r\nThe William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916': '16',
    'Purchase: July 1922': '22',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [89]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[45:65]

The William M. Ladd Collection                              3
Van Derlip: Funds                                           3
Bequest of Dorothy Millett Lindeke                          3
Gift of Mrs. George P. Douglas                              3
Gift of Mrs. C. C. Bovey                                    3
Gift of the artist, 1941                                    2
Gift of Mr. and Mrs. George Halpin, 1958                    2
Gift of Mrs. George W. P. Heffelfinger, 1965                2
Research collection, Minneapolis Institute of Arts          2
The John R. Van Derlip Library Fund                         2
The Mrs. Charles S. Pillsbury Memorial Fund, 1962           2
Gift of Frank P. Leslie                                     2
The David M. Daniels Fund, 1967                             2
Gift of Mrs. J.A. Vaughn                                    2
Gift of Nesto Jacometti, 1957                               2
Gift of Mr. Carl O. Schniewind, 1957                        2
Gift of 

In [90]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality.
year_by_creditline = {
    'Gift of the artist, 1941': '41',
    'Gift of Mr. and Mrs. George Halpin, 1958': '58',
    'Gift of Mrs. George W. P. Heffelfinger, 1965': '65',
    'The Mrs. Charles S. Pillsbury Memorial Fund, 1962': '62',
    'The David M. Daniels Fund, 1967': '67',
    'Gift of Nesto Jacometti, 1957': '57',
    'Gift of Mr. Carl O. Schniewind, 1957': '57',
    'Gift of Miss Leona Prasse, 1957': '57',
    'Gift of Mr. W. L. Tenney and Alice Tenney Mitchell, 1962': '62',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [91]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[45:65]

Bequest of Herschel V. Jones                                       3
Gift of Mrs. George P. Douglas                                     3
Gift of Mrs. C. C. Bovey                                           3
Gift of Mr. and Mrs. John E. Andrus III                            3
Van Derlip: Funds                                                  3
Gift of Charles Bell                                               2
Research collection, Minneapolis Institute of Arts                 2
The John Russell Van Derlip Library Fund                           2
The F.S. Winston Fund, 1957                                        2
GIft of Mr. George A. Goddard, 1919                                2
William Hood Dunwoody Fund Purchase, 1947                          2
The William M. Ladd Collection, Gift of Herschel V. Jones, 1916    2
Gift of Russell A. Plimpton                                        2
Gift of Mr. and Mrs. Winton Jones, 1967                            2
Gift of Herschel V. Jones, 1919   

In [92]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality, so the
# source spellings (e.g. 'GIft', 'Ridgley') are kept exactly as they appear.
year_by_creditline = {
    'The F.S. Winston Fund, 1957': '57',
    'GIft of Mr. George A. Goddard, 1919': '19',
    'William Hood Dunwoody Fund Purchase, 1947': '47',
    'The William M. Ladd Collection, Gift of Herschel V. Jones, 1916': '16',
    'Gift of Mr. and Mrs. Winton Jones, 1967': '67',
    'Gift of Herschel V. Jones, 1919': '19',
    'Gift of Richard S. and Phyllis Davis, 1957': '57',
    'Gift of Mrs. Ridgley Hunt, 1940': '40',
    'Gift of Dr. and Mrs. Markle Karlen, 1966': '66',
    'Gift of Herschel V. Jones, 1920': '20',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [95]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[45:65]

Gift of Mrs. B.J.O Nordfeldt                          3
Bequest of Herschel V. Jones                          3
Book Funds (?)                                        3
Bequest of Dorothy Millett Lindeke                    3
The William M. Ladd Collection                        3
Gift of Frank P. Leslie                               2
Gift of Mary H. Taylor, 1925                          2
Gift of John L. Smith Fund, 1927                      2
Gift of Russell A. Plimpton                           2
The Miscellaneous Works of Art Purchase Fund, 1952    2
Purchase: July 1919                                   2
Gift of Howard Mansfield                              2
Museum Purchase, 1922                                 2
Gift of Dr. and Mrs. Markle Karlen, 1967              2
Gift of the artist, 1961                              2
The Miscellaneous Works of Art Purchase Fund, 1929    2
Gift of Trinity Parish, New York                      2
The John Russell Van Derlip Library Fund        

In [96]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality.
year_by_creditline = {
    'Gift of Mary H. Taylor, 1925': '25',
    'Gift of John L. Smith Fund, 1927': '27',
    'The Miscellaneous Works of Art Purchase Fund, 1952': '52',
    'Purchase: July 1919': '19',
    'Museum Purchase, 1922': '22',
    'Gift of Dr. and Mrs. Markle Karlen, 1967': '67',
    'Gift of the artist, 1961': '61',
    'The Miscellaneous Works of Art Purchase Fund, 1929': '29',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [97]:
# Rerun the filters on the updated dataframe.
# Split on accession_year length: df_1 keeps values of at most 4 characters,
# df_2 keeps values longer than 4 characters.
year_len = df['accession_year'].map(len)
df_1 = df[year_len < 5]
df_2 = df[year_len > 4]
# Split df_1 on accession_number length: df_3 keeps values longer than
# 3 characters, df_4 keeps values shorter than 4 characters.
number_len = df_1['accession_number'].map(len)
df_3 = df_1[number_len > 3]
df_4 = df_1[number_len < 4]
# Accession years with more than 30 rows go to df_grouped; years with 30 or
# fewer rows are held back for manual review.
by_year = df_3.groupby('accession_year')
df_grouped = by_year.filter(lambda grp: len(grp) > 30)
df_small_years = by_year.filter(lambda grp: len(grp) <= 30)
# Combine the small-year rows with df_2 and df_4 into one review frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_remainder = pd.concat([df_small_years, df_2, df_4])
# Show a positional window of the most frequent remaining credit lines
df_remainder['creditline'].value_counts().iloc[45:65]

Gift of Mrs. B.J.O Nordfeldt                                         3
The William M. Ladd Collection                                       3
Van Derlip: Funds                                                    3
Bequest of Herschel V. Jones                                         3
Gift of Mrs. George P. Douglas                                       3
Gift of Mrs. Ridgely Hunt                                            2
Gift of Mrs. Paul Morand, 1932                                       2
The Christina N. and Swan J. Turnblad Memorial Fund, 1962            2
Gift of Frank P. Leslie                                              2
Library Duplicate Fund                                               2
Bequest of Margaret McMillan Webber, 1951                            2
Gift of William Channing Whitney                                     2
Gift of Trinity Parish, New York                                     2
Bequest of the Sawyer Estate                                         2
Gift o

In [98]:
# Hand-assign the two-digit accession_year code for credit lines that spell out
# the year. Keys are compared to 'creditline' by exact string equality.
year_by_creditline = {
    'Gift of Mrs. Paul Morand, 1932': '32',
    'The Christina N. and Swan J. Turnblad Memorial Fund, 1962': '62',
    'Bequest of Margaret McMillan Webber, 1951': '51',
    'Gift of Mrs. Charles S. Pillsbury, 1924': '24',
    'Gift of Arthur L. Jenks, August 1922': '22',
    'Gift of Gardner Teall, 1920': '20',
    'The Miscellaneous Works of Art Purchase Fund, 1952 (2nd Biennial)': '52',
}
for credit_text, year_code in year_by_creditline.items():
    # Overwrites accession_year in place on every row carrying this credit line
    df.loc[df['creditline'] == credit_text, 'accession_year'] = year_code

In [99]:
# Full tally of credit lines in the review frame.
# NOTE: df_remainder is not rebuilt in this cell, so the counts reflect the
# most recently executed filter cell rather than any later edits to df.
df_remainder['creditline'].value_counts()

The William M. Ladd Collection\r\nGift of Herschel V. Jones, 1916               872
Gift of H. V. Jones                                                             518
The Minnich Collection\r\nThe Ethel Morrison Van Derlip Fund                    114
The Ethel Morrison Van Derlip Fund                                               55
Gift of F.N. Edmonds                                                             52
Gift of Frederick B. Wells                                                       38
Gift of Mrs. Carl W. Jones in Memory of Her Husband                              38
Gift of Mrs. Darwin R. Martin                                                    38
The William Hood Dunwoody Fund                                                   31
Gift of the Estate of Dorothy Millett Lindeke                                    30
The William Hood Dunwoody Fund, 1915                                             28
Gift of Bruce B. Dayton                                                     

In [100]:
# Number of rows per accession_year among the years that passed the
# more-than-30-rows filter (df_grouped), most frequent first.
df_grouped['accession_year'].value_counts()

66      8187
99      7717
16      4529
2013    4006
2003    3731
98      3550
24      3491
2007    3121
2004    3066
2002    2864
96      2650
2015    2625
2001    2490
2010    2356
97      2306
2005    2180
74      2018
95      1960
2000    1957
2017    1828
2018    1771
2014    1771
2012    1771
82      1680
77      1676
90      1671
2016    1651
2006    1625
81      1580
94      1468
        ... 
63       290
62       272
54       202
43       190
56       184
15       172
35       171
37       170
55       163
28       139
14       129
45       124
44       122
20       115
09       105
32       104
22       100
18        84
57        81
13        77
38        73
59        66
36        59
33        59
39        54
60        51
49        47
52        39
48        36
53        36
Name: accession_year, Length: 108, dtype: int64