### Chapter 7

In [18]:
# I chose the Museum data.

import pandas as pd

MetData = pd.read_csv("MetObjects.csv", low_memory=False)

print(MetData.shape)


(477804, 54)


In [22]:
# Preview data
MetData.head(5)

Unnamed: 0,Object Number,Is Highlight,Is Timeline Work,Is Public Domain,Object ID,Gallery Number,Department,AccessionYear,Object Name,Title,...,River,Classification,Rights and Reproduction,Link Resource,Object Wikidata URL,Metadata Date,Repository,Tags,Tags AAT URL,Tags Wikidata URL
0,1979.486.1,False,False,False,1,,The American Wing,1979,Coin,One-dollar Liberty Head Coin,...,,,,http://www.metmuseum.org/art/collection/search/1,,,"Metropolitan Museum of Art, New York, NY",,,
1,1980.264.5,False,False,False,2,,The American Wing,1980,Coin,Ten-dollar Liberty Head Coin,...,,,,http://www.metmuseum.org/art/collection/search/2,,,"Metropolitan Museum of Art, New York, NY",,,
2,67.265.9,False,False,False,3,,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,...,,,,http://www.metmuseum.org/art/collection/search/3,,,"Metropolitan Museum of Art, New York, NY",,,
3,67.265.10,False,False,False,4,,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,...,,,,http://www.metmuseum.org/art/collection/search/4,,,"Metropolitan Museum of Art, New York, NY",,,
4,67.265.11,False,False,False,5,,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,...,,,,http://www.metmuseum.org/art/collection/search/5,,,"Metropolitan Museum of Art, New York, NY",,,


In [23]:
# Replace missing values with "NA"
MetDataFilled = MetData.fillna("NA")
print(MetData.isnull().sum())

Object Number                   0
Is Highlight                    0
Is Timeline Work                0
Is Public Domain                0
Object ID                       0
Gallery Number             426028
Department                      0
AccessionYear                3556
Object Name                  1691
Title                       29185
Culture                    270425
Period                     386848
Dynasty                    454571
Reign                      466578
Portfolio                  454274
Constituent ID             202269
Artist Role                204368
Artist Prefix              202269
Artist Display Name        202269
Artist Display Bio         204368
Artist Suffix              202317
Artist Alpha Sort          202269
Artist Nationality         202269
Artist Begin Date          202269
Artist End Date            202269
Artist Gender              374743
Artist ULAN URL            255783
Artist Wikidata URL        260072
Object Date                 13867
Object Begin D

In [25]:
# let's identify duplicat object numbers and remove them

CheckDupes = MetData.drop_duplicates(subset='Object Number')
duplicates = CheckDupes['Object Number'].duplicated()

print(duplicates)

0         False
1         False
2         False
3         False
4         False
          ...  
477799    False
477800    False
477801    False
477802    False
477803    False
Name: Object Number, Length: 474872, dtype: bool


### Chapter 8 & 10

In [44]:
# I would like to combine columns in different variables and then combined them into one new one. 
# Using a def- it's probably the long way around doing this. 

def combine_cols(MetData):
    return str(MetData['Object Number']) + '-' + str(MetData['Department'])
DepartmentData = MetData.apply(combine_cols, axis=1)
DepartmentData.head(10)

0    1979.486.1-The American Wing
1    1980.264.5-The American Wing
2      67.265.9-The American Wing
3     67.265.10-The American Wing
4     67.265.11-The American Wing
5     67.265.12-The American Wing
6     67.265.13-The American Wing
7     67.265.14-The American Wing
8     67.265.15-The American Wing
9    1979.486.3-The American Wing
dtype: object

In [57]:
# Adding headers to column
df = pd.DataFrame(DepartmentData, columns=['Object & Dept.'])
df.head(5)

Unnamed: 0,Object & Dept.
0,1979.486.1-The American Wing
1,1980.264.5-The American Wing
2,67.265.9-The American Wing
3,67.265.10-The American Wing
4,67.265.11-The American Wing


In [49]:
# Adding Acession Yeat to a new variable

df2 = pd.DataFrame(MetData, columns=['AccessionYear'])
df2.head(5)

Unnamed: 0,AccessionYear
0,1979
1,1980
2,1967
3,1967
4,1967


In [58]:
# Adding Object & Dept with year acquired
df['Year Acquired'] = df2
df.head(5)

Unnamed: 0,Object & Dept.,Year Acquired
0,1979.486.1-The American Wing,1979
1,1980.264.5-The American Wing,1980
2,67.265.9-The American Wing,1967
3,67.265.10-The American Wing,1967
4,67.265.11-The American Wing,1967


In [61]:
# Now I would like to pivot and count objects by department
DepartmentObjectCount = MetData.pivot_table(index='AccessionYear', values='Object Number', aggfunc='count')

DepartmentObjectCount.head(5)

Unnamed: 0_level_0,Object Number
AccessionYear,Unnamed: 1_level_1
1870,1
1871,60
1872,4
1873,66
1874,5396


In [68]:
# Lets Group by Department and find the latest year object was acquired

grouped = MetData.groupby('Object Name').max('AccessionYear')

# Print the result
grouped.head(5)

Unnamed: 0_level_0,Is Highlight,Is Timeline Work,Is Public Domain,Object ID,Object Begin Date,Object End Date,Metadata Date
Object Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"""Autophone"" Organette",False,False,True,504965,1878,1878,
"""Basso""",False,False,True,504735,1850,1900,
"""Chanot Model"" Violin",False,False,False,504299,1800,1899,
"""Humantone"" Nose Flute",False,False,False,504178,1941,1999,
"""Japanese Fiddle""",False,False,True,500679,1801,1900,


### Chapter 11

In [74]:
# practicing generating frequencies based on the accession year
date_range = pd.date_range(start='1900-01-01', end='2022-12-31', freq='M')
date_range


DatetimeIndex(['1900-01-31', '1900-02-28', '1900-03-31', '1900-04-30',
               '1900-05-31', '1900-06-30', '1900-07-31', '1900-08-31',
               '1900-09-30', '1900-10-31',
               ...
               '2022-03-31', '2022-04-30', '2022-05-31', '2022-06-30',
               '2022-07-31', '2022-08-31', '2022-09-30', '2022-10-31',
               '2022-11-30', '2022-12-31'],
              dtype='datetime64[ns]', length=1476, freq='M')

In [79]:
# converting the AccessionYear column to datetime
MetData['AccessionYear2'] = pd.to_datetime(MetData['AccessionYear'], format='%Y')
MetData.head(5)

ValueError: unconverted data remains: -02-15