In [7]:
import pandas as pd
import requests
from io import StringIO
import numpy as np
import csv

# Source data: the Artists.csv file published in MoMA's public collection repository.
url = 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv'

# Download and parse the artist table.
df = pd.read_csv(url)

# Keep only artists that can take part in the comparison: every row must have
# a value in 'ConstituentID', 'Wiki QID' and 'EndDate'. Rows missing any of the
# three are dropped.
filtered_df = df.dropna(
    subset=['ConstituentID', 'Wiki QID', 'EndDate'],
)

# Uncomment to inspect the filtered table.
#display(filtered_df)

In [8]:
# Load the locally saved 'momadeath.csv' (per the original note, an export from
# the Wikidata Query Service). This rebinds `df`; `filtered_df` built from the
# MoMA table is unaffected.
df = pd.read_csv('momadeath.csv')

# Narrow the frame to the label, the 'P2174Value' join key and the two
# death-date columns used by the later cells.
selected_columns = df.loc[
    :, ['itemLabel', 'P2174Value', 'yearOfDeath', 'dateOfDeath']
]

# Uncomment to inspect the selected columns.
#display(selected_columns)

In [9]:
# Inner-join the MoMA artists with the Wikidata rows: MoMA's 'ConstituentID'
# is matched against the Wikidata-side 'P2174Value' column.
merged_df = pd.merge(filtered_df, selected_columns, left_on='ConstituentID', right_on='P2174Value')

# Keep rows whose MoMA 'EndDate' differs from the Wikidata 'yearOfDeath'.
# A missing 'yearOfDeath' cannot be compared: NaN != x evaluates to True for
# every x, so without the notna() guard such rows would be reported as
# mismatches even though there is no Wikidata year to disagree with.
has_wikidata_year = merged_df['yearOfDeath'].notna()
years_differ = merged_df['EndDate'] != merged_df['yearOfDeath']
mismatch_df = merged_df[has_wikidata_year & years_differ]

# Drop rows whose MoMA 'EndDate' is the integer 0.
# NOTE(review): 0 is presumably MoMA's placeholder for "no end date recorded"
# rather than a real year; confirm against the dataset documentation.
df_mismatch_finder = mismatch_df[mismatch_df['EndDate'] != 0]

# Uncomment to inspect the mismatched rows.
#display(df_mismatch_finder)

In [10]:
# Build one output row per entry of df_mismatch_finder. Series-valued entries
# are taken row by row from that frame (and supply the index); scalar entries
# are repeated on every row.
mismatch_columns = {
    # Wikidata item identifier, taken from MoMA's 'Wiki QID' column.
    'item_id': df_mismatch_finder['Wiki QID'],
    # Left empty (NaN) on every row.
    'statement_guid': np.nan,
    # Constant property identifier.
    'property_id': 'P570',
    # Wikidata-side value, taken from the 'dateOfDeath' column.
    'wikidata_value': df_mismatch_finder['dateOfDeath'],
    # Left empty (NaN) on every row.
    'meta_wikidata_value': np.nan,
    # MoMA-side value: 'EndDate' converted to its string form.
    'external_value': df_mismatch_finder['EndDate'].astype(str),
    # Constant link to the MoMA source file.
    'external_url': 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv',
    # Constant row type.
    'type': 'statement',
}
df_mismatch_moma = pd.DataFrame(mismatch_columns)

# Uncomment to inspect the assembled table.
#display(df_mismatch_moma)

In [11]:
# Output location, relative to the notebook's working directory.
file_path = 'output_moma_death.csv'

# Export the table; index=False keeps the DataFrame's row index out of the file.
df_mismatch_moma.to_csv(path_or_buf=file_path, index=False)

In [12]:
# Render the assembled mismatch table as this cell's output.
display(df_mismatch_moma)

Unnamed: 0,item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url,type
101,Q71244,,P570,1889-12-22T00:00:00Z,,1882,https://media.githubusercontent.com/media/Muse...,statement
136,Q318352,,P570,1909-01-29T00:00:00Z,,1903,https://media.githubusercontent.com/media/Muse...,statement
138,Q318352,,P570,1909-01-09T00:00:00Z,,1903,https://media.githubusercontent.com/media/Muse...,statement
175,Q2178796,,P570,1934-02-27T00:00:00Z,,1909,https://media.githubusercontent.com/media/Muse...,statement
283,Q2144563,,P570,1986-12-06T00:00:00Z,,1987,https://media.githubusercontent.com/media/Muse...,statement
329,Q570353,,P570,1981-10-23T00:00:00Z,,1983,https://media.githubusercontent.com/media/Muse...,statement
354,Q454568,,P570,1947-03-30T00:00:00Z,,1945,https://media.githubusercontent.com/media/Muse...,statement
381,Q737913,,P570,1991-12-12T00:00:00Z,,1992,https://media.githubusercontent.com/media/Muse...,statement
401,Q96752,,P570,1971-12-31T00:00:00Z,,1972,https://media.githubusercontent.com/media/Muse...,statement
475,Q3760455,,P570,1903-01-01T00:00:00Z,,1902,https://media.githubusercontent.com/media/Muse...,statement
