In [1]:
import pandas as pd
import requests
from io import StringIO
import numpy as np
import csv

# Define the URL of the CSV file.
url = 'https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/constituents.csv'

# Read the CSV file into a pandas DataFrame.
df = pd.read_csv(url)

# Filter the DataFrame to include only rows with 'constituentid', 'wikidataid', and 'endyear'.
filtered_df = df[(df['constituentid'].notnull()) & (df['wikidataid'].notnull()) & (df['endyear'].notnull())]

# Display the filtered DataFrame.
#display(filtered_df)

In [2]:
# Read the CSV file from the Wikidata Query Service into a pandas DataFrame.
df = pd.read_csv('ngadeath.csv') 

# Select only the 'itemLabel' and death year columns.
selected_columns = df[['itemLabel', 'P2252Value', 'yearOfDeath', 'date']]

# Display the DataFrame with the selected columns.
#display(selected_columns)

In [3]:
# Merging the two dataframes on 'constituentid' and 'P2252Value'.
merged_df = pd.merge(filtered_df, selected_columns, left_on='constituentid', right_on='P2252Value')

# Filtering rows where 'endyear' and 'yearOfDeath' do not match.
mismatch_df = merged_df[merged_df['endyear'] != merged_df['yearOfDeath']]

# Filter the DataFrame to remove rows where 'external_value' is "0".
df_mismatch_finder = mismatch_df[mismatch_df['endyear'] != 0]

# Displaying the dataframe with mismatched values.
#display(df_mismatch_finder)

In [4]:
# Create a new DataFrame with the specified columns and values.
df_mismatch_nga_death = pd.DataFrame({
    'item_id': df_mismatch_finder['wikidataid'],  # Value from 'Wiki QID' column.
    'statement_guid': np.nan,               # Null value.
    'property_id': 'P570',               # Constant value "P570".
    'wikidata_value': df_mismatch_finder['yearOfDeath'],      # Value from 'dateOfBirth' column.
    'meta_wikidata_value': np.nan,          # Null value.
    'external_value': df_mismatch_finder['endyear'].astype(str),        # Value from 'BeginDate' column.
    'external_url': 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv',  # Constant URL value.
    'type': 'statement'                   # Constant value "statement".
})

# Display the new DataFrame.
#display(df_mismatch_nga_death)

In [5]:
# Specify the file path.
file_path = 'output_nga_death.csv'

# Write the DataFrame to a CSV file with index=False.
df_mismatch_nga_death.to_csv(file_path, index=False)

In [6]:
display(df_mismatch_nga_death)

Unnamed: 0,item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url,type
1,Q725410,,P570,1803.0,,1802.0,https://media.githubusercontent.com/media/Muse...,statement
5,Q553273,,P570,1679.0,,1683.0,https://media.githubusercontent.com/media/Muse...,statement
38,Q809520,,P570,1555.0,,1531.0,https://media.githubusercontent.com/media/Muse...,statement
49,Q21403137,,P570,1884.0,,1913.0,https://media.githubusercontent.com/media/Muse...,statement
62,Q818396,,P570,1518.0,,1517.0,https://media.githubusercontent.com/media/Muse...,statement
...,...,...,...,...,...,...,...,...
9291,Q17537842,,P570,1889.0,,1899.0,https://media.githubusercontent.com/media/Muse...,statement
9292,Q17537842,,P570,1920.0,,1899.0,https://media.githubusercontent.com/media/Muse...,statement
9299,Q52149206,,P570,1701.0,,1700.0,https://media.githubusercontent.com/media/Muse...,statement
9333,Q112716406,,P570,1870.0,,1866.0,https://media.githubusercontent.com/media/Muse...,statement
