In [7]:
import pandas as pd
import requests
from io import StringIO
import numpy as np
import csv

# Location of the MoMA artists table (a CSV served from the MuseumofModernArt/collection repository).
url = 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv'

# Download and parse the CSV in a single step.
df = pd.read_csv(url)

# Keep only the artists for which all three fields are present: the MoMA
# identifier, the Wikidata QID, and the year stored in 'BeginDate'.
required_columns = ['ConstituentID', 'Wiki QID', 'BeginDate']
filtered_df = df.dropna(subset=required_columns)

# To inspect the result, evaluate `filtered_df` in a scratch cell.

In [8]:
# Load the locally stored 'yearanddate.csv' (per the original note, an export
# from the Wikidata Query Service).
df = pd.read_csv('yearanddate.csv')

# Narrow the frame to the item label, the 'P2174Value' column (used as the join
# key against MoMA's 'ConstituentID' in the merge cell), and the two
# date-of-birth columns.
wikidata_columns = ['itemLabel', 'P2174Value', 'dateOfBirthYear', 'dateOfBirth']
selected_columns = df[wikidata_columns]

# To inspect the result, evaluate `selected_columns` in a scratch cell.

In [9]:
# Join MoMA rows to Wikidata rows wherever MoMA's 'ConstituentID' equals
# 'P2174Value'. This is pandas' default inner join, so rows without a
# counterpart on the other side drop out.
merged_df = filtered_df.merge(
    selected_columns,
    left_on='ConstituentID',
    right_on='P2174Value',
)

# Keep the rows whose MoMA year ('BeginDate') differs from 'dateOfBirthYear'.
# NOTE(review): a missing 'dateOfBirthYear' (NaN) also compares as "different"
# and is therefore kept - confirm that this is intended.
years_differ = merged_df['BeginDate'].ne(merged_df['dateOfBirthYear'])
mismatch_df = merged_df.loc[years_differ]

# Discard rows whose 'BeginDate' is 0 (presumably a placeholder for an unknown
# year - confirm against the MoMA data dictionary).
has_begin_date = mismatch_df['BeginDate'].ne(0)
df_mismatch_finder = mismatch_df.loc[has_begin_date]

# To inspect the result, evaluate `df_mismatch_finder` in a scratch cell.

In [10]:
# Assemble the output table column by column. Series-valued entries keep the
# row index of df_mismatch_finder; scalar entries are repeated on every row.
mismatch_columns = {
    # Wikidata item the row refers to.
    'item_id': df_mismatch_finder['Wiki QID'],
    # Left empty.
    'statement_guid': np.nan,
    # Constant property id 'P569'.
    'property_id': 'P569',
    # Full date-of-birth value from the Wikidata export.
    'wikidata_value': df_mismatch_finder['dateOfBirth'],
    # Left empty.
    'meta_wikidata_value': np.nan,
    # MoMA's 'BeginDate', converted to text.
    'external_value': df_mismatch_finder['BeginDate'].astype(str),
    # Constant: the CSV the external value was taken from.
    'external_url': 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv',
    # Constant mismatch type.
    'type': 'statement',
}
df_mismatch_moma = pd.DataFrame(mismatch_columns)

# To inspect the result, evaluate `df_mismatch_moma` in a scratch cell.

In [11]:
# Destination of the exported mismatch table.
file_path = 'output_moma_birth.csv'

# Save as CSV; index=False keeps the DataFrame's row labels out of the file.
df_mismatch_moma.to_csv(path_or_buf=file_path, index=False)

In [12]:
# Show the assembled mismatch table as this cell's output.
# NOTE(review): `display` is not imported in the visible cells; presumably it is
# supplied by the IPython/Jupyter session - confirm before running as a script.
display(df_mismatch_moma)

Unnamed: 0,item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url,type
5,Q153739,,P569,1888-09-16T00:00:00Z,,1886,https://media.githubusercontent.com/media/Muse...,statement
6,Q153739,,P569,1887-09-16T00:00:00Z,,1886,https://media.githubusercontent.com/media/Muse...,statement
60,Q576570,,P569,1860-01-01T00:00:00Z,,1858,https://media.githubusercontent.com/media/Muse...,statement
75,Q325503,,P569,1923-02-26T00:00:00Z,,1915,https://media.githubusercontent.com/media/Muse...,statement
88,Q1298270,,P569,1889-12-15T00:00:00Z,,1888,https://media.githubusercontent.com/media/Muse...,statement
...,...,...,...,...,...,...,...,...
3740,Q16960971,,P569,1955-01-01T00:00:00Z,,1942,https://media.githubusercontent.com/media/Muse...,statement
3817,Q270641,,P569,1944-04-18T00:00:00Z,,1947,https://media.githubusercontent.com/media/Muse...,statement
3836,Q508344,,P569,1864-11-05T00:00:00Z,,1865,https://media.githubusercontent.com/media/Muse...,statement
3900,Q5301405,,P569,1938-04-11T00:00:00Z,,1933,https://media.githubusercontent.com/media/Muse...,statement
