In [7]:
import pandas as pd
import requests
from io import StringIO
import numpy as np
import csv

# Define the URL of the CSV file.
url = 'https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/constituents.csv'

# Read the CSV file into a pandas DataFrame.
df = pd.read_csv(url)

# Filter the DataFrame to include only rows with 'constituentid', 'wikidataid', and 'beginyear'.
filtered_df = df[(df['constituentid'].notnull()) & (df['wikidataid'].notnull()) & (df['beginyear'].notnull())]

# Display the filtered DataFrame.
#display(filtered_df)

In [8]:
# Read the CSV file from the Wikidata Query Service into a pandas DataFrame.
df = pd.read_csv('ngayearanddate.csv')

# Select only the 'itemLabel' and birth year columns.
selected_columns = df[['itemLabel', 'P2252Value', 'yearOfBirth', 'date']]

# Display the DataFrame with the selected columns.
#display(selected_columns)

In [9]:
# Merging the two dataframes on 'constituentid' and 'P2252Value'.
merged_df = pd.merge(filtered_df, selected_columns, left_on='constituentid', right_on='P2252Value')

# Filtering rows where 'beginyear' and 'yearOfBirth' do not match.
mismatch_df = merged_df[merged_df['beginyear'] != merged_df['yearOfBirth']]

# Filter the DataFrame to remove rows where 'external_value' is "0".
df_mismatch_finder = mismatch_df[mismatch_df['beginyear'] != 0]

# Displaying the dataframe with mismatched values.
#display(df_mismatch_finder)

In [10]:
# Create a new DataFrame with the specified columns and values.
df_mismatch_nga = pd.DataFrame({
    'item_id': df_mismatch_finder['wikidataid'],  # value from 'Wiki QID' column
    'statement_guid': np.nan,  # Null value
    'property_id': 'P569',  # constant value "P2174"
    'wikidata_value': df_mismatch_finder['yearOfBirth'],  # value from 'dateOfBirth' column
    'meta_wikidata_value': np.nan,  # Null value
    'external_value': df_mismatch_finder['beginyear'].astype(str),  # value from 'BeginDate' column
    'external_url': 'https://media.githubusercontent.com/media/MuseumofModernArt/collection/main/Artists.csv',  # constant URL value
    'type': 'statement'  # constant value "statement"
})

# Display the new DataFrame.
#display(df_mismatch_nga)

In [11]:
# Specify the file path.
file_path = 'output_nga_birth.csv'

# Write the DataFrame to a CSV file with index=False.
df_mismatch_nga.to_csv(file_path, index=False)

In [12]:
display(df_mismatch_nga)

Unnamed: 0,item_id,statement_guid,property_id,wikidata_value,meta_wikidata_value,external_value,external_url,type
0,Q725410,,P569,1760.0,,1755.0,https://media.githubusercontent.com/media/Muse...,statement
12,Q390098,,P569,1360.0,,1369.0,https://media.githubusercontent.com/media/Muse...,statement
13,Q5664,,P569,1390.0,,1395.0,https://media.githubusercontent.com/media/Muse...,statement
14,Q5664,,P569,1387.0,,1395.0,https://media.githubusercontent.com/media/Muse...,statement
16,Q5664,,P569,1400.0,,1395.0,https://media.githubusercontent.com/media/Muse...,statement
...,...,...,...,...,...,...,...,...
10237,Q3057700,,P569,1931.0,,1913.0,https://media.githubusercontent.com/media/Muse...,statement
10243,Q19520420,,P569,1904.0,,1905.0,https://media.githubusercontent.com/media/Muse...,statement
10244,Q63247127,,P569,1921.0,,1922.0,https://media.githubusercontent.com/media/Muse...,statement
10275,Q58032695,,P569,1896.0,,1897.0,https://media.githubusercontent.com/media/Muse...,statement
