#FactGrid QuickStatment Processing

Notebook by Melinee Her

Preparing the cleaned CDLI and ORACC data for uploading to FactGrid


# Mount Google Drive folder + imports

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#any necessary imports
import pandas as pd
import zipfile
from zipfile import ZipFile
import json
import requests
from tqdm import tqdm
import os
import errno
import re
import random
import numpy as np
import sys
import copy
import networkx as nx
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import collections

#Set folder for remote drive
#folder = '/content/drive/My Drive/FactGrid Cuneiform (AWCA)/people/Melinee/'
folder = '/content/drive/MyDrive/Melinee/'

#importing utils for the method which downloads the current text json files
os.chdir(folder + 'network/utils/')
from utils import oracc_download

# This is a user defined module that searches through the texts to find the entities in the text that
# are people and places, to be imported as nodes into the network
os.chdir(folder + 'network/')
import rank_parser4 as rp

pd.set_option('display.max_columns', None)

# Prepare for __FactGrid__ QuickStatement batch processing

Once the `all_merged` df is properly joined together, the last step before we export the resulting CSV is to include the FactGrid statements into the data frame, which we will call `factgrid_df`.

Most of this process is straightforward, but the one statement we need to create is a __description__ for each object.

###1 To do so we will use a couple of the cleaned up fields from `all_merged` df above and add a new field with header 'Den' which will join three existing columns together to form a descriptive sentence.

In [None]:
#import all_merged as den_df
den_df = pd.read_csv(folder + '/ORACC_DFS/all_merged.csv', low_memory=False, index_col=0)

In [None]:
#fill null values with ''
den_df.fillna('', inplace = True)
den_df = den_df.rename({'Len_x':'place_x', 'Len_y':'place_y'}, axis = 1) #because we create a len category later on, we need to rename it to place
den_df

Unnamed: 0,id_text,first_lang_x,first_lang_y,first_lang_qid_x,first_lang_qid_y,first_lang_parent_x,first_lang_parent_y,P155_lang_x,P155_lang_y,other_lang_x,other_lang_y,material_x,material_y,first_mat_x,first_mat_y,first_mat_label_x,first_mat_label_y,first_mat_qid_x,first_mat_qid_y,P155_mat_x,P155_mat_y,other_mat_x,other_mat_y,collection_x,collection_y,collection_qid_x,collection_qid_y,collection_wikidata_id_x,collection_wikidata_id_y,collection_name_x,collection_name_y,collection_name_native_x,collection_name_native_y,collection_url_x,collection_url_y,wiki_url_eng_x,wiki_url_eng_y,wiki_url_native_x,wiki_url_native_y,P155_collection_x,P155_collection_y,other_collection_x,other_collection_y,provenience_FG_qid_x,provenience_FG_qid_y,provenience_x,provenience_y,excavation_no_x,excavation_no_y,museum_no_x,museum_no_y,findspot_square_x,findspot_square_y,ancientplace_x,ancientplace_y,place_x,place_y,object_type_x,object_type2_x,object_type_y,object_type2_y,object_FG_qid_x,object_FG_qid_y,object_FG_Label_x,object_FG_Label_y,genre_x,genre_y,genre_qid_x,genre_qid_y,genre_current_count_x,genre_current_count_y,P155_genre_x,P155_genre_y,other_genre_x,other_genre_y,period_x,period_y,period_qid_x,period_qid_y,period_wikidata_URI_x,period_wikidata_URI_y,P155_period_x,P155_period_y,other_possible_period_x,other_possible_period_y,designation_x,designation_y
0,P000001,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,a",,VAT 01533,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 065","W 06435,a"
1,P000002,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,b",,VAT 15263,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 066","W 06435,b"
2,P000003,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09123,d",,VAT 15253,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"ATU 3, pl. 081, W 9123,d","W 09123,d"
3,P000004,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09169,d",,VAT 15168,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"CDLI Lexical 000002, ex. 051","W 09169,d"
4,P000005,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09206,k",,VAT 15153,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"CDLI Lexical 000002, ex. 172","W 09206,k"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366911,X096677,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,BM 096677
366912,X201001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iraq 82 129
366913,X201002,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Iraq 82 133
366914,X225104,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Unknown,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"OB Contracts, pl. D4 no. 24"


###2 For each object with a 'cdli_id' + 'designation' we will make the description by joining the following fields:
 * 'lang' + ( and 'lang2') [if questionable, exclude 'lang']
 * 'genre' +
 * 'object' +
 * " from " +
 * 'provenience = Len' + (the CDLI 'provenience' field has many missing fields, so we will use the 'Len' field instead)
 * ", dated to " +
 * 'period' +
 * " and currently held in the" +
 * 'collection'



###3 The result should look something like this: __"Sumerian administrative tablet from Girsu, dated to Ur III (ca. 2100-2000 BC) and currently held in the British Museum, London, UK"__



In [None]:
den = []

for i in range(den_df.shape[0]):

  d = ''

  ####### language ########

  # only includes lang if first_lang is not questionable
  if den_df.loc[i, 'P155_lang_x'] == '':
    first_l = den_df['first_lang_x'][i].capitalize()
    d = d + first_l

  # check if other language exists
  if den_df.loc[i, 'other_lang_x'] != '':
    # split multiple other languages
    other_l_lst = den_df.loc[i, 'other_lang_x'].split(',')

    # if only one other language
    if len(other_l_lst) == 1:
      other_l = other_l_lst[0]
      if '?' in other_l:
        other_l = other_l.replace('?', '').rstrip().capitalize()
        d = d + ' and possibly ' + other_l
      else:
        d = d + ' and ' + other_l.capitalize()

    # if more than one other languages
    else:
      # for all but the last one
      for other_l in other_l_lst[0:(len(other_l_lst)-1)]:
        # remove spaces
        other_l = other_l.replace(' ', '')
        if '?' in other_l:
          other_l = other_l.replace('?', '').rstrip().capitalize()
          d = d + ', possibly ' + other_l
        else:
          d = d + ', ' + other_l.capitalize()
      # for the last one
      last_other_l = other_l_lst[-1]
      if '?' in last_other_l:
        last_other_l = last_other_l.replace('?', '').rstrip().capitalize()
        d = d + ', and possibly ' + last_other_l
      else:
        d = d + ', and ' + last_other_l.capitalize()


  ######## genre ########

  g = den_df.loc[i, 'genre_x'].lower()

  # if genre is questionable
  if den_df.loc[i, 'P155_genre_x'] != '':
    # if genre starts the sentence
    if d == '':
      d = d + g.capitalize() + ' (?)'
    else:
      d = d + ' ' + g + ' (?)'

  # if genre is certain
  else:
    # if genre starts the sentence
    if d == '':
      d = d + g.capitalize()
    else:
      d = d + ' ' + g


  ######## object ########

  o = den_df.loc[i,'object_type_x'].lower()

  # if object is questionable
  if o == '':
    if len(d) == 0:
      d = d + 'An '
    d = d + 'object of an unknown type'
  else:
    d = d + ' ' + o

  ######## provenience ########

  p = den_df.loc[i, 'place_x']

  if p == '':
    d = d + ' from an unspecified location'
  else:
    d = d + ' found in the ' + p + ' provenience'


  ######## if the rest of the sentence is empty then leave off the rest of the description ######
  if den_df.loc[i, 'period_x'] == '' and den_df.loc[i,'collection_x'] == '':
    d = d + '.'
    den += [d]
    continue;

  ######## period ########

  # if period is questionable
  if den_df.loc[i, 'P155_period_x'] != '':
    d = d + ', dated to ' + den_df.loc[i, 'period_x'] + ' (?)'
  else:
    if den_df.loc[i, 'period_x'] != '':
      d = d + ', dated to ' + den_df.loc[i, 'period_x']
    else:
      d = d + ', dated to an unspecified period'

  ######## collection ########
  # if the collection is questionable
  if den_df.loc[i, 'P155_collection_x'] != '':
    d = d + ' and currently held in the ' + den_df.loc[i, 'P155_collection_x'] + ' (?)'
  else:
    if den_df.loc[i,'collection_x'] != '':
      d = d + ' and currently held in the ' + den_df.loc[i, 'collection_x']

  d = d.capitalize() + '.'
  den += [d]

In [None]:
pd.unique(pd.Series(den))

array(['Lexical tablet found in the warkāʼ provenience, dated to uruk iii (ca. 3200-3000 bc) and currently held in the vorderasiatisches museum, berlin, germany.',
       'Lexical tablet found in the warkāʼ provenience, dated to uruk iv (ca. 3350-3200 bc) and currently held in the vorderasiatisches museum, berlin, germany.',
       'Lexical tablet found in the warkāʼ provenience, dated to uruk iii (ca. 3200-3000 bc) and currently held in the national museum of iraq, baghdad, iraq.',
       ..., ' bowl (with sculpture) from an unspecified location.',
       ' beaker from an unspecified location.',
       ' bronze cross from an unspecified location.'], dtype=object)

In [None]:
den_df['Den'] = den
den_df.head()

Unnamed: 0,id_text,first_lang_x,first_lang_y,first_lang_qid_x,first_lang_qid_y,first_lang_parent_x,first_lang_parent_y,P155_lang_x,P155_lang_y,other_lang_x,other_lang_y,material_x,material_y,first_mat_x,first_mat_y,first_mat_label_x,first_mat_label_y,first_mat_qid_x,first_mat_qid_y,P155_mat_x,P155_mat_y,other_mat_x,other_mat_y,collection_x,collection_y,collection_qid_x,collection_qid_y,collection_wikidata_id_x,collection_wikidata_id_y,collection_name_x,collection_name_y,collection_name_native_x,collection_name_native_y,collection_url_x,collection_url_y,wiki_url_eng_x,wiki_url_eng_y,wiki_url_native_x,wiki_url_native_y,P155_collection_x,P155_collection_y,other_collection_x,other_collection_y,provenience_FG_qid_x,provenience_FG_qid_y,provenience_x,provenience_y,excavation_no_x,excavation_no_y,museum_no_x,museum_no_y,findspot_square_x,findspot_square_y,ancientplace_x,ancientplace_y,place_x,place_y,object_type_x,object_type2_x,object_type_y,object_type2_y,object_FG_qid_x,object_FG_qid_y,object_FG_Label_x,object_FG_Label_y,genre_x,genre_y,genre_qid_x,genre_qid_y,genre_current_count_x,genre_current_count_y,P155_genre_x,P155_genre_y,other_genre_x,other_genre_y,period_x,period_y,period_qid_x,period_qid_y,period_wikidata_URI_x,period_wikidata_URI_y,P155_period_x,P155_period_y,other_possible_period_x,other_possible_period_y,designation_x,designation_y,Den
0,P000001,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,a",,VAT 01533,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 065","W 06435,a",Lexical tablet found in the warkāʼ provenience...
1,P000002,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,b",,VAT 15263,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 066","W 06435,b",Lexical tablet found in the warkāʼ provenience...
2,P000003,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09123,d",,VAT 15253,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"ATU 3, pl. 081, W 9123,d","W 09123,d",Lexical tablet found in the warkāʼ provenience...
3,P000004,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09169,d",,VAT 15168,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"CDLI Lexical 000002, ex. 051","W 09169,d",Lexical tablet found in the warkāʼ provenience...
4,P000005,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09206,k",,VAT 15153,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"CDLI Lexical 000002, ex. 172","W 09206,k",Lexical tablet found in the warkāʼ provenience...


Creating the 'Len' for the dataframe

In [None]:
den_df['object_type_x'] = [x.capitalize() for x in den_df['object_type_x']]

lst = []

for i in range(den_df.shape[0]):
  sentence = 'Cuneiform'
  if den_df['object_type_x'][i] != '':
    sentence += ' ' + den_df['object_type_x'][i]
  else:
    sentence += ' artifact'
  sentence += ' ' + den_df['designation_x'][i]
  lst.append(sentence)

Len = pd.DataFrame({'Len' : lst})
Len

Unnamed: 0,Len
0,"Cuneiform Tablet CDLI Lexical 000002, ex. 065"
1,"Cuneiform Tablet CDLI Lexical 000002, ex. 066"
2,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d"
3,"Cuneiform Tablet CDLI Lexical 000002, ex. 051"
4,"Cuneiform Tablet CDLI Lexical 000002, ex. 172"
...,...
366911,Cuneiform artifact
366912,Cuneiform artifact
366913,Cuneiform artifact
366914,Cuneiform artifact


In [None]:
den_df['Len'] = Len
den_df.head(3)

Unnamed: 0,id_text,first_lang_x,first_lang_y,first_lang_qid_x,first_lang_qid_y,first_lang_parent_x,first_lang_parent_y,P155_lang_x,P155_lang_y,other_lang_x,other_lang_y,material_x,material_y,first_mat_x,first_mat_y,first_mat_label_x,first_mat_label_y,first_mat_qid_x,first_mat_qid_y,P155_mat_x,P155_mat_y,other_mat_x,other_mat_y,collection_x,collection_y,collection_qid_x,collection_qid_y,collection_wikidata_id_x,collection_wikidata_id_y,collection_name_x,collection_name_y,collection_name_native_x,collection_name_native_y,collection_url_x,collection_url_y,wiki_url_eng_x,wiki_url_eng_y,wiki_url_native_x,wiki_url_native_y,P155_collection_x,P155_collection_y,other_collection_x,other_collection_y,provenience_FG_qid_x,provenience_FG_qid_y,provenience_x,provenience_y,excavation_no_x,excavation_no_y,museum_no_x,museum_no_y,findspot_square_x,findspot_square_y,ancientplace_x,ancientplace_y,place_x,place_y,object_type_x,object_type2_x,object_type_y,object_type2_y,object_FG_qid_x,object_FG_qid_y,object_FG_Label_x,object_FG_Label_y,genre_x,genre_y,genre_qid_x,genre_qid_y,genre_current_count_x,genre_current_count_y,P155_genre_x,P155_genre_y,other_genre_x,other_genre_y,period_x,period_y,period_qid_x,period_qid_y,period_wikidata_URI_x,period_wikidata_URI_y,P155_period_x,P155_period_y,other_possible_period_x,other_possible_period_y,designation_x,designation_y,Den,Len
0,P000001,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,a",,VAT 01533,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,Tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 065","W 06435,a",Lexical tablet found in the warkāʼ provenience...,"Cuneiform Tablet CDLI Lexical 000002, ex. 065"
1,P000002,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 06435,b",,VAT 15263,,"M XVIII,?",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,Tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk III (ca. 3200-3000 BC),,Q512134,,Q114877803,,,,,,"CDLI Lexical 000002, ex. 066","W 06435,b",Lexical tablet found in the warkāʼ provenience...,"Cuneiform Tablet CDLI Lexical 000002, ex. 066"
2,P000003,,,,,,,,,,,clay,clay,clay,clay,clay,clay,Q471153,Q471153,,,,,"Vorderasiatisches Museum, Berlin, Germany","Vorderasiatisches Museum, Berlin, Germany",Q510658,Q510658,Q542084,Q542084,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,Vorderasiatisches Museum,https://www.smb.museum/museen-einrichtungen/vo...,https://www.smb.museum/museen-einrichtungen/vo...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://en.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,https://de.wikipedia.org/wiki/Vorderasiatische...,,,,,Q390030,,Uruk (mod. Warka),Warka (Uruk),"W 09123,d",,VAT 15253,,"Qa XVI,2",,https://database.factgrid.de/entity/Q390030,,Warkāʼ,,Tablet,,tablet,,Q512006,Q512006,Clay tablet,Clay tablet,Lexical,Lexical,Q537054,Q537054,8273,8273,,,,,Uruk IV (ca. 3350-3200 BC),,Q512132,,Q114877809,,,,,,"ATU 3, pl. 081, W 9123,d","W 09123,d",Lexical tablet found in the warkāʼ provenience...,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d"


In [None]:
#export den_df
den_df.to_csv(folder+'ORACC_DFS/den_df.csv')

###4 The final resulting data frame (`factgrid_df`) will include the following fields, with data from `all_merged_df` in single quotation marks (' '). This includes a total of 15 columns new columns added at the beginning of the data frame:





In [None]:
#import den_df
den_df = pd.read_csv(folder + '/ORACC_DFS/den_df.csv', low_memory=False, index_col=0)

#### 4.1 Initial fields with __description__:

| qid | Len | Den | P2 | P131 | id_text_P692 |
| --- | --- | --- | ---- | --- | --- |
| (blank) | 'Cuneiform Tablet `designation`' | __description__ | [Q512006](https://database.factgrid.de/wiki/Item:Q512006) (object type) | [Q389597](https://database.factgrid.de/wiki/Item:Q389597) (for all) | (id_text) |

`len`: language in english

`den`: description

For each object with a 'cdli_id' + 'designation' we will make the description by joining the following fields:

'lang' +

'genre' +

'object' +

" from " +

'provenience = Len' + (the CDLI 'provenience' field has many missing fields, so we will use the 'Len' field instead)

", dated to " +

'period' +

" and currently held in the" +
'collection'

In [None]:
# get existing columns we need
ini = den_df[['Len', 'Den', 'id_text']]

# add columns we need
ini['qid'] = np.empty(ini.shape[0])
#ini['P2'] = ['Q390181'] * ini.shape[0]
ini['P121'] = ['Q389597'] * ini.shape[0]

# rename the columns accordingly
ini = ini.rename(columns={'first_lang_x': 'Len',
                          'den': 'Den',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
ini = ini[['qid', 'Len', 'Den', 'P121', 'id_text_P692']]

ini

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ini['qid'] = np.empty(ini.shape[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ini['P121'] = ['Q389597'] * ini.shape[0]


Unnamed: 0,qid,Len,Den,P121,id_text_P692
0,5.017885e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 065",Lexical tablet found in the warkāʼ provenience...,Q389597,P000001
1,5.017884e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 066",Lexical tablet found in the warkāʼ provenience...,Q389597,P000002
2,5.017885e-310,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d",Lexical tablet found in the warkāʼ provenience...,Q389597,P000003
3,5.017884e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 051",Lexical tablet found in the warkāʼ provenience...,Q389597,P000004
4,0.000000e+00,"Cuneiform Tablet CDLI Lexical 000002, ex. 172",Lexical tablet found in the warkāʼ provenience...,Q389597,P000005
...,...,...,...,...,...
366911,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,Q389597,X096677
366912,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,Q389597,X201001
366913,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,Q389597,X201002
366914,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,Q389597,X225104


In [None]:
ini.to_csv(folder + 'ORACC_DFS/fields/ini.csv')

#### 4.2 __language_df_final__ (continued):

|id_text_P692| Language | P18 | qal155 | P18_2 | qal18 |
|--| ----- |------ | ---- | ---- | --- |
|(id_text)| Akkadian (for example) | [Q471146](https://database.factgrid.de/wiki/Item:Q471146) | [Q22757](https://database.factgrid.de/wiki/Item:Q22757) (if questionable)| Sumerian (for example) |[Q471149](https://database.factgrid.de/wiki/Item:Q471149) (for example)|

* For those with multiple language statements, we can save the second for batch 2
* For those with questionable statements, we can save them for batch 2+ as well



In [None]:
# get existing columns we need
language_df_final = den_df[['first_lang_x', 'first_lang_qid_x', 'P155_lang_x',
                            'other_lang_x', 'id_text']]

# rename the columns accordingly
language_df_final = language_df_final.rename(columns={'first_lang_x': 'Language',
                          'first_lang_qid_x': 'P18',
                          'P155_lang_x': 'qal155_lang',
                          'other_lang_x': 'Lang2',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
language_df_final = language_df_final[['id_text_P692', 'Language', 'P18',
                                       'qal155_lang', 'Lang2']]

language_df_final

Unnamed: 0,id_text_P692,Language,P18,qal155_lang,Lang2
0,P000001,,,,
1,P000002,,,,
2,P000003,,,,
3,P000004,,,,
4,P000005,,,,
...,...,...,...,...,...
366911,X096677,,,,
366912,X201001,,,,
366913,X201002,,,,
366914,X225104,,,,


In [None]:
language_df_final.to_csv(folder + 'ORACC_DFS/fields/language_df_final.csv')

#### 4.3 __material_df__ (continued):

|id_text_P692| Material composition | P401 |
|--| ------ | ---- |
|(id_text)|clay (for example) | [Q471153](https://database.factgrid.de/wiki/Item:Q471153) |



In [None]:
# get existing columns we need
material_df_final = den_df[['first_mat_x', 'first_mat_qid_x', 'P155_mat_x', 'id_text']]

# rename the columns accordingly
material_df_final = material_df_final.rename(columns={'first_mat_x': 'Material composition',
                          'first_mat_qid_x': 'P401',
                          'P155_mat_x': 'qal155_mat',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
material_df_final = material_df_final[['id_text_P692', 'Material composition',
                                       'P401','qal155_mat']]

material_df_final

Unnamed: 0,id_text_P692,Material composition,P401,qal155_mat
0,P000001,clay,Q471153,
1,P000002,clay,Q471153,
2,P000003,clay,Q471153,
3,P000004,clay,Q471153,
4,P000005,clay,Q471153,
...,...,...,...,...
366911,X096677,,,
366912,X201001,,,
366913,X201002,,,
366914,X225104,,,


In [None]:
material_df_final.to_csv(folder + 'ORACC_DFS/fields/material_df_final.csv')

#### 4.4 __museum_df__ (continued):

|id_text_P692| Present holding | P329 | [qal10](https://database.factgrid.de/wiki/Item:Q499887#) |
|--| ------ | ---- | ----------- |
|(id_text)| British Museum (for example) | [Q102010](https://database.factgrid.de/wiki/Item:Q102010) | 'museum_no' |

* For those with questionable statements, we can save them for batch 2



In [None]:
# get existing columns we need
museum_df_final = den_df[['collection_x', 'collection_qid_x', 'P155_collection_x', 'id_text', 'excavation_no_x','museum_no_x']]

# rename the columns accordingly
museum_df_final = museum_df_final.rename(columns={'collection_x': 'Present holding',
                          'collection_qid_x': 'P329',
                          'P155_collection_x': 'qal155_museum',
                          'id_text': 'id_text_P692',
                          'excavation_no_x': 'qal804',
                          'museum_no_x': 'qal10'})

# reorder the columns accordingly
museum_df_final = museum_df_final[['id_text_P692', 'Present holding',
                                   'P329','qal155_museum','qal804','qal10']]

museum_df_final

Unnamed: 0,id_text_P692,Present holding,P329,qal155_museum,qal804,qal10
0,P000001,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 06435,a",VAT 01533
1,P000002,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 06435,b",VAT 15263
2,P000003,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 09123,d",VAT 15253
3,P000004,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 09169,d",VAT 15168
4,P000005,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 09206,k",VAT 15153
...,...,...,...,...,...,...
366911,X096677,,,,,
366912,X201001,,,,,
366913,X201002,,,,,
366914,X225104,,,,,


In [None]:
museum_df_final.to_csv(folder + 'ORACC_DFS/fields/museum_df_final.csv')

#### 4.5 __provenience_df__ (continued):
|id_text_P692| Provenience | P695 | qal804 | qal425 |
|--| ----------- | ---- | ---- | ---- |
|(id_text)| 'provenience' = Kanesh (for example)| [Q390036](https://database.factgrid.de/wiki/Item:Q390036) | "kt a/k 0353" |  |

  * Note: the qualifiers P425 for 'findspot_square' could make issues for QuickStatements, because there will be a lot of null values. We can add the texts that have this statement first, and then add those that don't in batch 2




In [None]:
# get existing columns we need
provenience_df_final = den_df[['Len', 'findspot_square_x', 'id_text']]

# rename the columns accordingly
provenience_df_final = provenience_df_final.rename(columns={'Len': 'Provenience',
                          'findspot_square_x': 'qal425',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
provenience_df_final = provenience_df_final[['id_text_P692', 'Provenience',
                                   'qal425']]

provenience_df_final

Unnamed: 0,id_text_P692,Provenience,qal425
0,P000001,"Cuneiform Tablet CDLI Lexical 000002, ex. 065","M XVIII,?"
1,P000002,"Cuneiform Tablet CDLI Lexical 000002, ex. 066","M XVIII,?"
2,P000003,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d","Qa XVI,2"
3,P000004,"Cuneiform Tablet CDLI Lexical 000002, ex. 051","Qa XVI,2"
4,P000005,"Cuneiform Tablet CDLI Lexical 000002, ex. 172","Qa XVI,2"
...,...,...,...
366911,X096677,Cuneiform artifact,
366912,X201001,Cuneiform artifact,
366913,X201002,Cuneiform artifact,
366914,X225104,Cuneiform artifact,


In [None]:
provenience_df_final.to_csv(folder + 'ORACC_DFS/fields/provenience_df_final.csv')

#### 4.6 __object_type_df__ (continued): to be added in batch 2

|id_text_P692| Instance of | P2 |
|--| ---- | ---- |
|(id_text)| Tablet (for example) | [Q512006](https://database.factgrid.de/wiki/Item:Q512006) |



In [None]:
# get existing columns we need
object_type_df_final = den_df[['object_type_x','object_FG_qid_x','id_text']]

# rename the columns accordingly
object_type_df_final = object_type_df_final.rename(columns={'object_type_x': 'Instance of',
                          'object_FG_qid_x':'P2',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
object_type_df_final = object_type_df_final[['id_text_P692', 'Instance of','P2']]

object_type_df_final

Unnamed: 0,id_text_P692,Instance of,P2
0,P000001,Tablet,Q512006
1,P000002,Tablet,Q512006
2,P000003,Tablet,Q512006
3,P000004,Tablet,Q512006
4,P000005,Tablet,Q512006
...,...,...,...
366911,X096677,,
366912,X201001,,
366913,X201002,,
366914,X225104,,


In [None]:
object_type_df_final.to_csv(folder + 'ORACC_DFS/fields/object_type_df_final.csv')

#### 4.7 __genre_df__ (continued):

|id_text_P692| Type of work | P121 | P608 |
|--| ---- | ---- | ---- |
|(id_text)| Letter (for example) | [Q10510](https://database.factgrid.de/wiki/Item:Q10510) (for example)| [Q257175](https://database.factgrid.de/wiki/Item:Q257175) (for example)|

* For those with questionable statements, we can save them for batch 2



In [None]:
# get existing columns we need
genre_df_final = den_df[['genre_x', 'genre_qid_x', 'P155_genre_x', 'id_text']]

# rename the columns accordingly
genre_df_final = genre_df_final.rename(columns={'genre_x': 'Type of work',
                          'genre_qid_x': 'P121',
                          'P155_genre_x': 'qal155_genre',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
genre_df_final = genre_df_final[['id_text_P692', 'Type of work',
                                   'P121','qal155_genre']]

genre_df_final

Unnamed: 0,id_text_P692,Type of work,P121,qal155_genre
0,P000001,Lexical,Q537054,
1,P000002,Lexical,Q537054,
2,P000003,Lexical,Q537054,
3,P000004,Lexical,Q537054,
4,P000005,Lexical,Q537054,
...,...,...,...,...
366911,X096677,,,
366912,X201001,,,
366913,X201002,,,
366914,X225104,,,


In [None]:
genre_df_final.to_csv(folder + 'ORACC_DFS/fields/genre_df_final.csv')

#### 4.8 __period_df__ (continued):

|id_text_P692| Period | P853 | P155 |
|--| ---- | ---- | ---- |
|(id_text)| Old Assyrian Period (for example) | [Q512151](https://database.factgrid.de/wiki/Item:Q512151) | [Q22757](https://database.factgrid.de/wiki/Item:Q22757) (if questionable)|

* For those with questionable statements, we can save them for batch 2




In [None]:
# get existing columns we need
period_df_final = den_df[['period_x', 'period_qid_x', 'P155_period_x', 'id_text']]

# rename the columns accordingly
period_df_final = period_df_final.rename(columns={'period_x': 'Period',
                          'period_qid_x': 'P853',
                          'P155_period_x': 'qal155_period',
                          'id_text': 'id_text_P692'})

# reorder the columns accordingly
period_df_final = period_df_final[['id_text_P692', 'Period',
                                   'P853','qal155_period']]

period_df_final

Unnamed: 0,id_text_P692,Period,P853,qal155_period
0,P000001,Uruk III (ca. 3200-3000 BC),Q512134,
1,P000002,Uruk III (ca. 3200-3000 BC),Q512134,
2,P000003,Uruk IV (ca. 3350-3200 BC),Q512132,
3,P000004,Uruk IV (ca. 3350-3200 BC),Q512132,
4,P000005,Uruk IV (ca. 3350-3200 BC),Q512132,
...,...,...,...,...
366911,X096677,,,
366912,X201001,,,
366913,X201002,,,
366914,X225104,,,


In [None]:
period_df_final.to_csv(folder + 'ORACC_DFS/fields/period_df_final.csv')

###5 __Final CSV (with examples):

| qid | Len | Den | [P2](https://database.factgrid.de/wiki/Property:P2) | [P747](https://database.factgrid.de/wiki/Property:P747) | [P131](https://database.factgrid.de/wiki/Property:P131) | [P692](https://database.factgrid.de/wiki/Property:P692) | [P18](https://database.factgrid.de/wiki/Property:P18) | [P401](https://database.factgrid.de/wiki/Property:P401) | [P329](https://database.factgrid.de/wiki/Property:P329) | qal[10](https://database.factgrid.de/wiki/Property:P10) | [P695](https://database.factgrid.de/wiki/Property:P695) | qal[804](https://database.factgrid.de/wiki/Property:P804) | qal[425](https://database.factgrid.de/wiki/Property:P425) | [P121](https://database.factgrid.de/wiki/Property:P121) | [P853](https://database.factgrid.de/wiki/Property:P853) |
| --- | --- | --- | ---- | --- | --- | --- | --- | ---- | ---- | --- | ---- | ---- | ---- | ---- | ---- |
| (blank) | 'Cuneiform Tablet `designation`' | __description__ | [Q512006](https://database.factgrid.de/wiki/Item:Q512006) (object type) | [Q390181](https://database.factgrid.de/wiki/Item:Q390181) (for all) | [Q389597](https://database.factgrid.de/wiki/Item:Q389597) (for all) | (id_text) | (Q-lang) | (Q-material) | (Q-museum) | 'museum_no' (string) |(Q-provenience) | "kt a/k 0353" (string) | (string) |(Q-genre) | (Q-period) |

* Note that wherever a `?` occurs we use the qualifying statement qal[155](https://database.factgrid.de/wiki/Property:P155) with [Q22757](https://database.factgrid.de/wiki/Item:Q22757) 'questionable statement'.


### Finalized outcome (example in FactGrid):
[__ICK 4 - I 437 (Q499899)__](https://database.factgrid.de/wiki/Item:Q499899)

This example shows how many of these statements will look for each text we are adding to FactGrid using QuickStatements. Since the text here is undated, it doesn't illustrate the final section for 'dates'.

In [None]:
ini = pd.read_csv(folder + '/ORACC_DFS/fields/ini.csv', low_memory=False, index_col=0)
language_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/language_df_final.csv', low_memory=False, index_col=0)
material_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/material_df_final.csv', low_memory=False, index_col=0)
museum_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/museum_df_final.csv', low_memory=False, index_col=0)
provenience_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/provenience_df_final.csv', low_memory=False, index_col=0)
object_type_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/object_type_df_final.csv', low_memory=False, index_col=0)
genre_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/genre_df_final.csv', low_memory=False, index_col=0)
period_df_final = pd.read_csv(folder + '/ORACC_DFS/fields/period_df_final.csv', low_memory=False, index_col=0)

In [None]:
factgrid_df = ini
dfs = [language_df_final, material_df_final, museum_df_final, provenience_df_final, object_type_df_final, genre_df_final, period_df_final]
for i in range(len(dfs)):
  factgrid_df = factgrid_df.merge(dfs[i], on = 'id_text_P692')
  #print(factgrid_df.shape[0])

factgrid_df = factgrid_df.rename(columns = {'id_text_P692': 'P692'})
factgrid_df['P747'] = ['Q390181'] * ini.shape[0]

In [None]:
factgrid_df.head(3)

Unnamed: 0,qid,Len,Den,P121_x,P692,Language,P18,qal155_lang,Lang2,Material composition,P401,qal155_mat,Present holding,P329,qal155_museum,qal804,qal10,Provenience,qal425,Instance of,P2,Type of work,P121_y,qal155_genre,Period,P853,qal155_period,P747
0,5.017885e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 065",Lexical tablet found in the warkāʼ provenience...,Q389597,P000001,,,,,clay,Q471153,,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 06435,a",VAT 01533,"Cuneiform Tablet CDLI Lexical 000002, ex. 065","M XVIII,?",Tablet,Q512006,Lexical,Q537054,,Uruk III (ca. 3200-3000 BC),Q512134,,Q390181
1,5.017884e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 066",Lexical tablet found in the warkāʼ provenience...,Q389597,P000002,,,,,clay,Q471153,,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 06435,b",VAT 15263,"Cuneiform Tablet CDLI Lexical 000002, ex. 066","M XVIII,?",Tablet,Q512006,Lexical,Q537054,,Uruk III (ca. 3200-3000 BC),Q512134,,Q390181
2,5.017885e-310,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d",Lexical tablet found in the warkāʼ provenience...,Q389597,P000003,,,,,clay,Q471153,,"Vorderasiatisches Museum, Berlin, Germany",Q510658,,"W 09123,d",VAT 15253,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d","Qa XVI,2",Tablet,Q512006,Lexical,Q537054,,Uruk IV (ca. 3350-3200 BC),Q512132,,Q390181


In [None]:
factgrid_df_adjusted = factgrid_df[['qid', 'Len',	'Den', 'P2', 'P747', 'P121_x', 'P692', 'P18', 'P401', 'P329',	'qal10',	'P695',	'qal804',	'qal425',	'P121_y', 'P853']]
factgrid_df_adjusted

KeyError: "['P695'] not in index"

In [None]:
factgrid_df_adjusted2 = factgrid_df[['qid', 'Len',	'Den', 'P2', 'P747', 'P121_x', 'P692', 'P18', 'P401', 'P329', 'qal10', 'qal425',	'P121_y', 'P853']]
factgrid_df_adjusted2

Unnamed: 0,qid,Len,Den,P2,P747,P121_x,P692,P18,P401,P329,qal10,qal425,P121_y,P853
0,5.017885e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 065",Lexical tablet found in the warkāʼ provenience...,Q512006,Q390181,Q389597,P000001,,Q471153,Q510658,VAT 01533,"M XVIII,?",Q537054,Q512134
1,5.017884e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 066",Lexical tablet found in the warkāʼ provenience...,Q512006,Q390181,Q389597,P000002,,Q471153,Q510658,VAT 15263,"M XVIII,?",Q537054,Q512134
2,5.017885e-310,"Cuneiform Tablet ATU 3, pl. 081, W 9123,d",Lexical tablet found in the warkāʼ provenience...,Q512006,Q390181,Q389597,P000003,,Q471153,Q510658,VAT 15253,"Qa XVI,2",Q537054,Q512132
3,5.017884e-310,"Cuneiform Tablet CDLI Lexical 000002, ex. 051",Lexical tablet found in the warkāʼ provenience...,Q512006,Q390181,Q389597,P000004,,Q471153,Q510658,VAT 15168,"Qa XVI,2",Q537054,Q512132
4,0.000000e+00,"Cuneiform Tablet CDLI Lexical 000002, ex. 172",Lexical tablet found in the warkāʼ provenience...,Q512006,Q390181,Q389597,P000005,,Q471153,Q510658,VAT 15153,"Qa XVI,2",Q537054,Q512132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
366911,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,,Q390181,Q389597,X096677,,,,,,,
366912,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,,Q390181,Q389597,X201001,,,,,,,
366913,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,,Q390181,Q389597,X201002,,,,,,,
366914,0.000000e+00,Cuneiform artifact,An object of an unknown type from an unspecifi...,,Q390181,Q389597,X225104,,,,,,,


In [None]:
# saving the dataframe to designated folder
factgrid_df_adjusted2.to_csv(folder+'ORACC_DFS/factgrid_df.csv')