# Capturing Places and Persons

## This notebook processes text from music personalities' biographies and extract historical meetups information 
 Pre-requirements:
 Entity annotations after executing "02_queryDbpedia.ipynb" Notebook

 For each file with entity annotations
 - Read files from cacheSpotlightResponse/
 - Identify People entities:
   - "http://dbpedia.org/ontology/Person"
   - "http://dbpedia.org/ontology/MusicalArtist"
 - Identify Place entities:
   - 'http://dbpedia.org/ontology/Place'
 - When an entity has an empty type
   - Read corresponding emptyEntity response from DBpedia in cacheSpotlightResponse/ directory
 - Store annotations in extractedEntitiesPersonPlaceOnly/

 Directories information:
 - cacheSpotlightResponse/ : collection of biographies in CSV format. Each biography contains the list of entities identified using DBpedia Spotlight, each linked to its corresponding sentence
 - extractedEntitiesPersonPlaceOnly/ : response from DBpedia Spotlight entity annotation grouped by biography

In [1]:
import json
import os
import pandas as pd
from _datetime import date
import time
from operator import itemgetter

# For DBpedia spotlight, PPE entities
import requests
import pycurl
from urllib.request import urlopen
from urllib.parse import quote

In [2]:
# extract only the ones that do not exist in folder
files_list = [f for f in os.listdir('extractedEntitiesPersonPlaceOnly') if not f.startswith('.')]
# parse to dataframe
df_query = pd.DataFrame(files_list, columns=['file_name'])
print("Extracted PP: ",len(df_query))
# Read file with the 33k biographies
bioListMEETUPS_df = pd.read_csv('TOTAL_download_biographies.csv')
bioListMEETUPS_df.rename(columns={'id': 'file_name','s':'resource'}, inplace=True)
bioListMEETUPS_df['file_name'] = bioListMEETUPS_df['file_name'].astype(str) + '.csv'
print("Total biographies: ",len(bioListMEETUPS_df))
df_result = bioListMEETUPS_df[~bioListMEETUPS_df['file_name'].isin(df_query['file_name'])]
print("Left to process: ",len(df_result))
df_result.to_csv('totalBiosToProcessPP.csv',index=False)
df_result.info()
df_result.head()

Extracted PP:  0
Total biographies:  33309
Left to process:  33309
<class 'pandas.core.frame.DataFrame'>
Int64Index: 33309 entries, 0 to 33308
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   resource   33309 non-null  object
 1   file_name  33309 non-null  object
dtypes: object(2)
memory usage: 780.7+ KB


Unnamed: 0,resource,file_name
0,http://dbpedia.org/resource/Abbe_Lane,2205692.csv
1,http://dbpedia.org/resource/Jimena_Barón,17441602.csv
2,http://dbpedia.org/resource/Guy_Lafitte,10925966.csv
3,http://dbpedia.org/resource/Dominique_Jameux,54550593.csv
4,http://dbpedia.org/resource/Phil_Berg_(talent_...,65723345.csv


In [3]:
biospp_df_1 = df_result.iloc[:11103]
biospp_df_2 = df_result.iloc[11103:22206]
biospp_df_3 = df_result.iloc[22206:]

biospp_df_1.to_csv('totalBiosToProcessPP_1.csv',index=False)
biospp_df_2.to_csv('totalBiosToProcessPP_2.csv',index=False)
biospp_df_3.to_csv('totalBiosToProcessPP_3.csv',index=False)

In [7]:
# USING ONLY FOR SAMPLING - 1002
files_list = [f for f in os.listdir('extractedEntities') if not f.startswith('.')]
# parse to dataframe
df_query = pd.DataFrame(files_list, columns=['file_name'])
df_query.to_csv('totalBiographiesEntities.csv',index=False)

## 1. Process DBpedial Spotlight entity annotation: functions

In [2]:
def executeQueryDbpedia(q, f='application/json'):
    epr = "http://dbpedia.org/sparql"
    try:
        params = {'query': q}
        resp = requests.get(epr, params=params, headers={'Accept': f})
        # return resp.text
        return resp
    except Exception as e:
        # print(e, file=sys.stdout)
        if hasattr(e, 'message'):
            print(e.message)
        else:
            print(e)
        raise
        
# retrieve entities information when they are not in cache
def queryEntityLeft(uri,item):
    # retrieve the next id to store the empty entitites
    df_master = pd.read_csv('cacheSpotlightResponse/emptyTypes_master.csv')
    df_master['id'] = df_master['file_name'].str.replace('emptyTypes_','')
    df_master['id'] = df_master['id'].str.replace('.csv','')
    df_master['id'] = df_master['id'].astype(str).astype(int)
    df_master = df_master.sort_values(by='id', ascending=False)
    last_file_id = df_master['id'].loc[df_master.index[0]]
    last_file_id = int(last_file_id)+1
    # last_file_id +=1
    
    df_results = pd.DataFrame()
    query_text = "SELECT * WHERE { <" + uri + "> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>  ?o }"
    try:
        # Execute query against sparql endpoint, query types
        results = executeQueryDbpedia(query_text).json()
        # print(results)
        # if query returns a response
        if 'results' in results:
            # to obtain the list of types
            res_1 = list(map(itemgetter('o'), results['results']['bindings']))
            res_2 = list(map(itemgetter('value'), res_1))

            df_results['types'] = res_2
            df_results['URI'] = item.URI
            df_results['entity'] = item.entity
            df_results['support'] = item.support
            df_results['offset'] = item.offset
            df_results['similarityScore'] = item.similarityScore
            df_results['percentageOfSecondRank'] = item.percentageOfSecondRank

            df_results['sentenceIndex']=item.sentenceIndex
            df_results['paragraphIndex'] = item.paragraphIndex
            df_results['section'] = item.section

            df_new_master_row = pd.DataFrame({'URI':[item.URI],'entity':[item.entity],
                                                          'file_name':['emptyTypes_{}.csv'.format(str(last_file_id))]})
            df_new_master_row.to_csv('cacheSpotlightResponse/emptyTypes_master.csv',mode='a',
                                 index=False,header=False)
            # print("Saved master file: " + str(last_file_id) + ". Len: " + str(len(df_new_master_row)))

            file_exists = os.path.isfile('cacheSpotlightResponse/emptyTypes_'+str(last_file_id)+'.csv')
            if not file_exists:
                df_results.to_csv('cacheSpotlightResponse/emptyTypes_'+str(last_file_id)+'.csv',index=False)
            else:
                df_results.to_csv('cacheSpotlightResponse/emptyTypes_'+str(last_file_id)+'.csv',mode='a',
                                 index=False,header=False)
    except Exception as ex:
        print("Blank type: ****")
        if hasattr(ex, 'message'):
            print(ex.message)
        else:
            print(ex)
        # print(ex, file=sys.stdout)
    return df_results

In [5]:
# Create error file for later review
df_error = pd.DataFrame(columns=['file_name'])
df_error.to_csv('temp_error.csv',index=False)

df_error.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  0 non-null      object
dtypes: object(1)
memory usage: 0.0+ bytes


## 1. Extract entities: People, places

In [3]:
# If the entity is classified as person return True
# include a validation to identify other types such as 
# dbo:Composer, Tenor that are not persons
# http://dbpedia.org/resource/Contralto
# False
# http://dbpedia.org/resource/Edward_Elgar
# True
# http://dbpedia.org/resource/Composer
# dct:subject dbc:Idioms
# http://dbpedia.org/resource/Bassoon rdf:type dbo:Person false

# call function to check if entity is a place
# Input: URI of candidate entity
# Output: boolean value, False it is not a place, True if it is a place
def checkPlaceEntity(URI_string, entTypes_df):
    # it is always False unless the query returns True
    item = ''
    response = False
    entitiesPlace_df = readCachedFilePlaceEntity()

    localEntitiesPlace_df = entitiesPlace_df.loc[(entitiesPlace_df['URI'] == URI_string)]
    if len(localEntitiesPlace_df) == 1:
        # It is not a person
        return localEntitiesPlace_df['response'].loc[localEntitiesPlace_df.index[0]],localEntitiesPlace_df['type'].loc[localEntitiesPlace_df.index[0]]
    
    entitiesPlace_df = entitiesPlace_df.loc[(entitiesPlace_df['URI'] == URI_string) & (entitiesPlace_df['response'] == False)]
    if len(entitiesPlace_df) > 0:
        return False, localEntitiesPlace_df['type'].loc[localEntitiesPlace_df.index[0]]
    
    subClassesPlace_df = pd.read_csv('cacheSpotlightResponse/otherPlacesEntities.csv')
    subClassesPlace_df = subClassesPlace_df.set_index(['URI'])
    
    temp_df = subClassesPlace_df.loc[subClassesPlace_df.index.isin(entTypes_df['types'].to_list())]
    if temp_df.empty:
        return False,""
    
    if len(temp_df) >0:
        temp_df = temp_df.reset_index()
        response = True
        item = temp_df["URI"].loc[temp_df.index[0]]
        # Save the data
        datarow = pd.Series(data={'URI':URI_string,'response':response,'type':item})
        # entitiesPlace_df = entitiesPlace_df.append(datarow, ignore_index=True)
        entitiesPlace_df = pd.concat([entitiesPlace_df, datarow], ignore_index=True)
        entitiesPlace_df.to_csv('cacheSpotlightResponse/validatePlacesEntities.csv',index=False)

    return response, item

# cache the answers
def readCachedFilePlaceEntity():
    # check if the file exists
    if os.path.isfile('cacheSpotlightResponse/validatePlacesEntities.csv'):
        return pd.read_csv('cacheSpotlightResponse/validatePlacesEntities.csv')
    else:
        otherEntitiePlaces_df = pd.DataFrame(columns=['URI','response',"type"])
         # create the file to store the list of biographies with empty entities 
        otherEntitiePlaces_df.to_csv('cacheSpotlightResponse/validatePlacesEntities.csv',index=False)
    return otherEntitiePlaces_df

# call function, search all the values in DBpedia or locally, 
# returns True if the URI is of a person, False otherwise
# it is always False unless the query returns True
def checkPersonEntity(URI_string, entTypes_df):
    res = False
    
    #two steps
    # check if the person has a DOB. If it does have a DOB then it confirms it is a person and returns TRUE, otherwise
    # res = checkDOBEntity(URI_string)
    # if res:
    #     return True
    # check if the person is of one of the types in the following list.
    # if the URI is one of the triples, then it is not a Person.
    # else:
    res = checkPersonRolesEntity(URI_string, entTypes_df)
    if res:
        return False
    else:
        return True
    return res

def checkDOBEntity(URI_string):
    entitiesDOB_df = readCachedFileDOBEntity()
    # perform a validation with date of birth first
    localEntitiesDOB_df = entitiesDOB_df.loc[(entitiesDOB_df['URI'] == URI_string)]
    if len(localEntitiesDOB_df) > 0:
        # It is not a person
        return localEntitiesDOB_df['response'].loc[localEntitiesDOB_df.index[0]]

    query_text = "ASK  { <"+URI_string+"> dbo:birthDate ?birthDate }"
    results = executeQueryDbpedia(query_text).json()
        # if query returns a response
    if 'boolean' in results:
        response = results['boolean']
        # Save the data
        if response:
            datarow = pd.Series(data={'URI':URI_string,'response':True,'type':"dbo:birthDate ?birthDate"})
            # entitiesDOB_df = entitiesDOB_df.append(datarow, ignore_index=True)
            entitiesDOB_df = pd.concat([entitiesDOB_df, datarow], ignore_index=True)
            entitiesDOB_df.to_csv('cacheSpotlightResponse/otherPersonEntitiesDOB.csv',index=False)
            return True
        else:
            datarow = pd.Series(data={'URI':URI_string,'response':False,'type':"dbo:birthDate ?birthDate"})
            # entitiesDOB_df = entitiesDOB_df.append(datarow, ignore_index=True)
            entitiesDOB_df = pd.concat([entitiesDOB_df, datarow], ignore_index=True)
            entitiesDOB_df.to_csv('cacheSpotlightResponse/otherPersonEntitiesDOB.csv',index=False)
            return False
    
# call function, search all the values in DBpedia or locally, returs True if the URI is of a person function/role, False otherwise
# Input: entity URI
# Output is always True(a person fuction/role) unless the query returns False
def checkPersonRolesEntity(URI_string,entTypes_df):
    response = False
    # Verify first if we already queried DBpedia spotlight for entity validation
    # read file with list of entities
    entities_df = readCachedFileTypeEntity()
    localEntities_df = entities_df.loc[(entities_df['URI'] == URI_string) & (entities_df['response'] == True)]
    if len(localEntities_df) > 0:
        # It is not a person
        return True
    
    object_type_list = ["http://dbpedia.org/ontology/PersonFunction"]
    temp_df = entTypes_df.set_index(['types'])
    temp_df = temp_df.loc[temp_df.index.isin(object_type_list)]
    if len(temp_df) > 0:
        temp_df = temp_df.reset_index()
        response = True
        item = temp_df["types"].loc[temp_df.index[0]]
        # Save the data
        datarow = pd.Series(data={'URI':URI_string,'response':response,'type':item})
        entities_df = entities_df.append(datarow, ignore_index=True)
        entities_df.to_csv('cacheSpotlightResponse/otherPersonEntities.csv',index=False)
        return response
    
    object_dct_list = ["dct:subject dbc:Musical_terminology","dct:subject dbc:Idioms",
                  "dct:subject dbc:Orchestral_instruments", "dct:subject dbc:Concert_band_instruments", "dct:subject dbc:Occupations_in_music"]
    
    localEntities_int = len(entities_df.loc[(entities_df['URI'] == URI_string) & (entities_df['response'] == False)])
    if localEntities_int == len(object_dct_list):
        return False

    for item in object_dct_list:
        localEntities_df = entities_df.loc[(entities_df['URI'] == URI_string) & (entities_df['type'] == item)]
        if len(localEntities_df) == 0:
            # build query
            query_text = "PREFIX dct: <http://purl.org/dc/terms/> \
            ASK  { <"+URI_string+"> "+item+" }"
            print(query_text)
            # E.g.: ASK  { <http://dbpedia.org/resource/Conducting> rdf:type dbo:PersonFunction } 
            # true for composer, solicitor, conducting
            # query DBpedia
            results = executeQueryDbpedia(query_text).json()
            # if query returns a response
            if 'boolean' in results:
                # to obtain the list of types
                response = results['boolean']
                # Save the data
                datarow = pd.Series(data={'URI':URI_string,'response':response,'type':item})
                # entities_df = entities_df.append(datarow, ignore_index=True)
                entities_df = pd.concat([entities_df, datarow], ignore_index=True)
                entities_df.to_csv('cacheSpotlightResponse/otherPersonEntities.csv',index=False)
                if response:
                    break 
    return response

# cache the answers
def readCachedFileTypeEntity():
    # check if the file exists
    if os.path.isfile('cacheSpotlightResponse/otherPersonEntities.csv'):
        return pd.read_csv('cacheSpotlightResponse/otherPersonEntities.csv')
    else:
        otherEntities_df = pd.DataFrame(columns=['URI','response',"type"])
         # create the file to store the list of biographies with empty entities 
        otherEntities_df.to_csv('cacheSpotlightResponse/otherPersonEntities.csv',index=False)
    return otherEntities_df

# cache the answers
def readCachedFileDOBEntity():
    # check if the file exists
    if os.path.isfile('cacheSpotlightResponse/otherPersonEntitiesDOB.csv'):
        return pd.read_csv('cacheSpotlightResponse/otherPersonEntitiesDOB.csv')
    else:
        otherEntitieDOBs_df = pd.DataFrame(columns=['URI','response',"type"])
         # create the file to store the list of biographies with empty entities 
        otherEntitiesDOBs_df.to_csv('cacheSpotlightResponse/otherPersonEntitiesDOB.csv',index=False)
    return otherEntitiesDOBs_df

# # URI_string = "http://dbpedia.org/resource/Alan_Turing"
# # URI_string = "http://dbpedia.org/resource/Composer"
# # URI_string = "http://dbpedia.org/resource/Contralto"
# # URI_string = "http://dbpedia.org/resource/Man"
# URI_string = "http://dbpedia.org/resource/Edward_Elgar"
# role = checkPersonEntity(URI_string)
# print("Finale: ")
# print(role)

# # response, types = checkPlaceEntity(item_queriedType.URI, df_empty_type)
# response = checkPersonEntity(item_queriedType.URI, df_empty_type)
# print(response)
# # print(types)

In [4]:
# !python 021_Identify_PP.py > out.txt
# 1. Extract People and Places entities
entitiyTypes = ['DBpedia:Person','DBpedia:MusicalArtist','DBpedia:Place','DBpedia:SocietalEvent']
# Entity place types
entityPlaceTypesExt = ['http://www.wikidata.org/entity/Q41176','http://www.wikidata.org/entity/Q486972',
                      'http://dbpedia.org/ontology/Place','http://dbpedia.org/ontology/Location','http://www.wikidata.org/entity/Q6256']
entityPeopleTypesExt = ['http://dbpedia.org/ontology/Person','http://dbpedia.org/ontology/MusicalArtist',
                       'http://www.wikidata.org/entity/Q215627']
object_dct_list = ["dct:subject dbc:Musical_terminology","dct:subject dbc:Idioms","dct:subject dbc:Orchestral_instruments", 
                   "dct:subject dbc:Concert_band_instruments", "dct:subject dbc:Occupations_in_music","http://dbpedia.org/ontology/PersonFunction"]
# http://dbpedia.org/ontology/Person
# df_entityType = pd.DataFrame({'id':[1,2],'types':['http://dbpedia.org/ontology/Person',
#                                                       'http://dbpedia.org/ontology/MusicalArtist']})
# read master with index of entities
df_master_cache = pd.read_csv('cacheSpotlightResponse/emptyTypes_master.csv')

# use chunk to load a small number of files in memory
for chunk in pd.read_csv('totalBiosToProcessPP_1.csv', chunksize=50):
# for chunk in pd.read_csv('totalTest.csv', chunksize=50):
# for chunk in pd.read_csv('totalBiographiesBenchmark.csv', chunksize=30):
# for chunk in pd.read_csv('list_wikiIdSample.csv', chunksize=30):
    df_files = pd.DataFrame()
    df_files['file_name'] = chunk['file_name']
    
    # iterate over files
    for file_name_item in df_files.itertuples():
        if os.path.isfile('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name):
            print(file_name_item.file_name, " file exists.")
            continue
        try:
            # start = time.time()
            print(file_name_item.file_name)
            # check if the file exists, e.g., 10085.csv
            # file should exist, it was create during queryDBpedia process
            file_exists = os.path.isfile('cacheSpotlightResponse/'+file_name_item.file_name)

            if file_exists:
                # read the cached results from the query
                entitiesByBiography_df = pd.read_csv('cacheSpotlightResponse/'+file_name_item.file_name)
                # FILTER
                # Validate that the "SIMILARITYSCORE" given by DBpedia Spotlight is greater that 0.6
                # entitiesByBiography_df = entitiesByBiography_df.loc[entitiesByBiography_df['similarityScore'] > 0.6].copy()
                # FILTER
                # not taking into account paragraphs with external links and references
                entitiesByBiography_df = entitiesByBiography_df[~entitiesByBiography_df['section'].isin(['== References ==',
                                                                                                         '== Filmography ==','== External links =='])]
                
                # surfaceForm: entity as found in the text
                # entitiesByBiography_df.rename(columns = {'surfaceForm':'entity'},inplace = True)
                # column types contain DBpedia classes, e.g., dbo:Person, Schema:Place, etc.
                # select all rows where types <> NA, not empty = ne
                df_ne = entitiesByBiography_df.loc[~entitiesByBiography_df['types'].isna()] 

                df_result = pd.DataFrame()
                if len(df_ne) >0:
                    for entity in entitiyTypes:
                        df_temp = df_ne[df_ne['types'].str.contains(entity)].copy()

                        if not df_temp.empty:
                            if entity == 'DBpedia:Person' or entity == 'DBpedia:MusicalArtist':
                                df_temp['entType'] = 'person'
                            elif entity == 'DBpedia:Place':
                                df_temp['entType'] = 'place'
                            elif entity == 'DBpedia:SocietalEvent' or entity == 'DBpedia:Event':
                                df_temp['entType'] = 'event'

                            # df_result = df_result.append(df_temp)
                            df_result = pd.concat([df_result, df_temp], ignore_index=True)
                            
                # print("First len df_result: ",len(df_result))

                # now filter all the entities that are empty, this df contains all the info of the matching
                # e.g., surfaceForm, similarityScore, etc
                df_e = entitiesByBiography_df.loc[entitiesByBiography_df['types'].isna()] 
                
                # only if the dataframe has rows with empty value in column "entity"
                # Notebook 02 was in charge of identify empty entities, query and download to cache the real type
                if not df_e.empty:
                    # list of entities and the name of the files that store the results from the query
                    # when merging, only the entities that are stored in cache will have a file name value in column "file_name"
                    
                    df_merge = df_master_cache.merge(df_e, on=['URI','entity'],how='right')
                    
                    # df_merge.to_csv('extractedEntitiesPersonPlaceOnly/df_merge.csv',index=False)

                    # for all documents not in cache, meaning that value in column "file_name" is empty
                    df_cache = df_merge.loc[df_merge['file_name'].isna()]
                    if len(df_cache)>0:
                        # iterate over the df with all the entities that not in cache
                        for item_empty in df_cache.itertuples():
                            # query DBpedia, store results in local folder and update the master file
                            df_temp = queryEntityLeft(item_empty.URI,item_empty)
                        
                        # read the master cache again with the latest updates after checking for URIs in previous line
                        df_master_cache = pd.read_csv('cacheSpotlightResponse/emptyTypes_master.csv')
                        # merge again all the entities that had column "types" value as empty to find the name of the file
                        # where the response from DBpedia is stored
                        
                        df_merge = df_master_cache.merge(df_e, on=['URI','entity'],how='right')
                        df_merge.drop_duplicates(subset=['URI','entity'],keep='first',inplace=True)
                    # df_merge.to_csv('extractedEntitiesPersonPlaceOnly/df_merge.csv',index=False)

                    # df_cache will contain all the entities and the file_name value from where to read its URIs
                    # filter all the rows that have column "file_name" as having a value
                    df_cache = df_merge.loc[~df_merge['file_name'].isna()] 
                    # print("df_cache")
                    # print(df_cache)
                    for item_queriedType in df_cache.itertuples():
                        appendRow = False
                        typeEntity = ''

                        # read cache file
                        file_exists = os.path.isfile('cacheSpotlightResponse/'+item_queriedType.file_name)
                        if file_exists:
                            # once the file is located, filter the rows that match entity and URI
                            df_empty_type = pd.read_csv('cacheSpotlightResponse/'+item_queriedType.file_name)
                            # print("df_empty_type ",df_empty_type)
                            df_empty_type.drop_duplicates(subset=['entity','URI','types'],keep='first',inplace=True)
                            # print("df_empty_type ",df_empty_type)
                            try:
                                df_empty_type = df_empty_type.query("""entity == "{}" and URI=="{}" """.format(item_queriedType.entity,item_queriedType.URI))
                            except SyntaxError as ex:
                                # df_empty_type = df_empty_type.query("""entity == '{}' and URI=='{}'""".format(item_queriedType.entity,item_queriedType.URI))
                                df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
                            except TokenError as te:
                                df_empty_type = df_empty_type.query("entity == '{}' and URI=='{}'".format(item_queriedType.entity,item_queriedType.URI))
                                    
                            # print("df_empty_type AFTER filtering",df_empty_type)
                                # # filter the rows that match entity and URI
                            # df_empty_place = df_empty_type[df_empty_type['types'].isin(entityPlaceTypesExt)]
                            if not df_empty_type.empty:
                                # filter the rows that match entity and URI
                                df_empty_place = df_empty_type[df_empty_type['types'].isin(entityPlaceTypesExt)].copy()

                                # Classify entities according to type
                                # First, identify entity types Place, filter all rows with column "types" value equal to Place URI
                                # df_temp = df_empty_type[df_empty_type['types'].str.contains('http://dbpedia.org/ontology/Place')].copy()
                                # df_temp = df_empty_type.query('types == "http://dbpedia.org/ontology/Place"')
                                # if not df_temp.empty:
                                # response, types = checkPlaceEntity(item_queriedType.URI, df_empty_type)
                                # if response:
                                    # Before adding this entity to the list, verify if it is a place, by asking about alternative triples
                                    # E.g. having a capital
                                    # response = checkPlaceEntity(item_queriedType.URI)
                                    # types = df_temp['types'].loc[df_temp.index[0]]
                                    # types = "DBpedia:Place,DBpedia:Country"
                                    # types = 
                                if not df_empty_place.empty:
                                    # print(df_empty_place)
                                    types = "DBpedia:Place,DBpedia:Location,wikidata:Q486972,wikidata:Q41176"
                                    entType = 'place'
                                    #appendRow = True
                                    #if appendRow:
                                    df_new_row = pd.DataFrame([[item_queriedType.URI, item_queriedType.support, types, item_queriedType.entity, item_queriedType.offset, 
                                                                item_queriedType.similarityScore,item_queriedType.percentageOfSecondRank, item_queriedType.sentence, 
                                                                item_queriedType.sentenceIndex, item_queriedType.paragraphIndex, 
                                                               item_queriedType.section]],columns=list(entitiesByBiography_df))
                                    df_new_row['entType'] = entType
                                    # print("df_new_row: ", df_new_row)
                                    # df_result = df_result.append(df_new_row,ignore_index=True)
                                    df_result = pd.concat([df_result, df_new_row], ignore_index=True)
                                    # print("Len df_result after adding places: ",len(df_result))
                                    appendRow = False
                                    continue

                                # Second, identify entity types Person, filter all rows with column "types" value equal to Person URI
                                df_empty_people = df_empty_type[df_empty_type['types'].isin(entityPeopleTypesExt)].copy()
                                
                                # df_temp = df_empty_type.query('types == "http://dbpedia.org/ontology/Person" or types=="http://dbpedia.org/ontology/MusicalArtist"')
                                # df_temp = df_temp.sort_values(by='types', ascending=False)

                                if not df_empty_people.empty:
                                    # print(df_empty_people)
                                    # specific types that do not refer to people
                                    if len(df_empty_type[df_empty_type['types'].isin(object_dct_list)])>0:
                                        
                                        continue
                                    # Before adding this entity to the list, review that it is a person and it is not a PersonFunction or MusicalTerminology
                                    # call function, search all the values in DBpedia, returs True if the URI is a person, False otherwise
                                    # response = checkPersonEntity(item_queriedType.URI,df_empty_type)

                                    # if response:
                                    # types = df_temp['types'].loc[df_temp.index[0]]
                                    types = "DBpedia:Person,DBpedia:MusicalArtist,wikidata:Q215627"
                                    entType = 'person'
                                    #     appendRow = True
                                    # else:
                                    #     appendRow = False
                                    # if appendRow:
                                    df_new_row = pd.DataFrame([[item_queriedType.URI, item_queriedType.support, types, item_queriedType.entity, item_queriedType.offset, 
                                                                item_queriedType.similarityScore,item_queriedType.percentageOfSecondRank, item_queriedType.sentence, 
                                                                item_queriedType.sentenceIndex, item_queriedType.paragraphIndex, 
                                                               item_queriedType.section]],columns=list(entitiesByBiography_df))
                                    df_new_row['entType'] = entType
                                    # print("df_new_row: ", df_new_row)
                                    # df_result = df_result.append(df_new_row,ignore_index=True)
                                    df_result = pd.concat([df_result, df_new_row], ignore_index=True)
                                    # print("Len df_result after adding people: ",len(df_result))

                                    appendRow = False

                    # print('No empty types')
            # IF file does not exists, then the 
            else:
                print("[INFO] - File does not exist, run 020_queryDbpedia book. File: " + file_name_item.file_name)

            df_result['wikiPageID'] = file_name_item.file_name.replace('.csv','')
            # delete duplicate rows
            df_result.drop_duplicates(subset=['entity','URI','offset','sentenceIndex','paragraphIndex'],keep='first',inplace=True)
            df_result.to_csv('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name,index=False)
            # end = time.time()
            # print("The time of execution of above program is :", end-start)
        except (pd.errors.ParserError, KeyError) as pe:
            print('Error: ' + file_name_item.file_name)
            if hasattr(pe, 'message'):
                print(pe.message)
            else:
                print(pe)
            df_error = pd.DataFrame([[file_name_item.file_name]],columns=['file_name'])
            df_error.to_csv('temp_error.csv',mode='a',index=False,header=False)
    time.sleep(90)

2205692.csv  file exists.
17441602.csv  file exists.
10925966.csv  file exists.
54550593.csv  file exists.
65723345.csv  file exists.
52401446.csv  file exists.
34854217.csv  file exists.
3634591.csv  file exists.
54393783.csv  file exists.
3256685.csv  file exists.
705165.csv  file exists.
6393313.csv  file exists.
18047601.csv  file exists.
32442400.csv  file exists.
1355287.csv  file exists.
39637341.csv  file exists.
31509353.csv  file exists.
25522049.csv  file exists.
3942744.csv  file exists.
43499348.csv  file exists.
15707096.csv  file exists.
2655534.csv  file exists.
23757092.csv  file exists.
6242990.csv  file exists.
7340504.csv  file exists.
762840.csv  file exists.
196224.csv  file exists.
26944201.csv  file exists.
4432855.csv  file exists.
207591.csv  file exists.
2300926.csv  file exists.
34006449.csv  file exists.
5764028.csv  file exists.
64462922.csv  file exists.
25646577.csv  file exists.
34684163.csv  file exists.
16837034.csv  file exists.
978583.csv  file exis

  df_master['id'] = df_master['id'].str.replace('.csv','')


2176016.csv
22492855.csv
10508305.csv
56007683.csv
39757273.csv
51440798.csv
1823477.csv
455138.csv
19588768.csv
8855.csv
6310838.csv
7265624.csv
703488.csv
18778044.csv
1530914.csv
5138490.csv
6997149.csv
870043.csv
14947236.csv
13241032.csv
443772.csv
39396828.csv
24972730.csv
33948077.csv
17168689.csv
29285596.csv
846297.csv
34363493.csv
17885427.csv
32963865.csv
36532598.csv
46995861.csv
920660.csv
9581533.csv
2549987.csv
15374313.csv
13631044.csv
36418371.csv
50322721.csv
40365002.csv
4982329.csv
7651004.csv
15228478.csv
67501861.csv
34683790.csv
19539389.csv
46416893.csv
12721618.csv
32328278.csv
706282.csv
11381309.csv
5001211.csv
1254005.csv
38473068.csv
13974129.csv
3657182.csv
34315511.csv
23919644.csv
14080671.csv
61539916.csv
12125143.csv
9711198.csv
22121840.csv
1394553.csv
54108487.csv
54966252.csv
42386286.csv
1924456.csv
17776820.csv
1092824.csv
2592629.csv
2934315.csv
5143989.csv
37029067.csv
34546475.csv
27539399.csv
68265086.csv
33429767.csv
47128108.csv
9888835.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


4437640.csv
13984595.csv
14293844.csv
708292.csv
17055253.csv
2938274.csv
61613808.csv
16005995.csv
26566588.csv
14053372.csv
47943699.csv
16765590.csv
12869278.csv
53127398.csv
52601239.csv
556342.csv
48777788.csv
45365187.csv
8423286.csv
4324315.csv
68407464.csv
8278967.csv
402061.csv
31843000.csv
22492854.csv
12604499.csv
46313541.csv
45482553.csv
1174350.csv
3622246.csv
53869851.csv
2425874.csv
15876499.csv
338818.csv
2187788.csv
37862340.csv
43282.csv
43858028.csv
6803715.csv
42190461.csv
6754268.csv
2129507.csv
15263646.csv
13770757.csv
2637210.csv
10700938.csv
14449877.csv
22492840.csv
6241486.csv
509652.csv
26368.csv
2192391.csv
35397160.csv
65013168.csv
32713030.csv
7021949.csv
19160022.csv
21786305.csv
39657272.csv
15277155.csv
20112364.csv
35732059.csv
4013515.csv
1860229.csv
24439193.csv
23987535.csv
42475183.csv
38512332.csv
14432590.csv
1434568.csv
309384.csv
29255211.csv
40482414.csv
4303033.csv
11387962.csv
45506860.csv
1994357.csv
3263012.csv
42545732.csv
23936103.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


19503216.csv
35123131.csv
22397949.csv
2403441.csv
22688334.csv
62815854.csv
6471520.csv
4503903.csv
2415545.csv
33365520.csv
395378.csv
8450042.csv
733748.csv
43530537.csv
4878938.csv
1812481.csv
39550943.csv
42901738.csv
155696.csv
43228679.csv
32717726.csv
1143926.csv
11195851.csv
17239655.csv
24590665.csv
3486038.csv
177379.csv
54281742.csv
33612205.csv
554757.csv
3141790.csv
26123747.csv
27175628.csv
35201466.csv
846902.csv
21979290.csv
20269.csv
1524592.csv
12770668.csv
40118252.csv
1072537.csv
26490008.csv
17503792.csv
624208.csv
67310908.csv
13068141.csv
36074263.csv
53313293.csv
55715943.csv
28941562.csv
627701.csv
2303140.csv
3031263.csv
2981949.csv
3201318.csv
18181485.csv
66698452.csv
21211098.csv
31144461.csv
9194848.csv
5916338.csv
3850791.csv
1861335.csv
38155067.csv
15647063.csv
39582187.csv
6017308.csv
191830.csv
42534322.csv
6718512.csv
57397316.csv
164477.csv
4644372.csv
38187647.csv
13210724.csv
1213603.csv
4914160.csv
2217135.csv
25732155.csv
2173379.csv
4371894.cs

  df_master['id'] = df_master['id'].str.replace('.csv','')


60426049.csv
12417688.csv
29309988.csv
2227747.csv
58429783.csv
553945.csv
41180849.csv
7670236.csv
27150325.csv
42488218.csv
1480595.csv
97206.csv
193028.csv
58937134.csv
50175429.csv
22710384.csv
13525507.csv
29869676.csv
4434188.csv
3032971.csv
14959586.csv
6570819.csv
37355841.csv
22591150.csv
27197108.csv
26279354.csv
6792824.csv
56048426.csv
3940233.csv
2978485.csv
21883499.csv
21133242.csv
17846690.csv
29589479.csv
32162653.csv
564125.csv
5195698.csv
53536.csv
3717982.csv
676599.csv
38242560.csv
60788518.csv
67021562.csv
38023126.csv
24534662.csv
2391393.csv
91105.csv
18366271.csv
991041.csv
10568625.csv
27912768.csv
1419808.csv
48760021.csv
15941792.csv
66065009.csv
2598852.csv
37009638.csv
655340.csv
39868672.csv
30416548.csv
8397626.csv
20183737.csv
13965435.csv
9716580.csv
48363640.csv
9027274.csv
365841.csv
40138974.csv
22080335.csv
30204288.csv
40436204.csv
10710875.csv
10686803.csv
704084.csv
36597253.csv
864355.csv
33653477.csv
48411099.csv
3000848.csv
19672537.csv
25169

  df_master['id'] = df_master['id'].str.replace('.csv','')


10321495.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


1078340.csv
32270162.csv
41531322.csv
38044230.csv
30681273.csv
10956965.csv
162348.csv
17940178.csv
29811332.csv
43286960.csv
12389406.csv
29728182.csv
17608531.csv
31799073.csv
2805913.csv
17487910.csv
29146400.csv
3718928.csv
262482.csv
61418103.csv
212165.csv
50816441.csv
35916486.csv
287368.csv
42902631.csv
1707917.csv
1181083.csv
39383398.csv
147692.csv
21487845.csv
57764455.csv
21711647.csv
54403898.csv
38536337.csv
9342646.csv
31535335.csv
49652751.csv
41118235.csv
26821306.csv
3212980.csv
21418803.csv
261835.csv
48583931.csv
15537128.csv
42344934.csv
5429633.csv
66206857.csv
18624110.csv
47157684.csv
50861187.csv
64531373.csv
49361636.csv
21395940.csv
2330834.csv
36905182.csv
5851473.csv
18524816.csv
35847193.csv
47777069.csv
721002.csv
1297671.csv
1158906.csv
15062484.csv
59083263.csv
40703997.csv
5859991.csv
29806672.csv
5021926.csv
20014671.csv
45422859.csv
65763968.csv
4509806.csv
38268992.csv
355574.csv
32817065.csv
9523961.csv
17903069.csv
1355256.csv
1829759.csv
5810114

  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()


2458253.csv
8343467.csv
66745019.csv
30814895.csv
1630407.csv
1035724.csv
26039647.csv
45571959.csv
64427049.csv
615199.csv
16678843.csv
13828525.csv
27413602.csv
28322773.csv
12077204.csv
51895663.csv
865729.csv
1236295.csv
16548086.csv
47682976.csv
53046428.csv
34350.csv
15770783.csv
29439553.csv
7995710.csv
17216922.csv
2604580.csv
1220391.csv
13245133.csv
29832925.csv
3792807.csv
15902724.csv
14245194.csv
5094409.csv
1470925.csv
1417766.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


25153864.csv
1758787.csv
4928931.csv
49813670.csv
2180381.csv
29213863.csv
4946030.csv
4744570.csv
22614051.csv
29999344.csv
869885.csv
163268.csv
708692.csv
47849978.csv
16891352.csv
3116974.csv
838374.csv
414565.csv
987948.csv
398710.csv
26826125.csv
63914085.csv
30151553.csv
1288235.csv
40704544.csv
16917651.csv
13086842.csv
24126644.csv
3414358.csv
564096.csv
28628095.csv
18396025.csv
15829589.csv
9669378.csv
8543614.csv
5427408.csv
35289992.csv
40867388.csv
50237077.csv
43165869.csv
32685320.csv
4047222.csv
24257777.csv
31392851.csv
2437864.csv
577798.csv
6795034.csv
415109.csv
13453250.csv
35500447.csv
36251671.csv
19773163.csv
1427699.csv
41956268.csv
44115497.csv
7770391.csv
18078636.csv
2026220.csv
40757108.csv
40352287.csv
89835.csv
13630767.csv
16276284.csv
63109053.csv
43397272.csv
61437241.csv
649751.csv
25331888.csv
742176.csv
467400.csv
18586680.csv
11914512.csv
11849173.csv
1888578.csv
1147577.csv
2081666.csv
5125163.csv
2253954.csv
31012810.csv
800006.csv
57473556.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


1043705.csv
28027076.csv
19361.csv
55691306.csv
33631355.csv
34635194.csv
850631.csv
62436369.csv
4128008.csv
27702172.csv
3530417.csv
161292.csv
1211789.csv
1181322.csv
171081.csv
22258899.csv
564109.csv
15624271.csv
23487351.csv
25950468.csv
23727639.csv
37564627.csv
2057234.csv
63693336.csv
189048.csv
13363731.csv
30928765.csv
20166608.csv
34615250.csv
2273448.csv
23921679.csv
84242.csv
18128820.csv
2667812.csv
17256269.csv
6254500.csv
55656881.csv
22079145.csv
330065.csv
40879983.csv
16088860.csv
46653631.csv
7770232.csv
3586941.csv
27856884.csv
10806571.csv
35560576.csv
67832370.csv
100122.csv
1465509.csv
5129765.csv
27479544.csv
25080209.csv
2605701.csv
33175002.csv
382199.csv
13764281.csv
44044009.csv
53012858.csv
15410150.csv
61363463.csv
15416521.csv
1106517.csv
12049261.csv
27943537.csv
59870620.csv
34220020.csv
49218040.csv
6036315.csv
55549925.csv
3055736.csv
64587406.csv
10475116.csv
2951007.csv
18904544.csv
156416.csv
7202335.csv
10062845.csv
588823.csv
891723.csv
5958084

  df_master['id'] = df_master['id'].str.replace('.csv','')


1120249.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


531849.csv
11165556.csv
22617362.csv
583037.csv
43298182.csv
3288510.csv
47859389.csv
41213910.csv
25713809.csv
406983.csv
47572585.csv
1955916.csv
48791037.csv
185869.csv
44137018.csv
24676360.csv
18028149.csv
4466635.csv
32475123.csv
20362367.csv
34685680.csv
23807669.csv
12384240.csv
53914302.csv
28276242.csv
31684340.csv
32330156.csv
58846722.csv
22150266.csv
30802629.csv
48573750.csv
88493.csv
42434332.csv
168249.csv
637264.csv
16258186.csv
1922208.csv
22883668.csv
18175500.csv
960719.csv
47244250.csv
24300983.csv
18102788.csv
18210452.csv
44900948.csv
215554.csv
29187102.csv
991086.csv
56912185.csv
29143626.csv
2440232.csv
5365514.csv
20140624.csv
33441187.csv
36345016.csv
1341155.csv
47634540.csv
26795866.csv
44144960.csv
48453494.csv
3786090.csv
19931601.csv
1120275.csv
38924796.csv
965107.csv
24287146.csv
1276935.csv
43614401.csv
19066127.csv
65422344.csv
29193605.csv
13128213.csv
23268454.csv
5845764.csv
29107716.csv
6649759.csv
13467081.csv
15718205.csv
174112.csv
3138257.cs

  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()


17685013.csv
48711421.csv
6211289.csv
55871364.csv
2564804.csv
43100411.csv
1679365.csv
4464220.csv
64724856.csv
21087278.csv
10848391.csv
36288528.csv
6744057.csv
1866112.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


18761504.csv
63621023.csv
2278904.csv
26150593.csv
60319569.csv
54907865.csv
47129574.csv
14594384.csv
16706033.csv
2758189.csv
4204957.csv
29286930.csv
53266422.csv
1558475.csv
2180961.csv
2250736.csv
24611847.csv
37418049.csv
21530989.csv
61392115.csv
62862616.csv
7966288.csv
30479404.csv
29112643.csv
655813.csv
7040357.csv
26933104.csv
9241549.csv
8402516.csv
14976944.csv
36395223.csv
1549236.csv
18851974.csv
439959.csv
41046201.csv
46258034.csv
1272951.csv
8027561.csv
4197163.csv
60771378.csv
2638641.csv
33504815.csv
8816868.csv
17253846.csv
4930.csv
28059999.csv
8305778.csv
541148.csv
62241147.csv
29706445.csv
8057086.csv
51032856.csv
13196933.csv
51359929.csv
22492844.csv
268795.csv
14606110.csv
39530255.csv
3459890.csv
16664852.csv
35262489.csv
296339.csv
921221.csv
40018040.csv
17269264.csv
20508865.csv
659973.csv
35638216.csv
11327291.csv
14473537.csv
25724525.csv
333791.csv
16110436.csv
5248684.csv
17118343.csv
22492850.csv
23033826.csv
53562351.csv
1156693.csv
7584874.csv
50

  df_master['id'] = df_master['id'].str.replace('.csv','')


246327.csv
19636760.csv
50790360.csv
4767790.csv
37925026.csv
4849048.csv
44674179.csv
1805488.csv
3574081.csv
39002993.csv
47111954.csv
55875671.csv
1197794.csv
30237257.csv
42374772.csv
40857100.csv
19559479.csv
24309540.csv
3720468.csv
171241.csv
49081633.csv
1498136.csv
39752441.csv
36553176.csv
25805681.csv
61920612.csv
58585849.csv
4997052.csv
51158318.csv
28057037.csv
539970.csv
23428766.csv
2202065.csv
12200425.csv
1773891.csv
5900007.csv
33613334.csv
7803405.csv
55215757.csv
63621036.csv
30189273.csv
4214589.csv
2314175.csv
64837.csv
47862685.csv
34148133.csv
64380457.csv
18105233.csv
4043496.csv
17391509.csv
16217445.csv
37543919.csv
14219211.csv
21488342.csv
10853029.csv
23209518.csv
21278598.csv
31313023.csv
53506601.csv
38341508.csv
17947951.csv
10481294.csv
14563094.csv
25860780.csv
5105098.csv
23348550.csv
15130274.csv
5594204.csv
9142760.csv
322138.csv
1855240.csv
3802561.csv
35987201.csv
2281387.csv
8962695.csv
5643282.csv
41574086.csv
67424398.csv
31728524.csv
2788131

  df_master['id'] = df_master['id'].str.replace('.csv','')


5093549.csv
7051065.csv
51025683.csv
15743566.csv
4305862.csv
1799904.csv
26226682.csv
2176576.csv
14477234.csv
5221357.csv
5605016.csv
1150787.csv
37292300.csv
49610629.csv
47620246.csv
27410242.csv
42688272.csv
1036370.csv
56722462.csv
43738854.csv
30181363.csv
42560762.csv
41497876.csv
265918.csv
44494579.csv
7232234.csv
52856557.csv
18210447.csv
57977435.csv
6368575.csv
9926881.csv
43710666.csv
17229443.csv
33846506.csv
49044745.csv
67814407.csv
50938385.csv
62584525.csv
20225763.csv
164466.csv
15910269.csv
15468983.csv
2351603.csv
5541834.csv
2438388.csv
7197156.csv
3023110.csv
11767655.csv
7041504.csv
13209270.csv
549071.csv
187931.csv
7235229.csv
6368561.csv
23135932.csv
1442174.csv
58224748.csv
47676772.csv
31717267.csv
50462364.csv
1526394.csv
15433993.csv
48650130.csv
51553284.csv
3606685.csv
2928274.csv
1672755.csv
43576215.csv
31520329.csv
46407540.csv
5875842.csv
495126.csv
16844340.csv
35473292.csv
49453794.csv
29634810.csv
2024218.csv
1994193.csv
3961213.csv
1159947.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


34212118.csv
5411839.csv
15653629.csv
22903121.csv
65719237.csv
32210481.csv
55763026.csv
1335789.csv
395209.csv
177220.csv
67729599.csv
41953339.csv
40453499.csv
41144749.csv
7193278.csv
24621359.csv
29140266.csv
45377800.csv
31036810.csv
5696050.csv
47718849.csv
17755347.csv
55584163.csv
19803088.csv
10731672.csv
624351.csv
13266099.csv
14755171.csv
57674323.csv
56560979.csv
3631161.csv
583156.csv
19743937.csv
24681851.csv
1461183.csv
34345040.csv
13146497.csv
1144876.csv
5678189.csv
5329125.csv
10916824.csv
45619309.csv
21271812.csv
31231686.csv
296927.csv
36545489.csv
24373171.csv
1597832.csv
32659289.csv
1192565.csv
2562643.csv
51565695.csv
18110548.csv
7963078.csv
2611173.csv
22289347.csv
13699537.csv
50024065.csv
540690.csv
46577138.csv
38267875.csv
19504385.csv
55997569.csv
54686368.csv
5933988.csv
6828634.csv
22108187.csv
797602.csv
33186955.csv
1547805.csv
35245396.csv
2186726.csv
1058278.csv
8485929.csv
29408770.csv
44290395.csv
147308.csv
19566266.csv
29715799.csv
35617155.

  df_master['id'] = df_master['id'].str.replace('.csv','')


23964244.csv
3114623.csv
31435693.csv
12232722.csv
21509270.csv
26289698.csv
222514.csv
2682727.csv
48414655.csv
19844466.csv
29011798.csv
3032828.csv
11686911.csv
30349819.csv
31479102.csv
975711.csv
15086065.csv
29005262.csv
2490600.csv
8790185.csv
4600657.csv
40152028.csv
2965770.csv
66986648.csv
16849248.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


383899.csv
26441052.csv
4161794.csv
29616045.csv
20398180.csv
50559084.csv
24676995.csv
1239399.csv
30849775.csv
439838.csv
16717711.csv
30420438.csv
32844428.csv
305430.csv
26860401.csv
934995.csv
21694116.csv
32780729.csv
7826097.csv
26518133.csv
4269364.csv
2296758.csv
35892963.csv
38051600.csv
64222141.csv
48800726.csv
50010673.csv
23869589.csv
1315673.csv
52989525.csv
48982043.csv
47456778.csv
457893.csv
27169166.csv
60431849.csv
52112609.csv
38810988.csv
33512864.csv
30015106.csv
1679238.csv
31461759.csv
231795.csv
14336423.csv
48675950.csv
4816376.csv
36486603.csv
12380379.csv
2012392.csv
33296095.csv
61322953.csv
4463412.csv
3392603.csv
48514494.csv
5493392.csv
19001585.csv
86939.csv
7369608.csv
26773064.csv
40460245.csv
1705867.csv
40906549.csv
1958823.csv
40926841.csv
7963285.csv
56248712.csv
15054679.csv
31600451.csv
2581725.csv
19921995.csv
21530624.csv
29149054.csv
56504025.csv
1031755.csv
2354357.csv
49101147.csv
60185699.csv
38813517.csv
18500989.csv
54137061.csv
407732.

  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')


23954193.csv
33305172.csv
686241.csv
41938997.csv
1158736.csv
2118642.csv
56633828.csv
2291960.csv


  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()
  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()


51459344.csv
42065772.csv
737688.csv
5972256.csv
30865504.csv
32178234.csv
25729189.csv
43437698.csv
16141083.csv
36791.csv
57526178.csv
50936037.csv
10027922.csv
57331887.csv
44504587.csv
7203410.csv
51793387.csv
54692135.csv
51818791.csv
1568416.csv
29865315.csv
158766.csv
3342811.csv
34918030.csv
34898624.csv
6081782.csv
43795281.csv
55373573.csv
7344997.csv
1097147.csv
17357202.csv
425220.csv
36521333.csv
14058397.csv
36958245.csv
1655061.csv
4417420.csv
2686769.csv
41120756.csv
63538442.csv
15724892.csv
26859241.csv
32264699.csv
181985.csv
3657769.csv
1437890.csv
6868010.csv
30796095.csv
57292862.csv
53307632.csv
48538183.csv
29153797.csv
1182210.csv
25086519.csv
2905045.csv
182532.csv
7957863.csv
47616325.csv
1576600.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


22412882.csv
3423044.csv
30698912.csv
338770.csv
46609138.csv
27431649.csv
62848177.csv
21244303.csv
7396153.csv
60882007.csv
47577005.csv
47154.csv
4133761.csv
55847986.csv
28877362.csv
18120847.csv
42638174.csv
16087198.csv
48548316.csv
33075405.csv
206846.csv
519297.csv
237443.csv
29787789.csv
47421257.csv
57693021.csv
45075288.csv
51020458.csv
37946346.csv
35608513.csv
7273646.csv
23595391.csv
63042694.csv
22945412.csv
15821.csv
4572521.csv
22055.csv
60215230.csv
47669268.csv
2429867.csv
11403023.csv
48266529.csv
4817724.csv
13342449.csv
2591534.csv
33930914.csv
26749488.csv
784481.csv
3264815.csv
23889331.csv
29175977.csv
64743762.csv
1881198.csv
3002282.csv
47049485.csv
51905.csv
62135721.csv
30328215.csv
23190778.csv
53700355.csv
2848810.csv
10411068.csv
2769223.csv
38242822.csv
5646515.csv
1059399.csv
288058.csv
64277142.csv
24383069.csv
57575285.csv
3172003.csv
64529265.csv
4321336.csv
45964.csv
21747370.csv
17322138.csv
3878436.csv
44409712.csv
32285498.csv
37024342.csv
31917

  df_master['id'] = df_master['id'].str.replace('.csv','')


27047299.csv
14750221.csv
1414734.csv
20060681.csv
41701500.csv
159493.csv
54347514.csv
50323571.csv
11440132.csv
9409621.csv
4959463.csv
21104748.csv
1174545.csv
395747.csv
12637646.csv
23674841.csv
39685535.csv
13116936.csv
17710433.csv
65444.csv
3402049.csv
16948626.csv
2484677.csv
4354959.csv
2395253.csv
1863121.csv
1296578.csv
6271422.csv
196590.csv
4029088.csv
231797.csv
48499956.csv
2773253.csv
36135628.csv
1328296.csv
5325245.csv
878551.csv
43156708.csv
48443294.csv
18670224.csv
53785363.csv
53718532.csv
60202002.csv
3537794.csv
64665941.csv
971561.csv
27232866.csv
24906841.csv
12311476.csv
42389789.csv
11936439.csv
27136306.csv
2397336.csv
63747.csv
2369942.csv
36294738.csv
11474283.csv
34665429.csv
26870604.csv
46248350.csv
32393985.csv
2177087.csv
20446894.csv
21317565.csv
36792343.csv
36375049.csv
34189427.csv
994062.csv
34062492.csv
28969404.csv
41634752.csv
18904369.csv
33209238.csv
15761896.csv
5798902.csv
23271933.csv
54533555.csv
26194101.csv
42560.csv
58534851.csv
146

  df_master['id'] = df_master['id'].str.replace('.csv','')


2657736.csv
45716158.csv
3733827.csv
55198620.csv
47696135.csv
5895793.csv
24424310.csv
48216421.csv
41667524.csv
211080.csv
18544298.csv
40023572.csv
1797730.csv
3101416.csv
1119420.csv
6740971.csv
2310846.csv
2430067.csv
473338.csv
28646834.csv
4265825.csv
17751902.csv
3221018.csv
23633101.csv
25478752.csv
55590935.csv
57853556.csv
18882736.csv
7359332.csv
13482967.csv
57889058.csv
44647758.csv
39301777.csv
33464489.csv
30664175.csv
30037010.csv
59839551.csv
18853379.csv
38191787.csv
613474.csv
30796491.csv
5030223.csv
1090562.csv
19787142.csv
37801409.csv
4123604.csv
526477.csv
62060481.csv
37578543.csv
1005009.csv
49047299.csv
2232359.csv
29962724.csv
935096.csv
13103724.csv
14476699.csv
47008022.csv
1525884.csv
53068891.csv
1196107.csv
6482450.csv
33776035.csv
1353591.csv
36223421.csv
14068650.csv
53257.csv
13439129.csv
1473383.csv
2997876.csv
30036483.csv
1181490.csv
22268024.csv
42093437.csv
11045849.csv
842587.csv
1418522.csv
16147753.csv
41081563.csv
43469323.csv
47737710.csv


  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)].copy()


18273860.csv
61554.csv
26344777.csv
20936871.csv
11913295.csv
66214854.csv
205988.csv
11379852.csv
47131106.csv
48684377.csv
12224963.csv
42785740.csv
2402770.csv
55407446.csv
1549023.csv
35120417.csv
9502370.csv
4329352.csv
765247.csv
18933328.csv
45423198.csv
21255811.csv
33639939.csv
13689849.csv
9903395.csv
39747502.csv
1212268.csv
26463187.csv
163479.csv
39801166.csv
13511672.csv
42086012.csv
4155037.csv
22617349.csv
2176560.csv
62867547.csv
22118553.csv
13027645.csv
58002721.csv
6985013.csv
48232226.csv
44280999.csv
30867930.csv
16687076.csv
2201036.csv
351301.csv
36890788.csv
1594242.csv
2673131.csv
1509413.csv
34764455.csv
968346.csv
49641470.csv
4959067.csv
1313176.csv
1945290.csv
4410950.csv
1442176.csv
1827401.csv
9545749.csv
60679531.csv
62569685.csv
18182835.csv
6271998.csv
2446640.csv
6633784.csv
47209804.csv
21288641.csv
3572431.csv
24465172.csv
16844183.csv
2823860.csv
2750273.csv
40612912.csv
147726.csv
320711.csv
53932107.csv
16869011.csv
37297939.csv
49161060.csv
440

  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')


13301404.csv
41444806.csv
64650124.csv
56766020.csv
2035129.csv
30519.csv
65328633.csv
5579615.csv
54560284.csv
2283551.csv
57169261.csv
30000469.csv
33334542.csv
13044876.csv
1277180.csv
53645033.csv
57357445.csv
15877780.csv
1594732.csv
28142404.csv
38551581.csv
43802447.csv
31553845.csv
30131878.csv
28884349.csv
584217.csv
10924506.csv
34380054.csv
24229862.csv
20899248.csv
295633.csv
2116928.csv
12036105.csv
1440563.csv
10021200.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')
  df_master['id'] = df_master['id'].str.replace('.csv','')


19522787.csv
9266863.csv
21692363.csv
39693237.csv
53267717.csv
44025147.csv
36670729.csv
17402689.csv
22605229.csv
26724062.csv
42817411.csv
2078983.csv
4842137.csv
5034284.csv
47346652.csv
35297615.csv
2566786.csv
442330.csv
29902577.csv
25866442.csv
58759768.csv
18202531.csv
38043415.csv
45002808.csv
26764447.csv
1051840.csv
33664183.csv
20583840.csv
21716710.csv
33278985.csv
1701586.csv
2085827.csv
34593514.csv
30203867.csv
24806920.csv
25933318.csv
40243798.csv
12056828.csv
51619652.csv
4303960.csv
30692423.csv
40483081.csv
1003860.csv
11187923.csv
24445248.csv
8219421.csv
1072727.csv
61519357.csv
39690059.csv
3724631.csv
47869062.csv
43553294.csv
2470697.csv
32972953.csv
29864181.csv
51132698.csv
63928408.csv
49466506.csv
11308502.csv
100487.csv
30635844.csv
62411529.csv
26459018.csv
5641455.csv
46298560.csv
28150767.csv
18738329.csv
31508840.csv
4195762.csv
42091779.csv
18022880.csv
23703570.csv
48228069.csv
8481147.csv
15886778.csv
404370.csv
3626998.csv
13491517.csv
33336432.c

  df_master['id'] = df_master['id'].str.replace('.csv','')


29788530.csv
240999.csv
13060731.csv
48857735.csv
34577862.csv
30752149.csv
28569339.csv
23843559.csv
39830184.csv
2242324.csv
40110422.csv
13818786.csv
624030.csv
519309.csv
43725386.csv
54217518.csv
25113852.csv
49260304.csv
57769007.csv
42158237.csv
31846624.csv
384703.csv
168707.csv
53435223.csv
66251851.csv
56339088.csv
19508635.csv
46920011.csv
21463479.csv
38570572.csv
276581.csv
22617604.csv
922702.csv
25276055.csv
1745307.csv
40601947.csv
771780.csv
28164373.csv
2439254.csv
30872125.csv
1430046.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


29125760.csv
5989807.csv
8750567.csv
41673746.csv
28055804.csv
1082403.csv
24608441.csv
470340.csv
48994795.csv
19046752.csv
37742302.csv
1237214.csv
14766631.csv
12436456.csv
13600261.csv
20687485.csv
54469262.csv
24813336.csv
19980779.csv
60265649.csv
24889208.csv
995745.csv
54273061.csv
1404485.csv
13236524.csv
3682747.csv
25181303.csv
2647054.csv
18798148.csv
1054045.csv
38352563.csv
3495297.csv
14960117.csv
32494136.csv
85406.csv
1891567.csv
3514951.csv
49010868.csv
36801537.csv
28305047.csv
534502.csv
51217339.csv
899619.csv
2560321.csv
33342416.csv
7522914.csv
31663042.csv
35306864.csv
45074791.csv
63033859.csv
49126316.csv
11368547.csv
5262906.csv
14505312.csv
5230926.csv
48739344.csv
1891201.csv
1918357.csv
54701166.csv
2536629.csv
7297476.csv
6232491.csv
351843.csv
23019954.csv
41374590.csv
6743935.csv
4235587.csv
1153702.csv
11339046.csv
5950828.csv
29456237.csv
67803391.csv
42844909.csv
29030345.csv
35827088.csv
23485034.csv
3535009.csv
16617699.csv
44848503.csv
77860.csv
8

  df_master['id'] = df_master['id'].str.replace('.csv','')


42146579.csv
850196.csv
14140888.csv
20376304.csv
25061778.csv
102910.csv
23814165.csv
4344815.csv
55463843.csv
18204419.csv
48520450.csv
53992603.csv
1166578.csv
21693256.csv
30150075.csv
3522978.csv
3400515.csv
41684899.csv
44152696.csv
40522471.csv
2394206.csv
3202159.csv
32157163.csv
62785870.csv
40469672.csv
5011561.csv
32600512.csv
38472615.csv
16639969.csv
44026665.csv
25130537.csv
8120032.csv
48648032.csv
36897970.csv
5308936.csv
11495336.csv
141829.csv
33748131.csv
1720762.csv
168263.csv
44456613.csv
493554.csv
762515.csv
30811657.csv
1059950.csv
44129542.csv
19569963.csv
26579848.csv
3392238.csv
3775132.csv
1326682.csv
10654189.csv
9246026.csv
45312651.csv
14649349.csv
5051623.csv
2834104.csv
22617360.csv
21660453.csv
38249355.csv
20176431.csv
2368730.csv
839861.csv
748465.csv
40712293.csv
740039.csv
6030772.csv
331437.csv
63045267.csv
167805.csv
39897878.csv
1114111.csv
40229958.csv
4780487.csv
24940578.csv
8705994.csv
51465084.csv
53571844.csv
2719458.csv
33496178.csv
22115

  df_master['id'] = df_master['id'].str.replace('.csv','')


459467.csv
8382577.csv
65793125.csv
1452204.csv
32531159.csv
64690077.csv
32876843.csv
39068803.csv
41753045.csv
2416116.csv
2329817.csv
11925652.csv
32805675.csv
54132442.csv
5903653.csv
18774527.csv
3138447.csv
7197035.csv
68306645.csv
66471448.csv
655583.csv
5056004.csv
37295518.csv
19410324.csv
2780872.csv
37642909.csv
43618359.csv
58365105.csv
29188435.csv
2104929.csv
47377934.csv
438927.csv
19990813.csv
47421083.csv
26549633.csv
25386749.csv
61870422.csv
35756687.csv
58425020.csv
517046.csv
6363219.csv
10389719.csv
11772313.csv
419719.csv
734633.csv
22040103.csv
588766.csv
939726.csv
39320155.csv
864347.csv
31463373.csv
61254352.csv
168303.csv
61658947.csv
8938611.csv
2388997.csv
35564699.csv
3402262.csv
26974649.csv
46365957.csv
36403420.csv
31682653.csv
16910666.csv
33735092.csv
63784927.csv
7208785.csv
14109442.csv
49198003.csv
24228823.csv
36413627.csv
15055000.csv
1755289.csv
5565611.csv
31686421.csv
62772432.csv
17991723.csv
1974110.csv
13136601.csv
468692.csv
32723.csv
449

  df_master['id'] = df_master['id'].str.replace('.csv','')


1478850.csv
3032469.csv
47206416.csv
43850207.csv
2751094.csv
14904555.csv
298563.csv
65612138.csv
2060760.csv
12609902.csv
318377.csv
16152066.csv
8226932.csv
44256981.csv
32151165.csv
12135522.csv
548733.csv
4332338.csv
28464068.csv
36455371.csv
29785113.csv
166639.csv
290117.csv
5330606.csv
27734704.csv
2389146.csv
1672447.csv
600095.csv
21374259.csv
30180353.csv
36724256.csv
30132287.csv
27891789.csv
66256793.csv
19558111.csv
62128070.csv
17249099.csv
56512145.csv
42473342.csv
31419876.csv
22753627.csv
4884658.csv
20142.csv
526954.csv
25397353.csv
57868149.csv
52025943.csv
1724615.csv
11726255.csv
4740559.csv
39220456.csv
36282672.csv
30917137.csv
294403.csv
2177234.csv
284610.csv
67136484.csv
5506845.csv
2306461.csv
1776621.csv
9555676.csv
2046943.csv
2749181.csv
23374352.csv
8253361.csv
254627.csv
67154201.csv
61597333.csv
33154320.csv
49351974.csv
35714809.csv
15557685.csv
9465979.csv
24271299.csv
5026976.csv
41610331.csv
41812671.csv
9256130.csv
16382.csv
63398422.csv
45454028.

  df_master['id'] = df_master['id'].str.replace('.csv','')


1677932.csv
365149.csv
36578022.csv
3086457.csv
7079612.csv
3002757.csv
3321888.csv
1003230.csv
1195246.csv
46454961.csv
30522908.csv
44943753.csv
4259758.csv
1790603.csv
40306880.csv
248971.csv
31349765.csv
13337503.csv
898158.csv
44745075.csv
33623126.csv
36888994.csv
4772058.csv
41231444.csv
192586.csv
40480855.csv
7797209.csv
621503.csv
214997.csv
35814960.csv
32818125.csv
36993393.csv
43181.csv
317534.csv
13676757.csv
164576.csv
36270802.csv
190391.csv
14045161.csv
21421567.csv
55071153.csv
1938355.csv
1875241.csv
11915090.csv
1408179.csv
40416837.csv
27737033.csv
397334.csv
26380627.csv
867167.csv
1628205.csv
51042708.csv
208400.csv
7980344.csv
6994018.csv
54637712.csv
3664662.csv
35590757.csv
22226265.csv
12865755.csv
29692123.csv
37024668.csv
2584407.csv
63523.csv
17571563.csv
2208666.csv
49523439.csv
27395671.csv
2207555.csv
5618625.csv
65378300.csv
56322949.csv
30862782.csv
62827071.csv
11506465.csv
11821453.csv
64366217.csv
65371442.csv
2225092.csv
64631812.csv
23532404.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


24621507.csv
4515696.csv
8384773.csv
41894038.csv
32913110.csv
2343505.csv
21770182.csv
983395.csv
13127151.csv
5328317.csv
29005602.csv
43155677.csv
10252998.csv
33846210.csv
18629727.csv
5817735.csv
31872914.csv
2824358.csv
47092807.csv
7840014.csv
4568641.csv
52959600.csv
1176056.csv
105942.csv
690900.csv
47077653.csv
5220987.csv
11184929.csv
12346158.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


3398982.csv
1749943.csv
59692226.csv
46965728.csv
30156463.csv
1555336.csv
17843004.csv
3725004.csv
22617461.csv
28059268.csv
22150203.csv
11945089.csv
36142940.csv
15453795.csv
6573430.csv
47446651.csv
3577088.csv
3026024.csv
4169958.csv
3952865.csv
514448.csv
29281586.csv
15747758.csv
32584815.csv
59658261.csv
2655806.csv
41001305.csv
167651.csv
1036328.csv
6040219.csv
701957.csv
1255179.csv
3297455.csv
40460631.csv
50157170.csv
237960.csv
13827707.csv
3641582.csv
23484826.csv
48068860.csv
5413858.csv
27036980.csv
46871985.csv
1224794.csv
373844.csv
37761999.csv
52346864.csv
577879.csv
54162428.csv
5098450.csv
63961537.csv
2163651.csv
35669216.csv
67759608.csv
23706645.csv
8225181.csv
17508856.csv
24659494.csv
25494777.csv
20442289.csv
17152005.csv
21267153.csv
67793101.csv
300114.csv
16800214.csv
940490.csv
36347880.csv
12081497.csv
39461255.csv
9202171.csv
2919797.csv
34771764.csv
1254515.csv
2346333.csv
43121874.csv
22617475.csv
41890510.csv
45092273.csv
12353958.csv
40626435.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
3163784.csv
38926330.csv
7073706.csv
41816589.csv
1016204.csv
44565730.csv
23826358.csv
30501256.csv
10290819.csv
657047.csv
31489337.csv
12578987.csv
35276329.csv
47048111.csv
313380.csv
25001565.csv
1127973.csv
333050.csv
1755166.csv
64331951.csv
46526702.csv
40724237.csv
21532863.csv
9105328.csv
231653.csv
10624193.csv
36779971.csv
18247963.csv
33389701.csv
13329462.csv
6573814.csv
1104614.csv
61054005.csv
3122914.csv
1333014.csv
213394.csv
1422430.csv
2528459.csv
32388736.csv
45309565.csv
5339227.csv
1021641.csv
15756640.csv
3335793.csv
39525402.csv
26937110.csv
8733267.csv
29382930.csv
2819782.csv
3925420.csv
49554920.csv
3210249.csv
581650.csv
4369739.csv
45458565.csv
22648616.csv
517154.csv
2346454.csv
43156343.csv
17534176.csv
38018891.csv
27481264.csv
12026629.csv
2316263.csv
36945850.csv
42886623.csv
32835854.csv
16437742.csv
22617474.csv
21148109.csv
23406824.csv
58963288.csv
38496293.csv
4326990.csv
19766330.csv
181

  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
15631624.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


16854877.csv
62112070.csv
53993590.csv
21418250.csv
375207.csv
1977292.csv
2368742.csv
66832997.csv
2106037.csv
56922793.csv
184534.csv
59616696.csv
193048.csv
17250126.csv
1208294.csv
29044071.csv
1561694.csv
33010013.csv
20294287.csv
20561257.csv
33322019.csv
15853269.csv
8077078.csv
33798000.csv
18931404.csv
5974495.csv
31450907.csv
548242.csv
2033107.csv
12488057.csv
49998169.csv
15148037.csv
22384821.csv
24346622.csv
273925.csv
11439864.csv
588700.csv
1699829.csv
7946538.csv
40841303.csv
56968705.csv
58703993.csv
2199844.csv
36732151.csv
1009913.csv
20714116.csv
25990947.csv
1407511.csv
39027196.csv
343158.csv
33651200.csv
22617514.csv
56546717.csv
48577368.csv
40626965.csv
57750596.csv
5162489.csv
18555279.csv
47023439.csv
18527189.csv
1932645.csv
1665338.csv
65488281.csv
26087873.csv
1858488.csv
27774123.csv
7782611.csv
2589136.csv
25819187.csv
3306211.csv
3712093.csv
19621127.csv
771335.csv
66852118.csv
21646223.csv
1823012.csv
11825634.csv
29053531.csv
1758085.csv
1085901.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


18481799.csv
54403.csv
52313737.csv
44610.csv
1614264.csv
3246812.csv
1566892.csv
1230643.csv
2851659.csv
42475295.csv
13328350.csv
9630296.csv
60638495.csv
1892342.csv
10978197.csv
558938.csv
719076.csv
52813683.csv
47757652.csv
22591078.csv
21126015.csv
1205820.csv
6415617.csv
46710630.csv
33426959.csv
39017928.csv
2530612.csv
10648459.csv
983237.csv
202845.csv
1259805.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
515810.csv
9589287.csv
4714486.csv
3841993.csv
30722741.csv
22881812.csv
38538829.csv
801552.csv
24083539.csv
4390217.csv
47362350.csv
53694962.csv
41074162.csv
15317560.csv
35943144.csv
53940870.csv
51587107.csv
33292057.csv
146067.csv
59636340.csv
10213872.csv
2955403.csv
66079534.csv
73094.csv
6470919.csv
4535396.csv
46447934.csv
42837407.csv
1701899.csv
21916958.csv
3562269.csv
53139515.csv
33430861.csv
19170696.csv
48632673.csv
27501289.csv
2014235.csv
4783088.csv
23203326.csv
1074091.csv
41593268.csv
2387397.csv
26712170.csv
1858477.csv
2061872.csv
51560207.csv
3095833.csv
21228255.csv
6671842.csv
4596818.csv
16373987.csv
411647.csv
1656282.csv
1458203.csv
5229356.csv
13155854.csv
1929073.csv
32780002.csv
27879552.csv
53523553.csv
52156012.csv
869407.csv
8708839.csv
4980359.csv
1910194.csv
352525.csv
2876033.csv
39786067.csv
11786948.csv
64550919.csv
15852.csv
11310263.csv
482991.csv
41759730.csv
44467906.csv
168615.csv
2

  df_master['id'] = df_master['id'].str.replace('.csv','')


5483866.csv
203756.csv
37376395.csv
6522534.csv
1189487.csv
1673764.csv
408458.csv
49437129.csv
5969836.csv
25340350.csv
441058.csv
2893097.csv
598739.csv
17185402.csv
46251114.csv
559165.csv
48869087.csv
2563074.csv
36236467.csv
36260621.csv
3732800.csv
48594540.csv
42803588.csv
57239516.csv
2096008.csv
36746405.csv
11921863.csv
26063934.csv
39024892.csv
58319731.csv
22474590.csv
42672026.csv
24192178.csv
889256.csv
28929251.csv
6394436.csv
4743938.csv
9982217.csv
26695221.csv
1945618.csv
2412699.csv
321508.csv
1181030.csv
620393.csv
35842070.csv
501483.csv
49720360.csv
49389463.csv
2722084.csv
31590048.csv
43308898.csv
4391451.csv
1790366.csv
49644.csv
239435.csv
354197.csv
41832577.csv
6405606.csv
8464740.csv
30136745.csv
784425.csv
6312653.csv
29558308.csv
2912204.csv
54370320.csv
232574.csv
803609.csv
1923589.csv
35910736.csv
1304096.csv
22727602.csv
18871587.csv
40800834.csv
29812850.csv
1249182.csv
3046464.csv
5316603.csv
3820835.csv
3454297.csv
29126353.csv
43700799.csv
2711105

  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
4272084.csv
378441.csv
38107392.csv
43962263.csv
57647571.csv
3614145.csv
18645549.csv
1615422.csv
40736021.csv
49400746.csv
9130581.csv
55483494.csv
39863343.csv
158729.csv
22196906.csv
35822974.csv
29375943.csv
35335402.csv
36893147.csv
14393039.csv
42484734.csv
16204528.csv
39923015.csv
35227774.csv
3421152.csv
2023843.csv
462642.csv
3170698.csv
34197440.csv
30694197.csv
59857833.csv
1522236.csv
32169362.csv
61715362.csv
17879004.csv
1506380.csv
53746398.csv
33354400.csv
2132782.csv
55523.csv
7436867.csv
21577293.csv
522264.csv
24476989.csv
24004656.csv
58419540.csv
16548209.csv
762834.csv
25239134.csv
5747849.csv
50781395.csv
30968079.csv
2368422.csv
5747691.csv
12905505.csv
45465209.csv
43591774.csv
2430406.csv
31502272.csv
7103397.csv
35312915.csv
63322156.csv
11924925.csv
1611134.csv
26058744.csv
50780935.csv
499380.csv
8313975.csv
10767876.csv
52455063.csv
7023735.csv
8619063.csv
26667937.csv
3143244.csv
43142903.csv
98

  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
394819.csv
366457.csv
348972.csv
42305259.csv
797881.csv
15846869.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
1084348.csv
6835158.csv
48428861.csv
36890114.csv
636083.csv
4484761.csv
30362806.csv
35362000.csv
32014837.csv
952134.csv
10645631.csv


  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
43765315.csv
39517511.csv
14789914.csv
30773524.csv
45260466.csv
604757.csv
2192708.csv
12036216.csv
155964.csv
15532552.csv
4527747.csv
38625801.csv
33799446.csv
1918092.csv
51386083.csv
14771922.csv
1201311.csv
2674029.csv
22990712.csv
27705096.csv
4406945.csv
51390187.csv
10446419.csv
2104959.csv
50867792.csv
5116322.csv
34447325.csv
49252932.csv
38978851.csv
28140672.csv
1329489.csv
63007187.csv
14609655.csv
325591.csv
21326050.csv
57021678.csv
49434620.csv
2979882.csv
44498250.csv
40244497.csv
2170014.csv
5092744.csv
2290158.csv
16941465.csv
427863.csv
23249588.csv
168429.csv
41228670.csv
1944935.csv
59844454.csv
11004938.csv
64830694.csv
417778.csv
19519416.csv
7956701.csv
5421779.csv
11886823.csv
22953995.csv
1050858.csv
66847503.csv
7166916.csv
33711168.csv
4702557.csv
66278653.csv
317537.csv
31272.csv
1484140.csv
4540910.csv
4399580.csv
26885830.csv
37034336.csv
7608516.csv
13825715.csv
6224122.csv
235972.csv
5117766.c

  df_master['id'] = df_master['id'].str.replace('.csv','')


2542480.csv
40263821.csv
35247492.csv
21591304.csv
3900107.csv
15401991.csv
8423025.csv
7235299.csv
7275865.csv
1238433.csv
38307738.csv
4451000.csv
55659530.csv
53709923.csv
7178447.csv
5014482.csv
32979321.csv
1976818.csv
52120233.csv
26208295.csv
12095422.csv
13838195.csv
65283748.csv
1882755.csv
151451.csv
18199531.csv
47522465.csv
54994514.csv
10774598.csv
11705048.csv
7341524.csv
2595496.csv
19800337.csv
25340971.csv
36005413.csv
712041.csv
3954218.csv
43292378.csv
410188.csv
8524776.csv
12800045.csv
6859690.csv
64916900.csv
20167564.csv
57019278.csv
16977048.csv
22677427.csv
18880178.csv
11868865.csv
2461503.csv
34041428.csv
16834617.csv
4726218.csv
49267802.csv
19992127.csv
47473158.csv
47893431.csv
1320240.csv
19994542.csv
63247490.csv
16458436.csv
17661132.csv
46584821.csv
56848560.csv
24008332.csv
29187831.csv
164398.csv
3440828.csv
1651128.csv
29975573.csv
21625143.csv
10554828.csv
2018545.csv
2390979.csv
59385551.csv
2634592.csv
15879535.csv
4359149.csv
68269379.csv
151029

  df_master['id'] = df_master['id'].str.replace('.csv','')


Blank type: ****
Expecting value: line 1 column 1 (char 0)
206214.csv
1503167.csv
353229.csv
22617476.csv
472916.csv
217319.csv
50202690.csv
24759483.csv
40340961.csv
2306072.csv
1479807.csv
292461.csv
876135.csv
17495976.csv
16819020.csv
11610492.csv
42249696.csv
22673682.csv
3173768.csv
7951654.csv
23782546.csv
41777975.csv
21965195.csv
22785100.csv
534957.csv
42561433.csv
11117818.csv
66558684.csv
10100095.csv
49891985.csv
46244889.csv
511533.csv
5095163.csv
16650005.csv
1960713.csv
23692033.csv
45583405.csv
40159536.csv
517142.csv
52378745.csv
2483551.csv


In [None]:
df_empty_type.info()
df_empty_type.head()
df_empty_people = df_empty_type[df_empty_type['types'].isin(entityPlaceTypesExt)]
df_empty_people.info()
df_empty_people.head(28)
# df_empty_type.head(28)

In [5]:
# REVIEW  cache files headers run just once, if needed
# FORMATING SOME FILES WITH ERRORS
files_list = [f for f in os.listdir('cacheSpotlightResponse') if not f.startswith('.')]
# parse to dataframe
df_files = pd.DataFrame(files_list, columns=['file_name'])
# df_files = df_files.query("file_name=='3075.csv'")
df_files.to_csv('temp_cacheFile.csv',index=False)
print(len(df_files))

for chunk in pd.read_csv('temp_cacheFile.csv', chunksize=30):
    df_files = pd.DataFrame()
    df_files['file_name'] = chunk['file_name']
    
    # iterate over files
    for file_name_item in df_files.itertuples():
        print(file_name_item.file_name)
        
        try:
            entitiesByBiography_df = pd.read_csv('cacheSpotlightResponse/'+file_name_item.file_name)

            headers = list(entitiesByBiography_df)
            change = False
            # Cleaning headers:
            # Case 1: headers still have character '@'
            if '@URI' in headers:
                change = True
                print("found 1")

            if '@surfaceForm' in headers:
                change = True
                print("found 2")
            # Case 2: still using surfaceForm instead of entity
            if 'surfaceForm' in headers:
                change = True
                print("found 3")
            # Case 3: no headers

            headers = [col.replace('@','') for col in headers]
            headers = [col.replace('surfaceForm','entity') for col in headers]

            if change:
                print(headers)
                entitiesByBiography_df.columns.values[0:11] = headers
                entitiesByBiography_df.to_csv('cacheSpotlightResponse/'+file_name_item.file_name,index=False)
        except Exception as e: 
            print("ERROR reading file: " + file_name_item.file_name)
            print(e)
            # # open CSV file and assign header
            # with open('cacheSpotlightResponse/'+file_name_item.file_name, 'wb') as file:
            #     dw = csv.DictWriter(file, delimiter=',',fieldnames=cols)
            #     dw.writeheader()

80242
1000228.csv
100273.csv
100487.csv
10085.csv
1009725.csv
ERROR reading file: 1009725.csv
Error tokenizing data. C error: Expected 7 fields in line 171, saw 11

1010510.csv
ERROR reading file: 1010510.csv
Error tokenizing data. C error: Expected 7 fields in line 578, saw 11

1010943.csv
ERROR reading file: 1010943.csv
Error tokenizing data. C error: Expected 7 fields in line 29, saw 11

10120.csv
1013900.csv
ERROR reading file: 1013900.csv
Error tokenizing data. C error: Expected 7 fields in line 490, saw 11

1022191.csv
ERROR reading file: 1022191.csv
Error tokenizing data. C error: Expected 7 fields in line 151, saw 11

1023303.csv
ERROR reading file: 1023303.csv
Error tokenizing data. C error: Expected 7 fields in line 80, saw 11

1024347.csv
ERROR reading file: 1024347.csv
Error tokenizing data. C error: Expected 7 fields in line 99, saw 11

1028178.csv
ERROR reading file: 1028178.csv
Error tokenizing data. C error: Expected 7 fields in line 1145, saw 11

103549.csv
ERROR readi

In [15]:
# 1. Extract People and Places entities
entitiyTypes = ['DBpedia:Person','DBpedia:MusicalArtist','DBpedia:Place','DBpedia:SocietalEvent']
# http://dbpedia.org/ontology/Person
# df_entityType = pd.DataFrame({'id':[1,2],'types':['http://dbpedia.org/ontology/Person',
#                                                       'http://dbpedia.org/ontology/MusicalArtist']})
# read master with index of entities
df_master_cache = pd.read_csv('cacheSpotlightResponse/emptyTypes_master.csv')

# use chunk to load a small number of files in memory
for chunk in pd.read_csv('totalBiographiesEntities.csv', chunksize=30):
    df_files = pd.DataFrame()
    df_files['file_name'] = chunk['file_name']
    
    # iterate over files
    for file_name_item in df_files.itertuples():
        try:
            # start = time.time()
            print(file_name_item.file_name)
            # check if the file exists, e.g., 10085.csv
            # file should exist, it was create during queryDBpedia process
            file_exists = os.path.isfile('cacheSpotlightResponse/'+file_name_item.file_name)

            if file_exists:
                # read the cached results from the query
                entitiesByBiography_df = pd.read_csv('cacheSpotlightResponse/'+file_name_item.file_name)
                # surfaceForm: entity as found in the text
                # entitiesByBiography_df.rename(columns = {'surfaceForm':'entity'},inplace = True)
                # column types contain DBpedia classes, e.g., dbo:Person, Schema:Place, etc.
                # select all rows where types <> NA, not empty = ne
                df_ne = entitiesByBiography_df.loc[~entitiesByBiography_df['types'].isna()] 

                df_result = pd.DataFrame()
                if len(df_ne) >0:
                    for entity in entitiyTypes:
                        df_temp = df_ne[df_ne['types'].str.contains(entity)].copy()

                        if not df_temp.empty:
                            if entity == 'DBpedia:Person' or entity == 'DBpedia:MusicalArtist':
                                df_temp['entType'] = 'person'
                            elif entity == 'DBpedia:Place':
                                df_temp['entType'] = 'place'
                            elif entity == 'DBpedia:SocietalEvent' or entity == 'DBpedia:Event':
                                df_temp['entType'] = 'event'

                            df_result = df_result.append(df_temp)

                # now filter all the entities that are empty, this df contains all the info of the matching
                # e.g., surfaceForm, similarityScore, etc
                df_e = entitiesByBiography_df.loc[entitiesByBiography_df['types'].isna()] 
                
                # only if the dataframe has some rows with empty value in column "entity"
                if not df_e.empty:
                    # list of entities and the name of the files that store the results from the query
                    # when merging, only the entities that are stored in cache will have a file name value in column "file_name"
                    df_merge = df_master_cache.merge(df_e, on=['URI','entity'],how='right')
                    # df_merge.to_csv('extractedEntitiesPersonPlaceOnly/df_merge.csv',index=False)

                    # for all documents not in cache, meaning that value in column "file_name" is empty
                    df_cache = df_merge.loc[df_merge['file_name'].isna()]
                    print(len(df_cache))
                    if len(df_cache)>0:
                        # iterate over the df with all the entities that not in cache
                        for item_empty in df_cache.itertuples():
                            # query DBpedia, store results in local folder and update the master file
                            df_temp = queryEntityLeft(item_empty.URI,item_empty)
                        
                        # read the master cache again with the latest updates after checking for URIs in previous line
                        df_master_cache = pd.read_csv('cacheSpotlightResponse/emptyTypes_master.csv')
                        # merge again all the entities that had column "types" value as empty to find the name of the file
                        # where the response from DBpedia is stored
                        df_merge = df_master_cache.merge(df_e, on=['URI','entity'],how='right')
                    # df_merge.to_csv('extractedEntitiesPersonPlaceOnly/df_merge.csv',index=False)

                    # df_cache will contain all the entities and the file_name value from where to read its URIs
                    # filter all the rows that have column "file_name" as having a value
                    df_cache = df_merge.loc[~df_merge['file_name'].isna()] 
                    for item_queriedType in df_cache.itertuples():
                        # print(item_queriedType.entity)
                        appendRow = False
                        typeEntity = ''

                        # read cache file
                        file_exists = os.path.isfile('cacheSpotlightResponse/'+item_queriedType.file_name)
                        if file_exists:
                            df_empty_type = pd.read_csv('cacheSpotlightResponse/'+item_queriedType.file_name)
                            df_empty_type.drop_duplicates(subset=['entity','URI','types'],keep='first')
                            
                            # once the file is located, filter the rows that match entity and URI
                            try:
                                df_empty_type = df_empty_type.query("""entity == "{}" and URI=="{}" """.format(item_queriedType.entity,item_queriedType.URI))
                            except SyntaxError as ex:
                                # df_empty_type = df_empty_type.query("""entity == '{}' and URI=='{}'""".format(item_queriedType.entity,item_queriedType.URI))
                                df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)]
                            except TokenError as te:
                                df_empty_type = df_empty_type.query("entity == '{}' and URI=='{}'".format(item_queriedType.entity,item_queriedType.URI))

                            # Classify entities according to type
                            # First, identify entity types Place, filter all rows with column "types" value equal to Place URI
                            # print(df_temp.URI)
                            # print(df_temp.types)
                            
                            df_temp = df_empty_type[df_empty_type['types'].str.contains('http://dbpedia.org/ontology/Place')].copy()
                            if not df_temp.empty:
                                types = df_temp['types'].loc[df_temp.index[0]]
                                entType = 'place'
                                appendRow = True
                            # Second, identify entity types Person, filter all rows with column "types" value equal to Person URI
                            df_temp = df_empty_type.query('types == "http://dbpedia.org/ontology/Person" or types=="http://dbpedia.org/ontology/MusicalArtist"')
                            df_temp = df_temp.sort_values(by='types', ascending=False)
                            
                            if not df_temp.empty:
                                # Before adding this entity to the list, review that it is a person and it is not a PersonFunction or MusicalTerminology
                                # call function, search all the values in DBpedia, returs True if the URI is of a person, False otherwise
                                response = checkPersonEntity(item_queriedType.URI)
                                if not response:
                                    appendRow = False
                                    continue
                                types = df_temp['types'].loc[df_temp.index[0]]
                                entType = 'person'
                                appendRow = True
                            # else:
                            #     print(item_queriedType.URI)
                            #     print("Empty: " + item_queriedType.file_name)

                            if appendRow:
                                df_new_row = pd.DataFrame([[item_queriedType.URI, item_queriedType.support, types, item_queriedType.entity, item_queriedType.offset, 
                                                            item_queriedType.similarityScore,item_queriedType.percentageOfSecondRank, item_queriedType.sentence, 
                                                            item_queriedType.sentenceIndex, item_queriedType.paragraphIndex, 
                                                           item_queriedType.section]],columns=list(entitiesByBiography_df))
                                df_new_row['entType'] = entType
                                df_result = df_result.append(df_new_row,ignore_index=True)
                        else:
                            print(df_merge.file_name)
                            print(df_merge.URI)
                            break

                    # print('No empty types')
            # IF file does not exists, then the 
            else:
                print("[INFO] - File does not exist, run 020_queryDbpedia book. File: " + file_name_item.file_name)

            # df_result['wikiPageID'] = file_name_item.file_name.replace('.csv','')
            df_result.drop(['sentence','wikiPageID','section'], axis=1,inplace=True)
            df_result.drop_duplicates(subset=['paragraphIndex','sentenceIndex','offset','entity','URI'],keep='first',inplace=True)
            
            df_result.to_csv('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name,index=False)
            # end = time.time()
            # print("The time of execution of above program is :", end-start)
        except (pd.errors.ParserError, KeyError) as pe:
            print('Error: ' + file_name_item.file_name)
            if hasattr(pe, 'message'):
                print(pe.message)
            else:
                print(pe)
            df_error = pd.DataFrame([[file_name_item.file_name]],columns=['file_name'])
            df_error.to_csv('temp_error.csv',mode='a',index=False,header=False)
    time.sleep(120)

1009725.csv
Error: 1009725.csv
Error tokenizing data. C error: Expected 7 fields in line 171, saw 11

1010510.csv
Error: 1010510.csv
Error tokenizing data. C error: Expected 7 fields in line 578, saw 11

1010943.csv
Error: 1010943.csv
Error tokenizing data. C error: Expected 7 fields in line 29, saw 11

1013900.csv
Error: 1013900.csv
Error tokenizing data. C error: Expected 7 fields in line 490, saw 11

1022191.csv
Error: 1022191.csv
Error tokenizing data. C error: Expected 7 fields in line 151, saw 11

1023303.csv
Error: 1023303.csv
Error tokenizing data. C error: Expected 7 fields in line 80, saw 11

1024347.csv
Error: 1024347.csv
Error tokenizing data. C error: Expected 7 fields in line 99, saw 11

1028178.csv
Error: 1028178.csv
Error tokenizing data. C error: Expected 7 fields in line 1145, saw 11

103549.csv
Error: 103549.csv
Error tokenizing data. C error: Expected 7 fields in line 336, saw 11

103566.csv
Error: 103566.csv
Error tokenizing data. C error: Expected 7 fields in line

  df_master['id'] = df_master['id'].str.replace('.csv','')


3136059.csv
0
313835.csv
2
3141790.csv
1
3143897.csv
2
314487.csv
10
3152159.csv
0
3155747.csv
0
316711.csv
0
3180265.csv
0
3180885.csv
0
3190132.csv
1
319074.csv
11
3197212.csv
0
3204508.csv
0
320685.csv
4
3212385.csv
1
3217228.csv
0
322567.csv
0
3225681.csv
0
3227606.csv
1
3228328.csv
0
323499.csv
3
3236079.csv
0
3236194.csv
2
325790.csv
0
3263983.csv
0
3266294.csv
0
3273.csv
[INFO] - File does not exist, run 020_queryDbpedia book. File: 3273.csv
3280230.csv
0
328330.csv
1
328940.csv
0
3291750.csv
0
329722.csv
0
330103.csv
0
3301109.csv
1
3302723.csv
11
331463.csv
22
3337272.csv
2
3344033.csv
4
3347663.csv
3
3358257.csv
2
3365880.csv
3
3372135.csv
0
3373682.csv
0
3388232.csv
3
3391490.csv
1
3394339.csv
2
3396268.csv
0
340945.csv
0
3411455.csv
0
3418089.csv
0
341837.csv
2
3419345.csv
0
343176.csv
0
3445142.csv
0
344562.csv
0
3450382.csv
0
3450589.csv
1
345114.csv
6
3452637.csv
1
3460078.csv
1
346303.csv
0
3463240.csv
4
347201.csv
0
3478434.csv
0
348348.csv
0
3488554.csv
2
3496560.csv


  df_empty_type = df_empty_type[df_empty_type['URI'].str.contains(item_queriedType.URI)]


67892.csv
3
679806.csv
0
68308.csv
0
690900.csv
0
695088.csv
0
695584.csv
1
696261.csv
0
696825.csv
0
699151.csv
0
70020.csv
1
701860.csv
0
702942.csv
0
70315.csv
2
703488.csv
0
706650.csv
0
707930.csv
0
708521.csv
1
709620.csv
0
70986.csv
3
711489.csv
1
712322.csv
1
715167.csv
0
716617.csv
0
719979.csv
0
720273.csv
0
720783.csv
7
722501.csv
4
726151.csv
2
728429.csv
6
734633.csv
0
735176.csv
0
736528.csv
6
739770.csv
0
741791.csv
0
742176.csv
0
746774.csv
1
747500.csv
2
75200.csv
2
752694.csv
1
754282.csv
1
756836.csv
0
757387.csv
0
7585.csv
3
759988.csv
1
760333.csv
0
761492.csv
0
762314.csv
1
76252.csv
5
763791.csv
1
766300.csv
1
7672.csv
2
767288.csv
1
770812.csv
0
772715.csv
0
777352.csv
0
777623.csv
0
78231.csv
2
782651.csv
0
787927.csv
0
787997.csv
0
788959.csv
0
790333.csv
0
793333.csv
0
795427.csv
1
797572.csv
1
806373.csv
0
80696.csv
6
8095.csv
4
817466.csv
0
820730.csv
1
82413.csv
3
825643.csv
3
827409.csv
0
827624.csv
0
8300.csv
0
833109.csv
7
833172.csv
5
8351.csv
4
837456

KeyboardInterrupt: 

In [4]:
# Reprocess data to delete information not needed.
# use chunk to load a small number of files in memory
# list_wikiIdSample
# totalBiographiesBenchmark
for chunk in pd.read_csv('list_wikiIdSample.csv', chunksize=30):
# for chunk in pd.read_csv('totalBiographiesBenchmark.csv', chunksize=30):
    df_files = pd.DataFrame()
    df_files['file_name'] = chunk['file_name']
    
    # iterate over files
    for file_name_item in df_files.itertuples():
        try:
            # start = time.time()
            print(file_name_item.file_name)
            # check if the file exists, e.g., 10085.csv
            # file should exist, it was create during queryDBpedia process
            file_exists = os.path.isfile('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name)

            if file_exists:
                df_pp = pd.read_csv('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name)
                df_pp.drop(['sentence','wikiPageID','section'], axis=1,inplace=True)
                df_pp.drop_duplicates(subset=['paragraphIndex','sentenceIndex','offset','entity','URI'],keep='first',inplace=True)
                df_pp.to_csv('extractedEntitiesPersonPlaceOnly/'+file_name_item.file_name,index=False)
            else:
                print("[INFO] - File does not exist, run 020_queryDbpedia book. File: " + file_name_item.file_name)
        except (pd.errors.ParserError, KeyError) as pe:
            print('Error: ' + file_name_item.file_name)
            if hasattr(pe, 'message'):
                print(pe.message)
            else:
                print(pe)

608845.csv
1048151.csv
2232977.csv
1913885.csv
1790990.csv
409969.csv
1709886.csv
1551347.csv
50782750.csv
181946.csv
579599.csv
50963136.csv
226142.csv
1422240.csv
1174545.csv
70020.csv
752694.csv
312781.csv
2898019.csv
2253021.csv
3450382.csv
2553865.csv
3263983.csv
2320846.csv
144624.csv
827409.csv
50902387.csv
671637.csv
562392.csv
1396921.csv
2815597.csv
858538.csv
2269540.csv
1205991.csv
701860.csv
2334176.csv
252147.csv
652114.csv
320685.csv
8716.csv
3606266.csv
576282.csv
1232492.csv
113049.csv
43165.csv
3770842.csv
221191.csv
1491559.csv
1790137.csv
529161.csv
167975.csv
3126224.csv
78231.csv
1022191.csv
154038.csv
223497.csv
356414.csv
2625004.csv
1359335.csv
3081864.csv
2450365.csv
63747.csv
739770.csv
1566844.csv
50230.csv
439467.csv
181985.csv
2296293.csv
2520364.csv
2682932.csv
3302723.csv
51560453.csv
891378.csv
3236079.csv
1914666.csv
1593504.csv
152097.csv
720273.csv
419012.csv
51012206.csv
1234422.csv
994118.csv
1181499.csv
341837.csv
788959.csv
1394504.csv
3818460.cs