# A Correspondence Contextualised
This script enriches existing keyword and person data with additional information retrieved from [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page).

In [59]:
import pandas as pd
from json import JSONDecodeError
from qwikidata.sparql  import return_sparql_query_results

In [60]:
df = pd.read_excel('../data/cds_sheets/Kopie_von_20220816_sachindex_cds.xlsx')

# Some 'Deutsch' cells hold several values separated by a forward slash ('/').
# Give every value its own row, repeating the remaining columns of that row.
# Rows without a usable 'Deutsch' value (NaN) are dropped and the index is
# renumbered 0..n-1, so each row can later be addressed by its position.
df_merged = (
    df.assign(Deutsch=df['Deutsch'].str.split('/'))
    .explode('Deutsch')
    .dropna(subset=['Deutsch'])
    .reset_index(drop=True)
)
df_merged.to_csv('../data/retrieved/sachindex_singular_values.csv')



In [75]:
def _escape_sparql_literal(text):
    """Return *text* escaped for use inside a double-quoted SPARQL string."""
    return (
        str(text)
        .replace('\\', '\\\\')  # must come first so later escapes are kept
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\r', '\\r')
    )


def enrich_data():
    """Look up every 'Deutsch' entry of ``df_merged`` on Wikidata.

    For each value in the 'Deutsch' column a SPARQL query is sent that selects
    all items whose German label equals that value and that also have a French
    label. The matching item URIs are stored as a list in the 'Wikidata'
    column and the French labels as a list in the 'Französisch' column of the
    same row. Both columns are created if they do not exist yet.

    Rows whose query raises ``JSONDecodeError`` are left untouched; the
    affected terms are reported in a single warning at the end.

    The module-level ``df_merged`` is modified in place and finally written to
    '../data/retrieved/sachindex_additional_data.csv'.

    NOTE(review): rows are addressed by index label, so the index of
    ``df_merged`` is assumed to be unique — confirm if it is built differently.
    """
    import warnings

    for column in ('Wikidata', 'Französisch'):
        if column not in df_merged.columns:
            df_merged[column] = None
        # A cell can only hold a list when the column has object dtype.
        df_merged[column] = df_merged[column].astype(object)

    failed_terms = []
    for row_label, term in df_merged['Deutsch'].items():
        # Escape the term so quotes or backslashes in the data cannot break
        # (or alter) the query.
        literal = _escape_sparql_literal(term)
        query = (
            'SELECT ?item ?label_fr '
            'WHERE { '
            f'?item rdfs:label "{literal}"@de. '
            '?item rdfs:label ?label_fr filter (lang(?label_fr) = "fr"). '
            '}'
        )

        try:
            res = return_sparql_query_results(query)
        except JSONDecodeError:
            # The response body was not valid JSON; skip this row but remember
            # the term so the gap is visible.
            failed_terms.append(term)
            continue

        bindings = res['results']['bindings']
        # Direct .at assignment instead of chained indexing, which may write
        # to a temporary copy and leave df_merged unchanged.
        df_merged.at[row_label, 'Wikidata'] = [
            binding['item']['value'] for binding in bindings
        ]
        df_merged.at[row_label, 'Französisch'] = [
            binding['label_fr']['value'] for binding in bindings
        ]

    if failed_terms:
        warnings.warn(
            f'{len(failed_terms)} Wikidata lookup(s) returned no valid JSON '
            f'and were skipped: {failed_terms!r}'
        )

    df_merged.to_csv('../data/retrieved/sachindex_additional_data.csv')

In [None]:
# Run the enrichment: this sends one Wikidata query per 'Deutsch' entry and
# writes '../data/retrieved/sachindex_additional_data.csv' when finished.
enrich_data()