In [1]:
import spacy
import pandas as pd
import sqlite3


In [2]:
def run_query(database_path, query, params=()):
    """Run a SQL query on a SQLite database and return its first column as strings.

    Args:
        database_path: Path to the SQLite database file. Note that
            ``sqlite3.connect`` creates an empty database file if the path
            does not exist yet.
        query: SQL statement to execute. May contain ``?`` placeholders.
        params: Optional sequence (or mapping, for named placeholders) of
            values bound to the placeholders in ``query``. Defaults to no
            parameters, which matches the original two-argument call.

    Returns:
        A list holding ``str()`` of the first column of every result row
        (a SQL NULL therefore becomes the string ``'None'``). If the query
        fails with ``sqlite3.Error`` the error is printed and an empty list
        is returned, so callers can always iterate over the result.
    """
    # Connect outside the try block: a failure to open the database still
    # propagates to the caller, exactly as before.
    connection = sqlite3.connect(database_path)

    try:
        cursor = connection.cursor()

        # Binding values through `params` avoids building SQL by string
        # concatenation.
        cursor.execute(query, params)

        # Turn the list of row tuples into a list of strings for further
        # processing.
        return [str(row[0]) for row in cursor.fetchall()]

    except sqlite3.Error as e:
        print(f"An error occurred: {e}")
        # Return an empty list instead of falling through with None, which
        # would make the caller's `for ... in` loop raise TypeError.
        return []

    finally:
        # Always release the connection, on success and on error.
        connection.close()


In [7]:


# Load the large German spaCy pipeline. Note: 'de_core_news_lg' is the
# word-vector based package, not the transformer-based 'de_dep_news_trf'.
de_dep_nlp = spacy.load('de_core_news_lg')

database_path = '../openregister.db'  # Path to the SQLite database file
query = 'SELECT name FROM company LIMIT 250;'  # First 250 names from the company table

# run_query may yield None when the query fails; fall back to an empty list
# so the loop below simply does nothing instead of raising TypeError.
company_names = run_query(database_path, query) or []

results = []
# nlp.pipe processes the texts in batches and yields one Doc per input text,
# in input order, so zip() pairs every name with its own Doc.
for company_name, doc in zip(company_names, de_dep_nlp.pipe(company_names)):
    # Record every token together with the entity label spaCy assigned to it
    for token in doc:
        results.append({
            'Company Name': company_name,
            'Token': token.text,
            'Entity': token.ent_type_ if token.ent_type_ else 'No entity'
        })

# Convert results to a pandas DataFrame for better structure and analysis.
# Explicit columns keep the frame's schema stable even when there are no rows.
df_results = pd.DataFrame(results, columns=['Company Name', 'Token', 'Entity'])

# Display the DataFrame
print(df_results)

# Optionally, save the DataFrame to a CSV file
#df_results.to_csv('ner_results.csv', index=False)

KeyboardInterrupt: 