In [11]:
import pandas as pd
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('gdpr_fines.db')
try:
    # Bind the country as a parameter instead of inlining it in the SQL text,
    # so the filter value can be changed without editing (or escaping) the query.
    query = "SELECT * FROM fines WHERE country = ?"
    df_fines = pd.read_sql_query(query, conn, params=('spain',))
finally:
    # Always release the connection, even if the query fails; a leaked handle
    # would stay open in the kernel until it is restarted.
    conn.close()

# Display the DataFrame
df_fines.head()

Unnamed: 0,id,country,flag_url,authority,date,fine_eur,company,sectors,gdpr_articles,violation,summary,verdict_link,case_url
0,ETid-2737,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-23,60000,"AIRE NETWORKS DEL MEDITERRÃNEO, S.L.","Media, Telecoms and Broadcasting",Art. 5 (1) f) GDPR,Insufficient technical and organisational meas...,"The Spanish DPA imposed a fine of EUR 60,000 o...",https://www.aepd.es/documento/ps-00025-2025.pdf,https://www.enforcementtracker.com/ETid-2737
1,ETid-2736,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-21,200000,"TELEFÓNICA MÓVILES ESPAÑA, S.A.","Media, Telecoms and Broadcasting",Art. 6 (1) GDPR,Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00159-2024.pdf,https://www.enforcementtracker.com/ETid-2736
2,ETid-2735,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-20,360,RED ESPAÑOLA DE IDENTIFICACIÓN DE ANIMALES DE ...,Individuals and Private Associations,Art. 58 (1) GDPR,Insufficient cooperation with supervisory auth...,The Spanish DPA imposed a fine of EUR 360 on R...,https://www.aepd.es/documento/ps-00212-2025.pdf,https://www.enforcementtracker.com/ETid-2735
3,ETid-2734,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-19,200000,"ASNEF-EQUIFAX, SERVICIOS DE INFORMACIÓN SOBRE ...","Finance, Insurance and Consulting","Art. 6 (1) GDPR, Art. 17 GDPR",Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00157-2024.pdf,https://www.enforcementtracker.com/ETid-2734
4,ETid-2733,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-16,30000,ATRESMEDIA CORPORACIÓN DE MEDIOS DE COMUNICACI...,"Media, Telecoms and Broadcasting",Art. 5 (1) c) GDPR,Non-compliance with general data processing pr...,"The Spanish DPA imposed a fine of EUR 30,000 o...",https://www.aepd.es/documento/ps-00175-2024.pdf,https://www.enforcementtracker.com/ETid-2733


In [None]:
from openai import OpenAI

from pydantic import BaseModel

from dotenv import load_dotenv
import os

# Let python-dotenv populate the environment before the key is looked up.
load_dotenv()

# Module-level API client used by the revenue lookup; the key comes from the
# environment (None if unset) rather than being hard-coded in the notebook.
openai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


# Structured-output model handed to the revenue lookup call as `text_format`.
# NOTE(review): documented with comments rather than a docstring on purpose;
# pydantic appears to copy a class docstring into the generated schema, which
# would change what is sent along with the request. Confirm before adding one.
class RevenueInfo(BaseModel):
    # Whole-number revenue figure; the only field callers read from the response.
    annual_revenue_in_euro: int


def get_revenue(row):
    """Look up a company's annual revenue in euros via an OpenAI web-search call.

    Parameters
    ----------
    row : mapping-like (for example a ``pandas.Series`` row of the fines table)
        Read by key: ``'company'``, ``'sectors'`` and ``'date'``. ``'date'``
        must be a string; the text before the first ``-`` is used as the year.

    Returns
    -------
    int or None
        The ``annual_revenue_in_euro`` field parsed from the model response, or
        ``None`` when anything fails. Failures are printed, never raised, so a
        single bad row cannot abort a whole ``apply`` run.
    """
    # Label for the error message. It is set before the ``try`` so the handler
    # never has to index ``row`` again; doing so could raise a second exception
    # (e.g. a missing 'company' key) and escape the best-effort handling.
    company_label = "<unknown company>"
    try:
        company_name = row['company']
        company_label = company_name
        sector = row['sectors']
        year = row['date'].split('-')[0]

        # Create a prompt for the AI to search for company revenue
        prompt = f"""
        Please search for the annual revenue of the company "{company_name}" in the {sector} sector for the year {year} or the most recent available data.
        If applicable, please use zoominfo.com as a datasource as they have comprehensive financial data.
        Please provide the annual revenue in Euros. If the revenue is reported in another currency, please convert it to Euros using historical exchange rates for that year.
        """

        # Call OpenAI with web search capabilities
        response = openai.responses.parse(
            model="gpt-4o-2024-08-06",
            tools=[{
                "type": "web_search_preview",
                "search_context_size": "low",
            }],
            input=[
                {"role": "system", "content": "You are a helpful assistant that can search the web for company financial information. Please provide accurate revenue data based on your search results."},
                {"role": "user", "content": prompt}
            ],
            text_format=RevenueInfo,
        )

        # Guard against a response with no parsed payload so the printed error
        # says what happened instead of a bare attribute error on None.
        parsed = response.output_parsed
        if parsed is None:
            raise ValueError("response contained no parsed RevenueInfo")
        return parsed.annual_revenue_in_euro

    except Exception as e:
        # Deliberate best-effort: report the failure and signal "no data".
        print(f"Error getting revenue for {company_label}: {e}")
        return None


# Smoke test: run the lookup for the first fine only before launching the full batch.
first_fine = df_fines.iloc[0]
first_company = first_fine['company']
print(f"Company: {first_company}, Revenue: {get_revenue(first_fine)}")

Company: AIRE NETWORKS DEL MEDITERRÃNEO, S.L., Revenue: 107746223


In [35]:
from pandarallel import pandarallel

# Start the pandarallel worker pool (16 workers) with a progress bar.
pandarallel.initialize(nb_workers=16, progress_bar=True)
print("Getting revenue data for companies (this may take a while)...")

# One get_revenue call per row, spread across the workers; the resulting
# Series is then attached to the frame as the 'revenue' column.
revenue_by_row = df_fines.parallel_apply(get_revenue, axis=1)
df_fines['revenue'] = revenue_by_row

INFO: Pandarallel will run on 16 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.
Getting revenue data for companies (this may take a while)...


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=62), Label(value='0 / 62'))), HBox…

In [None]:
# Preview the first five rows, now including the 'revenue' column.
df_fines.head(n=5)


Unnamed: 0,id,country,flag_url,authority,date,fine_eur,company,sectors,gdpr_articles,violation,summary,verdict_link,case_url,revenue
0,ETid-2737,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-23,60000,"AIRE NETWORKS DEL MEDITERRÃNEO, S.L.","Media, Telecoms and Broadcasting",Art. 5 (1) f) GDPR,Insufficient technical and organisational meas...,"The Spanish DPA imposed a fine of EUR 60,000 o...",https://www.aepd.es/documento/ps-00025-2025.pdf,https://www.enforcementtracker.com/ETid-2737,107800000
1,ETid-2736,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-21,200000,"TELEFÓNICA MÓVILES ESPAÑA, S.A.","Media, Telecoms and Broadcasting",Art. 6 (1) GDPR,Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00159-2024.pdf,https://www.enforcementtracker.com/ETid-2736,41315000000
2,ETid-2735,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-20,360,RED ESPAÑOLA DE IDENTIFICACIÓN DE ANIMALES DE ...,Individuals and Private Associations,Art. 58 (1) GDPR,Insufficient cooperation with supervisory auth...,The Spanish DPA imposed a fine of EUR 360 on R...,https://www.aepd.es/documento/ps-00212-2025.pdf,https://www.enforcementtracker.com/ETid-2735,0
3,ETid-2734,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-19,200000,"ASNEF-EQUIFAX, SERVICIOS DE INFORMACIÓN SOBRE ...","Finance, Insurance and Consulting","Art. 6 (1) GDPR, Art. 17 GDPR",Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00157-2024.pdf,https://www.enforcementtracker.com/ETid-2734,31085753
4,ETid-2733,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-16,30000,ATRESMEDIA CORPORACIÓN DE MEDIOS DE COMUNICACI...,"Media, Telecoms and Broadcasting",Art. 5 (1) c) GDPR,Non-compliance with general data processing pr...,"The Spanish DPA imposed a fine of EUR 30,000 o...",https://www.aepd.es/documento/ps-00175-2024.pdf,https://www.enforcementtracker.com/ETid-2733,918950000


In [37]:
# Normalise fine amounts to integers by stripping thousands separators.
# Casting to str first keeps the cell re-runnable: once the column is already
# int, the `.str` accessor would raise on a second execution.
# regex=False treats the comma as a literal regardless of the pandas default.
df_fines['fine_eur'] = (
    df_fines['fine_eur']
    .astype(str)
    .str.replace(',', '', regex=False)
    .astype(int)
)

In [38]:
# Preview the first five rows after converting 'fine_eur' to integers.
df_fines.head(n=5)

Unnamed: 0,id,country,flag_url,authority,date,fine_eur,company,sectors,gdpr_articles,violation,summary,verdict_link,case_url,revenue
0,ETid-2737,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-23,60000,"AIRE NETWORKS DEL MEDITERRÃNEO, S.L.","Media, Telecoms and Broadcasting",Art. 5 (1) f) GDPR,Insufficient technical and organisational meas...,"The Spanish DPA imposed a fine of EUR 60,000 o...",https://www.aepd.es/documento/ps-00025-2025.pdf,https://www.enforcementtracker.com/ETid-2737,107800000
1,ETid-2736,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-21,200000,"TELEFÓNICA MÓVILES ESPAÑA, S.A.","Media, Telecoms and Broadcasting",Art. 6 (1) GDPR,Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00159-2024.pdf,https://www.enforcementtracker.com/ETid-2736,41315000000
2,ETid-2735,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-20,360,RED ESPAÑOLA DE IDENTIFICACIÓN DE ANIMALES DE ...,Individuals and Private Associations,Art. 58 (1) GDPR,Insufficient cooperation with supervisory auth...,The Spanish DPA imposed a fine of EUR 360 on R...,https://www.aepd.es/documento/ps-00212-2025.pdf,https://www.enforcementtracker.com/ETid-2735,0
3,ETid-2734,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-19,200000,"ASNEF-EQUIFAX, SERVICIOS DE INFORMACIÓN SOBRE ...","Finance, Insurance and Consulting","Art. 6 (1) GDPR, Art. 17 GDPR",Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00157-2024.pdf,https://www.enforcementtracker.com/ETid-2734,31085753
4,ETid-2733,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-16,30000,ATRESMEDIA CORPORACIÓN DE MEDIOS DE COMUNICACI...,"Media, Telecoms and Broadcasting",Art. 5 (1) c) GDPR,Non-compliance with general data processing pr...,"The Spanish DPA imposed a fine of EUR 30,000 o...",https://www.aepd.es/documento/ps-00175-2024.pdf,https://www.enforcementtracker.com/ETid-2733,918950000


In [39]:
# Create a connection to the new SQLite database
conn_new = sqlite3.connect('spain_gdpr_fines.db')
try:
    # Write the dataframe to the new database; an existing 'fines' table is
    # replaced, and the DataFrame index is not stored as a column.
    df_fines.to_sql('fines', conn_new, if_exists='replace', index=False)
finally:
    # Close the connection even if the write fails, so the database file is
    # not left held open by the kernel.
    conn_new.close()

In [40]:
import pandas as pd
import sqlite3

# Connect to the SQLite database
conn = sqlite3.connect('spain_gdpr_fines.db')
try:
    # Read the whole 'fines' table back to check what was persisted.
    query = "SELECT * FROM fines"
    df_fines_spain = pd.read_sql_query(query, conn)
finally:
    # Close the connection whether or not the read succeeded.
    conn.close()

# Display the DataFrame
df_fines_spain.head()

Unnamed: 0,id,country,flag_url,authority,date,fine_eur,company,sectors,gdpr_articles,violation,summary,verdict_link,case_url,revenue
0,ETid-2737,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-23,60000,"AIRE NETWORKS DEL MEDITERRÃNEO, S.L.","Media, Telecoms and Broadcasting",Art. 5 (1) f) GDPR,Insufficient technical and organisational meas...,"The Spanish DPA imposed a fine of EUR 60,000 o...",https://www.aepd.es/documento/ps-00025-2025.pdf,https://www.enforcementtracker.com/ETid-2737,107800000
1,ETid-2736,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-21,200000,"TELEFÓNICA MÓVILES ESPAÑA, S.A.","Media, Telecoms and Broadcasting",Art. 6 (1) GDPR,Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00159-2024.pdf,https://www.enforcementtracker.com/ETid-2736,41315000000
2,ETid-2735,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-20,360,RED ESPAÑOLA DE IDENTIFICACIÓN DE ANIMALES DE ...,Individuals and Private Associations,Art. 58 (1) GDPR,Insufficient cooperation with supervisory auth...,The Spanish DPA imposed a fine of EUR 360 on R...,https://www.aepd.es/documento/ps-00212-2025.pdf,https://www.enforcementtracker.com/ETid-2735,0
3,ETid-2734,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-19,200000,"ASNEF-EQUIFAX, SERVICIOS DE INFORMACIÓN SOBRE ...","Finance, Insurance and Consulting","Art. 6 (1) GDPR, Art. 17 GDPR",Insufficient legal basis for data processing,"The Spanish DPA imposed a fine of EUR 200,000 ...",https://www.aepd.es/documento/ps-00157-2024.pdf,https://www.enforcementtracker.com/ETid-2734,31085753
4,ETid-2733,spain,./flags/flag_spain.png,Spanish Data Protection Authority (aepd),2025-05-16,30000,ATRESMEDIA CORPORACIÓN DE MEDIOS DE COMUNICACI...,"Media, Telecoms and Broadcasting",Art. 5 (1) c) GDPR,Non-compliance with general data processing pr...,"The Spanish DPA imposed a fine of EUR 30,000 o...",https://www.aepd.es/documento/ps-00175-2024.pdf,https://www.enforcementtracker.com/ETid-2733,918950000
