# DETAILS

**Project Name:** Visa Jobs NL

**Description:** Notebook to collect, process, and analyze job listings related to visa sponsorship opportunities in the Netherlands.

**Author(s):**  
+ Daniel Willians <daniel.wis@outlook.com>


**Start Date (YYYY-MM-DD):** 2025-05-27

**Version:** 1.0.0

In [0]:
%pip install lxml beautifulsoup4 


In [0]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.functions import current_date


# Reuse the Spark session that is already active, or create one if none exists.
spark = SparkSession.builder.getOrCreate()

def get_ind_data(url=None, timeout=60):
    """Download the IND public register of recognised sponsors as a DataFrame.

    The register page is fetched over HTTP. If the page embeds an
    ``<iframe>`` that carries a ``src`` attribute, the iframe document is
    fetched as well and parsed instead of the outer page.

    Args:
        url: Page to scrape. Defaults to the IND register for regular labour
            and highly skilled migrants.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        pandas.DataFrame: the first HTML table found in the parsed document.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: raised by ``pandas.read_html`` when the document contains
            no table.
    """
    # Imported locally so the function does not depend on names that the
    # notebook's import cell does not provide.
    from io import StringIO
    from urllib.parse import urljoin

    if url is None:
        url = "https://ind.nl/en/public-register-recognised-sponsors/public-register-regular-labour-and-highly-skilled-migrants"

    response = requests.get(url, timeout=timeout)
    # Fail fast on 4xx/5xx instead of handing an error page to the HTML parser.
    response.raise_for_status()
    html = response.text

    # Look for a real <iframe> element rather than the substring "iframe",
    # which can also occur inside scripts or comments.
    iframe = BeautifulSoup(html, "html.parser").find("iframe", src=True)
    if iframe is not None:
        # The src attribute may be relative; resolve it against the final page URL.
        iframe_url = urljoin(response.url, iframe["src"])
        iframe_response = requests.get(iframe_url, timeout=timeout)
        iframe_response.raise_for_status()
        html = iframe_response.text

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas releases.
    tables = pd.read_html(StringIO(html))
    return tables[0]

# Scrape the register into a pandas DataFrame.
pdf = get_ind_data()

# Destination of the raw (bronze) dataset.
bronze_path = "/mnt/netherlands-tracker/bronze/company_skilled_migrants"

# Convert to a Spark DataFrame and stamp every row with the current date.
df = spark.createDataFrame(pdf).withColumn("load_date", current_date())

# NOTE(review): with Spark's default (static) partition overwrite mode,
# mode("overwrite") presumably replaces every existing load_date partition
# under bronze_path, not only today's. Confirm that this is intended.
df.write.mode("overwrite").partitionBy("load_date").parquet(bronze_path)

print("✅ Dados salvos em bronze/company_skilled_migrants com partição load_date")

In [0]:
# Load the Parquet data stored at bronze_path and render it in the notebook.
df = spark.read.parquet(bronze_path)
display(df)