In [0]:
# importing required libraries
import requests,json 
from pyspark.sql import SparkSession

In [0]:
# Build the Spark session for the bronze-layer ETL; the requested
# application name is kept in app_name so later cells can refer to it.
app_name = "Google_JobETL_Bronze"
session_builder = SparkSession.builder.appName(app_name)
spark = session_builder.getOrCreate()

In [0]:
# Echo the application name that was requested when the session was built.
# NOTE(review): this prints the local app_name variable, not a value read back
# from the running session.
print("App Name :", app_name)

In [0]:
# List every catalog the session can see in Unity Catalog.
catalogs_df = spark.sql("SHOW CATALOGS")
display(catalogs_df)

In [0]:
# List the schemas of whichever catalog is currently active.
initial_schemas_df = spark.sql("SHOW SCHEMAS")
display(initial_schemas_df)

In [0]:
# Make sure the job_marketplace catalog exists, then make it the current
# catalog for the statements that follow.
catalog_name = "job_marketplace"
spark.sql(f' CREATE CATALOG IF NOT EXISTS {catalog_name} ')
spark.sql(f' USE CATALOG {catalog_name} ')

In [0]:
# Show the schemas that exist in the current catalog before bronze_layer is created.
catalog_schemas_df = spark.sql("SHOW SCHEMAS")
display(catalog_schemas_df)

In [0]:
# Create the bronze_layer schema under job_marketplace unless it already exists.
bronze_schema = "job_marketplace.bronze_layer"
spark.sql(f'CREATE SCHEMA IF NOT EXISTS {bronze_schema}')

In [0]:
# List the schemas again to confirm that bronze_layer now shows up.
schemas_after_create_df = spark.sql("SHOW SCHEMAS")
display(schemas_after_create_df)

In [0]:
# List the tables of the currently selected schema (no schema is named here,
# so this is whatever schema the session currently points at).
tables_df = spark.sql("SHOW TABLES")
display(tables_df)

In [0]:
%sql
-- Key/value lookup table; the notebook reads its API key from here.
-- The name is unqualified, so the table lands in the current catalog/schema.
-- NOTE(review): values are stored as plain strings; consider a dedicated
-- secret store for credentials -- confirm with the workspace owners.
CREATE TABLE IF NOT EXISTS secure_config (key STRING, value STRING);


In [0]:
# Look up the API key in the secure_config table (the row whose key is 'openai_api').
# NOTE(review): the row is named 'openai_api' although the value is sent to
# SerpApi by the request loop -- confirm the row name is intentional.
api_key_row = (
    spark.table("secure_config")
    .filter("key = 'openai_api'")
    .select("value")
    .first()
)
# first() yields None when no row matches; fail with an actionable message
# instead of the opaque "'NoneType' object is not subscriptable" that indexing
# the missing row would raise.
if api_key_row is None or not api_key_row[0]:
    raise ValueError(
        "No API key found: insert a row with key = 'openai_api' into secure_config"
    )
api_key = api_key_row[0]

# Search parameters for the Google Jobs API: one query per role, all in one location.
roles = ["Data Engineer", "Python Developer", "ETL Developer", "Spark Engineer", "Data Analyst"]
location = "India"
# Accumulator for the job search results gathered across all roles.
all_jobs = []

In [0]:
# Query the Google Jobs engine on SerpApi once per role and gather every result.
# Results are collected in a fresh list that replaces all_jobs at the end, so
# re-running this cell does not stack duplicates on top of an earlier run, and
# a failure midway leaves all_jobs untouched.
fetched_jobs = []
for role in roles:
    params = {
        "engine": "google_jobs",
        "q": role,
        "location": location,
        "api_key": api_key,
    }
    # The timeout keeps a stalled connection from hanging the notebook indefinitely.
    res = requests.get("https://serpapi.com/search.json", params=params, timeout=30)
    # Check the status by hand: raise_for_status() would put the full URL,
    # including the api_key query parameter, into the error shown in the output.
    if not res.ok:
        raise RuntimeError(
            f"Google Jobs API request for {role} role failed with HTTP {res.status_code}"
        )
    payload = res.json()
    # A successful HTTP response can still carry an "error" entry (for example
    # when the search has no results); report it and move on to the next role.
    if "error" in payload:
        print(f"⚠️ Google Jobs API returned an error for {role} role: {payload['error']}")
        continue
    print(f"✅Reading 🔴LIVE Data from Google Jobs API for {role} role")
    jobs = payload.get("jobs_results", [])
    # Tag every job with the role that was searched for, so the origin of each
    # record stays known once all roles are merged into one list.
    for job in jobs:
        job["search_role"] = role
    print(f"Appended {role} role Data to all_jobs list")
    fetched_jobs.extend(jobs)

all_jobs = fetched_jobs

In [0]:
# Sanity-check the collected jobs: show how many there are plus a small sample,
# rather than dumping the entire list into the cell output.
print(f"Collected {len(all_jobs)} jobs in total")
print(all_jobs[:2])

In [0]:
# Write the collected jobs to the bronze layer tables.
# Stop early when nothing was collected: building a DataFrame from an empty
# list fails because no schema can be inferred, and that error does not point
# at the real cause (no API results).
if not all_jobs:
    raise ValueError(
        "all_jobs is empty: no jobs were collected from the API, nothing to write to the bronze layer"
    )

bronze_df = spark.createDataFrame(all_jobs)

# daily_jobs holds only the latest run: each write replaces the previous contents.
(
    bronze_df.write.format("delta")
    .option("mergeSchema", "true")
    .mode("overwrite")
    .saveAsTable("job_marketplace.bronze_layer.daily_jobs")
)

# raw_jobs keeps history: each run is appended to what is already stored.
(
    bronze_df.write.format("delta")
    .option("mergeSchema", "true")
    .mode("append")
    .saveAsTable("job_marketplace.bronze_layer.raw_jobs")
)

print("✅Data Written to Bronze Layer")

In [0]:
%sql
-- Inspect the raw_jobs bronze table to confirm the load.
SELECT *
FROM job_marketplace.bronze_layer.raw_jobs;

In [0]:
%sql
-- Inspect the daily_jobs bronze table.
SELECT *
FROM job_marketplace.bronze_layer.daily_jobs

In [0]:
%sql
-- Peek at a single row of raw_jobs.
SELECT *
FROM job_marketplace.bronze_layer.raw_jobs
LIMIT 1;