In [None]:
import psycopg2
import os
from dotenv import load_dotenv
import pandas as pd
from matplotlib.pyplot import subplots
from bs4 import BeautifulSoup

# Load environment variables via python-dotenv before reading the DATABASE_* settings.
load_dotenv()

# Connection settings; each one is None when the variable is not set.
database_name = os.getenv("DATABASE_NAME")
database_host = os.getenv("DATABASE_HOST")
database_port = os.getenv("DATABASE_PORT")
database_username = os.getenv("DATABASE_USERNAME")
database_password = os.getenv("DATABASE_PASSWORD")

# Single connection object that the cells below reuse.
connection = psycopg2.connect(
    user=database_username,
    password=database_password,
    host=database_host,
    port=database_port,
    database=database_name,
)

In [None]:
def check_and_create_table(conn, table_name: str, create_table_sql: str) -> int:
    """Create a table when ``information_schema.tables`` has no entry with that name.

    Args:
        conn: Open DB-API connection (psycopg2 in this notebook). It must
            provide ``cursor()``, ``commit()`` and ``rollback()``.
        table_name: Name looked up in ``information_schema.tables``.
        create_table_sql: Statement executed verbatim when the table is missing.

    Returns:
        0 if the table already existed or was created, 1 if any step failed.
        Failures are printed, never raised.
    """
    # Start unbound-safe: if conn.cursor() itself fails there is nothing to close.
    cursor = None
    try:
        cursor = conn.cursor()
        # NOTE(review): the lookup matches on the name only, so a same-named
        # table in another schema also counts as "exists" - confirm intended.
        cursor.execute(
            "SELECT EXISTS(SELECT * FROM information_schema.tables WHERE table_name=%s);",
            (table_name,),
        )
        exists = cursor.fetchone()[0]

        if exists:
            print(f"Table {table_name} already exists")
        else:
            cursor.execute(create_table_sql)
            conn.commit()
            print(f"Table {table_name} created successfully")
        return 0

    except Exception as error:
        print(f"Error: {error}")
        # A failed statement leaves the transaction aborted; roll back so the
        # connection stays usable for the caller's next statement.
        try:
            conn.rollback()
        except Exception as rollback_error:
            print(f"Rollback failed: {rollback_error}")
        return 1

    finally:
        if cursor is not None:
            cursor.close()

In [None]:
# DDL for scrapes_extracted: initially just the scrapeid primary key.
create_scrapes_extracted_sql = """
CREATE TABLE scrapes_extracted (scrapeid INT PRIMARY KEY)"""

# Left as a bare expression so the notebook displays the returned status code.
check_and_create_table(
    connection,
    "scrapes_extracted",
    create_scrapes_extracted_sql,
)

In [None]:
# Add scrapes_extracted columns
cursor = connection.cursor()
try:
    # IF NOT EXISTS makes this cell safe to run more than once.
    cursor.execute("""ALTER TABLE scrapes_extracted
                   ADD COLUMN IF NOT EXISTS title TEXT;              
""")
    # Commit only when the ALTER succeeded.
    connection.commit()
except Exception as error:
    print(f"Error: {error}")
    # Discard the failed transaction so later cells start from a clean state.
    connection.rollback()
finally:
    cursor.close()

In [None]:
# Step 1: fetch every scrape that has no row in scrapes_extracted yet.
cursor = connection.cursor()
try:
    # NOTE(review): the `scrapeid > 5` cutoff is not explained in this
    # notebook - confirm why the first scrapes are skipped.
    cursor.execute("""SELECT scrapes.scrapeid, scrapes.html
FROM scrapes
LEFT JOIN scrapes_extracted on scrapes.scrapeid = scrapes_extracted.scrapeid
WHERE scrapes_extracted.scrapeid IS NULL
AND scrapes.scrapeid > 5
""")
    pending_rows = cursor.fetchall()
except Exception as error:
    print(f"Error: {error}")
    # Clear the aborted transaction and continue with nothing to process,
    # rather than iterating a cursor that has already been closed.
    connection.rollback()
    pending_rows = []
finally:
    cursor.close()

# Step 2: parse the first <h1> of each page and store it as the title.
inner_cursor = connection.cursor()
try:
    for row in pending_rows:
        # `or ""` lets a NULL html value fall through to the "not found" branch.
        soup = BeautifulSoup(markup=row[1] or "", features="lxml")
        job_title = soup.find("h1")
        if not job_title:
            print(f"Job title not found for scrapeid {row[0]}")
            continue

        job_title_text = job_title.text.strip()
        scrape_id = int(row[0])
        print(job_title_text)
        print(scrape_id)
        try:
            inner_cursor.execute("""INSERT INTO scrapes_extracted (scrapeid, title)
                           VALUES (%s, %s)""", (scrape_id, job_title_text))
            # Commit per row so one bad insert cannot poison the transaction
            # and discard the rows that were already written.
            connection.commit()
        except Exception as error:
            print(f"Error: {error}")
            connection.rollback()
finally:
    inner_cursor.close()