In [1]:
# Install gRPC (grpcio) and grpcio-status into the kernel's environment.
# They are needed to connect to the remote Spark session used later in
# this notebook. %pip (rather than !pip) targets the running kernel's
# environment; the kernel may need a restart before the packages import.
%pip install grpcio grpcio-status


Note: you may need to restart the kernel to use updated packages.


In [2]:
# Sanity check: confirm that the gRPC packages import in this kernel
# and report which grpcio version is active.
import grpc
from grpc_status import rpc_status  # not used further here; presumably imported to confirm grpcio-status is installed

print("gRPC version: {}".format(grpc.__version__))

gRPC version: 1.78.0


In [None]:
# Check connection to remote Spark Session
#
# Opens a Spark Connect session against a remote server, runs a tiny
# aggregation as a smoke test, and prints the result and the Spark version.
# Any failure (connection or test job) is reported instead of raised so the
# notebook keeps running.
import os
from pyspark.sql import SparkSession
import sys

# Host name or IP address of the Spark Connect server.
# The original cell left this assignment empty (a SyntaxError). Reading it
# from the environment keeps the address out of the notebook; set
# SPARK_CONNECT_HOST before starting Jupyter, or edit the fallback here.
IP = os.environ.get("SPARK_CONNECT_HOST", "localhost")

# 1. Initialize the Spark Connect Session
try:
    # "sc://" is the Spark Connect URL scheme; 15002 is the port this
    # notebook expects the server to listen on.
    spark = SparkSession.builder \
        .remote(f"sc://{IP}:15002") \
        .getOrCreate()

    print("✅ Successfully connected to Remote Spark!")

    # 2. Run a small test job
    print("Running test calculation...")
    df = spark.range(10).toDF("number")
    # collect() returns a list of rows; [0][0] is the single aggregated value.
    avg = df.agg({"number": "avg"}).collect()[0][0]

    print(f"📊 Test Result: The average of 0-9 is {avg}")
    print(f"Spark Version: {spark.version}")

except Exception as e:
    # Deliberately broad: this cell is a diagnostic, so every failure is
    # shown to the user rather than aborting the notebook.
    print("❌ Connection Failed!")
    print(f"Error details: {e}")


In [None]:
#
#PostgreSQL Port Connection Test
#
#If the script below fails, it is most likely because the containers were
# started separately by hand and are therefore not on the same Docker network.
#
#Execute the following commands in a terminal to put them on the same network:
#docker network create my-data-network
#docker network connect my-data-network pyspark-notebook
#docker network connect my-data-network Postgres
#
#After those commands, the two containers will be in the same network.

In [None]:
import socket

# TCP reachability probe: checks that this Jupyter container can open a
# connection to the Postgres container's port. It does not authenticate or
# speak the PostgreSQL protocol.

# Replace with your actual Postgres container name (from `docker ps`)
# Example: 'db', 'postgres', 'my-postgres-container'
db_host = "Postgres"
port = 5432

try:
    # The with-block closes the socket on every path, including when
    # connect() raises; the original leaked it on failure.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(2)  # 2 second timeout
        s.connect((db_host, port))
        print(f"✅ Success! Jupyter can reach {db_host} on port {port}.")
except OSError as e:
    # OSError covers name-resolution failures, refused connections and
    # timeouts (socket.gaierror / ConnectionRefusedError / TimeoutError).
    print(f"❌ Failed to connect: {e}")
    print("Tip: Ensure both containers are on the same Docker network.")

In [5]:
# Install SQLAlchemy and the psycopg2 PostgreSQL driver into the kernel's
# environment; the database cell further down imports sqlalchemy and connects
# with a postgresql:// URL.
%pip install sqlalchemy psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl.metadata (4.9 kB)
Downloading psycopg2_binary-2.9.11-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl (4.4 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m4.4/4.4 MB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.11
Note: you may need to restart the kernel to use updated packages.


In [None]:
#
#Test the connection to the laAPI database and create a sample table.
#

In [7]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text

# Connects to the laAPI PostgreSQL database, creates `sample_table` if it
# does not exist yet, and verifies that the table is visible in the catalog.

# 1. Configuration
db_user = "myuser"
# Prefer a password supplied through the environment so that real credentials
# never live in the notebook. The fallback is only the sample value from the
# original cell. NOTE(review): do not rely on it outside a local sandbox.
db_pass = os.environ.get("LAAPI_DB_PASSWORD", "abc")
db_host = "Postgres" # Replace with your actual container name
db_port = "5432"
db_name = "laAPI"  # The specific database

# 2. Create the Connection Engine
# Format: postgresql://user:password@host:port/database
# User and password are URL-escaped so characters such as '@', ':' or '/'
# cannot corrupt the connection URL.
connection_str = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_pass)}"
    f"@{db_host}:{db_port}/{db_name}"
)
engine = create_engine(connection_str)

# 3. Create the Table
# IF NOT EXISTS makes the statement safe to re-run.
create_table_sql = """
CREATE TABLE IF NOT EXISTS sample_table (
    id SERIAL PRIMARY KEY,
    name VARCHAR(50),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""

# 4. Execute
try:
    with engine.connect() as connection:
        connection.execute(text(create_table_sql))
        connection.commit() # Important: Commit the change!
        print(f"✅ Table 'sample_table' created successfully in '{db_name}' as user '{db_user}'.")

        # Verify it exists
        result = connection.execute(text("SELECT table_name FROM information_schema.tables WHERE table_name = 'sample_table';"))
        if result.fetchone():
            print("   (Verification passed: Table was found in the schema.)")

except Exception as e:
    # Deliberately broad: connection, permission and SQL errors are all
    # reported to the user instead of aborting the notebook.
    print(f"❌ Failed to create table: {e}")
    # Interpolate the configured user so the tip always names the account
    # actually used (the original text said 'myUser', which did not match).
    print(f"Tip: Ensure '{db_user}' has CREATE privileges on the public schema.")

‚úÖ Table 'sample_table' created successfully in 'laAPI' as user 'myuser'.
   (Verification passed: Table was found in the schema.)
