Using python to load and manipulate POSTGRES data

In [2]:
# Boiled plate code - (You could delete or comment out the packages which are not being used)

import sys 
import json
import csv
import yaml

import pandas as pd
import numpy as np

import matplotlib as mpl

In [3]:
# Install psycopg2 (PostgreSQL database adapter for the Python programming language)

!pip install psycopg2



In [4]:
# Importing the package

import psycopg2

In [5]:
# Setting up Postgres connection. Note database name is "discussion2".
# https://earthly.dev/blog/psycopg2-postgres-python/

In [6]:
# Save database configuration to an INI file

ini_content = """
[postgresql]
database=discussion2
user=postgres
password=postgres
host=localhost
port=5432
"""

with open('database.ini', 'w') as f:
    f.write(ini_content.strip())

print("Configuration file created successfully.")

Configuration file created successfully.


In [7]:
from configparser import ConfigParser

In [8]:
# Select the database.ini file 

def config(filename='database.ini', section='postgresql'):
    # Create a parser
    parser = ConfigParser()
    # Read config file
    parser.read(filename)

    # Get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception(f'Section {section} not found in the {filename} file')

    return db

In [9]:
db_conn = None  # Define db_conn at the beginning

In [10]:
try:
    # Read connection parameters
    params = config()
    print(config())
    

    # Connect to the PostgreSQL database
    db_conn = psycopg2.connect(**params)

    print("Successfully connected to the database.")

    # Create a cursor object
    cur = db_conn.cursor()
        
    # Example query (Modify as needed)
    q1 = '''
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema='company'
          AND table_type='BASE TABLE';
    '''
    cur.execute(q1)
    
except Exception as e:
    error = e

{'database': 'discussion2-new', 'user': 'postgres', 'password': 'postgres', 'host': 'localhost', 'port': '5432'}
Successfully connected to the database.


In [11]:
print(cur.fetchmany(10))

[('dept_locations',), ('project',), ('department',), ('dependent',), ('employee',), ('works_on',), ('employee_feedback',)]


In [12]:
q2 = '''set search_path to company'''
cur.execute(q2)
db_conn.commit()

In [13]:
q3 = '''
SELECT *
FROM department
'''
cur.execute(q3)
print(cur.fetchmany(20))

[('Research', 5, '333445555', '5/22/88'), ('Administration', 4, '987654321', '1/1/95'), ('Headquarters', 1, '888665555', '6/19/81')]


In [14]:
# goal is to get employees whose first names start with a specified initial
#   the WHERE from the query will look like: WHERE fname LIKE 'J%'  (where J will be a parameter)

q4start = """
SELECT *
FROM employee
WHERE fname LIKE '""" 

q4end = """%'"""

finit = 'J'

q4 = q4start + finit + q4end

print(q4)

cur.execute(q4)

print(cur.fetchmany(20))

# COMMITING THE TRANSACTION
db_conn.commit()


SELECT *
FROM employee
WHERE fname LIKE 'J%'
[('John', 'B', 'Smith', '123456789', '1/9/65', '731-Fondren-Houston-TX', 'M', 30000, '333445555', 5), ('Jennifer', 'S', 'Wallace', '987654321', '6/20/41', '291-Berry-Bellaire-TX', 'F', 43000, '888665555', 4), ('Joyce', 'A', 'English', '453453453', '7/31/72', '5631-Rice-Houston-TX', 'F', 25000, '333445555', 5), ('James', 'E', 'Borg', '888665555', '11/10/37', '450-Stone-Houston-TX', 'M', 55000, None, 1)]


<span style="color:blue">
.commit() is used to revert (or undo) all the changes made in the current transaction. It is an essential feature for maintaining data integrity and consistency in database operations
</span>

In [15]:
import pprint
pprint.pp(cur.fetchmany(20))

[]


<span style=color:blue>Why is the above command giving us empty?  It is because the fetchone() and fetchmany() cycle through the answer and then finish.  To get the full answer we have to execute the query again</span>

### Updates and constraints

In [16]:
# IMPORTANT! Only if you have constraint errors!!!!

q1000 = '''
    ALTER TABLE works_on DROP CONSTRAINT fk_employee;
'''

q1001 = '''
    ALTER TABLE employee DROP CONSTRAINT unique_ssn;
'''

cur.execute(q1000)
cur.execute(q1001)
db_conn.commit()

In [None]:

q4 = '''
ALTER TABLE works_on
ADD CONSTRAINT fk_employee
FOREIGN KEY (essn)
REFERENCES employee(ssn);
'''
cur.execute(q4)

InvalidForeignKey, indicates that the foreign key constraint you are trying to add on the works_on table cannot be enforced because the referenced column in the employee table (ssn) does not have a unique constraint. A foreign key must reference a primary key or a unique column in the referenced table.

In [None]:
#Rolling back the transaction
db_conn.rollback()

# Check if the ssn column in employee has only unique values
q5='''
SELECT ssn, COUNT(*) FROM employee GROUP BY ssn HAVING COUNT(*) > 1;
'''
cur.execute(q5)
print(cur.fetchall())

In [None]:
q5 = '''
ALTER TABLE employee
ADD CONSTRAINT unique_ssn UNIQUE (ssn);
'''

cur.execute(q5)
print('Column unique_ssn updated to only values')

<span style="color:blue;"> Now that we have set column ssn to be unique, we can go ahead and create a foreign key in works_on for column essn which references ssn in the table employee</span>

In [None]:
# Setting a foreign key on essn (works_on) which references ssn (employee)

q6 = '''
ALTER TABLE works_on
ADD CONSTRAINT fk_employee
FOREIGN KEY (essn)
REFERENCES employee(ssn);
'''
cur.execute(q6)
print('Foreign Key Created')

# Let's commit this 
db_conn.commit()

<span style="color:blue;">Let's try to add an employee which does not exist to the table works_on and see what happens</span>

In [None]:
# Inserting into works_on and employee with ssn 999999999

q7 = '''
INSERT INTO works_on (essn, pno, hours)
VALUES ('999999999', 101, 30);
'''

cur.execute(q7)

<span style="color:blue;">Employee with ssn 999999999 does not exist hence when we try to add that employee to the works_on table, we get a violation</span>

In [None]:
#Rolling back the transaction
db_conn.rollback()

# Let's add an employee to wokrs_on which does exist
q8 = '''
    INSERT INTO works_on (essn, pno, hours) 
    VALUES ('123456789', 10, 20);
'''

cur.execute(q8);
print('Employee added to works_on')


In [None]:
# Checking if the previous operation was successful
q9 = '''
    SELECT * FROM works_on;
'''

cur.execute(q9)
cur.fetchall()

In [None]:
db_conn.commit()

# Deleting an employee now that it is referenced

q10 = '''
    DELETE FROM employee WHERE ssn='123456789';
'''

cur.execute(q10);

<span style="color:blue;">Creating a table with constraints and keys</span>

In [None]:
db_conn.rollback()

#Let's create a table named employee_benefits which will record benefits each employee receives. The table will include:
# A unique benefit ID as the primary key.
# The SSN of the employee, which references the ssn column in the employee table (assuming ssn is either a primary key or at least a unique key in employee).
# Columns for the type of benefit and the benefit value.

q10 = '''
    CREATE TABLE employee_benefits (
    benefit_id SERIAL PRIMARY KEY,
    employee_ssn CHAR(9),
    benefit_type VARCHAR(100),
    benefit_value VARCHAR(100),
    FOREIGN KEY (employee_ssn)
    REFERENCES employee(ssn)
    ON DELETE CASCADE
);
'''

cur.execute(q10)
print('Table created')

In [None]:
# Insert some values into the table 

q11 = '''
    INSERT INTO employee_benefits (employee_ssn, benefit_type, benefit_value)
    VALUES
    ('123456789', 'Health Insurance', 'Full Coverage'),
    ('987654321', 'Stock Options', '500 Shares');
'''

cur.execute(q11)

In [None]:
# What if we try adding an employee that does not exist?
# Remember the foreign key constraints

q12 = '''
    INSERT INTO employee_benefits (employee_ssn, benefit_type, benefit_value)
    VALUES
    ('000000000', 'Health Insurance', 'Full Coverage');
'''

cur.execute(q12)

In [None]:
# Check if an error occurred and handle it
if 'error' in locals():
    print("An error occurred:", error)

In [None]:
db_conn.close()

<span style=color:blue>It is a good practice to "close" your connection to a database before exiting</span>