# This Notebook contains the python code that performs operations on database

## Import all necessary libraries

In [46]:
# import data manipulation libraries use in data science and analytics
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
import itertools

# import psycopg library to talk to the database
import psycopg2

# import pydaantic for type checking and insuring typesafety 
from pydantic import (BaseModel, EmailStr, Field, SecretStr, ValidationError)
from typing import (Optional, Any, Dict, List)

# import sqlalchemy to manage database connections and carry out db operations
from sqlalchemy import (create_engine, inspect)
from sqlalchemy.engine import Engine

# importing python-decouple to help us access the secrets from a .env file
from decouple import (config, Config, RepositoryEnv)
# import path lib to set path for the env file
from pathlib import Path

## Create database connection model and function to validate the database parameters. 

In [47]:
# A pydantic model for database connection
# The reason I am using frozen in the fields in this model is because I don't want the fields to change after its setup for the first time
class DbConnManagerModel (BaseModel):
    host : str = Field(default=None, examples=["localhost"], frozen=True)
    database : str = Field(default=None, examples=["db_name"], frozen=True)
    user : str = Field(default=None, examples=["username"], frozen=True)
    password : SecretStr = Field(default=None, examples=["your_password"], frozen=True)
    port : int = Field(default=None, examples=[5432], frozen=True)           

## Tell jupyter notebook where to look for .env file

### **NOTICE :** 
If your os platform is windows and you have created an .env file using file explorer like what I used to do in linux then windows has a tendency adding ```.text``` after the env file so your env file becomes like this ```.env.txt```. Beware of this when trying to use windows file exporer to create an env file. Use Vs code or jupyter notebook instead to create a ```.env``` file. <br>
I will have to look into it a bit more no biggy.

In [48]:
env_path = Path("D:/training/credencys-training-pyspark/python/database_operation/.env")
config = Config(RepositoryEnv(env_path))

### Explaination : 
#### RepositoryEnv:
- This comes from the ```python-decouple``` library.
- This library knows how to read a ```.env``` file.
- It parses through the ```.env``` file and makes the key-value pair available.
#### Config:
- This ```Config``` library comes from ```python-decouple``` library.
- ```Config``` is a wrapper around different configuration sources like (```.env``` files)
- When I created ```Config(RepositoryEnv(env_path))``` I am telling it to load configuration values from the given ```.env``` file (via ```RepositoryEnv```)

## Create a database connection using sql alchemy and create a connection object

In [64]:
# get the database connection prameters from the .env file and create a dictionary out of it
db_connection_params = {
    "host":config('host'),
    "database":config('database'),
    "user":config('user'),
    "password":config('password'),
    "port":config('port'),
}

# create a function to manage database connection
def database_engine(model_config:DbConnManagerModel) -> Engine:
    try:
        # validate connection parameters before creating a database connection object
        DBConnObj = DbConnManagerModel.model_validate(db_connection_params)
        print(f"DBConnObj : {DBConnObj}")
    
        # create a database engine
        engine = create_engine(
            f"postgresql+psycopg2://{DBConnObj.user}:{DBConnObj.password}@{DBConnObj.host}:{DBConnObj.port}/{DBConnObj.database}"
        )
        return engine
    except Exception as e:
        print(e)

database_engine(db_connection_params)

DBConnObj : host='localhost' database='training' user='postgres' password=SecretStr('**********') port=5432


Engine(postgresql+psycopg2://postgres:***@localhost:5432/training)

## Read a Table from the database table

In [50]:
# Created a pydantic model.
# This model is used to validate the parameters required to read the data from a table from the database
class ReadDbTableModel(BaseModel):
    table_name:str=Field(default=None)
    columns:List[str]=Field(default=[])
    engine:Engine
    model_config = {
        "arbitrary_types_allowed":True
    }

# Created a function to read the data from the table in the database
# This function will return a padnas dataFrame as a result
def read_db_table(config:ReadDbTableModel) -> pd.DataFrame:
    if not config.table_name:
        raise ValidationError("Table name is required!")
    try:
        # If column parameter is not found then return all the columns
        if not len(config.columns):
            df = pd.read_sql_table(config.table_name, con=config.engine)
        # If columns are specified by the user then return only those specified columns
        else:
            df = pd.read_sql_table(config.table_name,con=config.engine,columns=config.columns)
        return df
    except Exception as e:
        raise RuntimeError(f"Failed to read the table {config.table_name} : {e}")

In [51]:
config_read_customers_table = ReadDbTableModel(table_name="customers", engine=engine_obj)
customers_df = read_db_table(config_read_customers_table)
customers_df

NameError: name 'engine_obj' is not defined

In [3]:
conn = psycopg2.connect(
    host="localhost",
    database="training",
    user="postgres",
    password="1212",
    port=5432
)
conn


    

# Open a cursor to perform database operations
cur = conn.cursor()
# Execute a command: create datacamp_courses table
cur.execute("""CREATE TABLE datacamp_courses(
            course_id SERIAL PRIMARY KEY,
            course_name VARCHAR (50) UNIQUE NOT NULL,
            course_instructor VARCHAR (100) NOT NULL,
            topic VARCHAR (20) NOT NULL);
            """)
# Make the changes to the database persistent
conn.commit()
# Close cursor and communication with the database
cur.close()
conn.close()