In [1]:
!pip install psycopg2



In [15]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.0


In [16]:
import psycopg2 # library for working with Postgres database
from dotenv import load_dotenv # library for working with environment variables
import os
import csv
import io

In [3]:
load_dotenv()  # Load environment variables from the .env file (python-dotenv); must run before the os.environ lookups below

True

In [4]:
# Pull the database connection parameters from environment variables
# (keeps credentials out of the notebook). A missing variable yields None.
host, dbname, user, password = (
    os.environ.get(key) for key in ('DB_HOST', 'DB_NAME', 'DB_USER', 'DB_PASS')
)

In [5]:
# Folder that holds the CSV files (taken from the environment) and the names
# of the tables handled by this notebook.
path = os.environ.get('FILE_PATH')

tbl_territories = 'Territories'
tbl_customers = 'Customers'
tbl_product_subcategories = 'Product_Subcategories'
tbl_products = 'Products'
tbl_sales = 'Sales_2015'


In [6]:
# Sanity check: show the host value picked up from the environment
print(f"host={host}")

host=localhost


In [7]:
# Sanity check: show the database name picked up from the environment.
# (The label previously said "host=", a copy-paste slip from the cell above.)
print(f"dbname={dbname}")

host=raw_inventory


In [6]:
# Build the location of the sales CSV from the configured folder and table name,
# then print it to confirm FILE_PATH was loaded.
full_path = f"{path}/{tbl_sales}.csv"
print(full_path)

C:/Users/angel/Desktop/DE PROJECTS/Professional-Portfolio/Building Data Model and Writing ETL Job/data/Sales_2015.csv


In [27]:
# Create the tables (if they do not exist yet) and bulk-load every CSV file
# inside ONE transaction.
#
# psycopg2's `with conn:` only scopes the transaction: it commits when the block
# succeeds and rolls back when it raises. It does NOT close the connection, so
# the connection is closed explicitly in `finally`.
#
# NOTE(review): the tables are created with IF NOT EXISTS and never truncated,
# so re-running this cell appends the same rows again and will hit the
# primary-key constraints. Truncate first if the load must be repeatable.
conn = None
try:
    # Keyword arguments instead of a hand-built DSN string: values that contain
    # spaces or quotes (e.g. a password) cannot break the connection string.
    conn = psycopg2.connect(host=host, dbname=dbname, user=user, password=password)

    with conn:  # transaction scope: commit on success, rollback on error
        with conn.cursor() as cursor:
            # Set the datestyle configuration so dates are parsed as month/day/year
            cursor.execute("SET datestyle = 'ISO, MDY'")

            # --- Table definitions (primary & foreign keys) -------------------
            # Referenced tables are named through the same tbl_* variables as the
            # tables themselves, so renaming a table cannot leave a stale REFERENCES.
            create_territory_table = (
                f"CREATE TABLE IF NOT EXISTS {tbl_territories} ("
                "SalesTerritoryKey INT PRIMARY KEY, Region VARCHAR, Country VARCHAR, Continent VARCHAR)"
            )
            create_customers_table = (
                f"CREATE TABLE IF NOT EXISTS {tbl_customers} ("
                "CustomerKey INT PRIMARY KEY, Prefix VARCHAR, FirstName VARCHAR, LastName VARCHAR, "
                "BirthDate DATE, MaritalStatus CHAR(3), Gender CHAR(3), EmailAddress VARCHAR, "
                "AnnualIncome VARCHAR, TotalChildren INT, EducationLevel VARCHAR, Occupation VARCHAR, "
                "HomeOwner CHAR(3))"
            )
            create_product_sub_table = (
                f"CREATE TABLE IF NOT EXISTS {tbl_product_subcategories} ("
                "ProductSubcategoryKey INT PRIMARY KEY, SubcategoryName VARCHAR, ProductCategoryKey INT)"
            )
            create_products_table = (
                f"CREATE TABLE IF NOT EXISTS {tbl_products} ("
                "ProductKey INT PRIMARY KEY, "
                f"ProductSubcategoryKey INT REFERENCES {tbl_product_subcategories}(ProductSubcategoryKey), "
                "ProductSKU VARCHAR, ProductName VARCHAR, ModelName VARCHAR, ProductDescription VARCHAR, "
                "ProductColor VARCHAR, ProductSize VARCHAR, ProductStyle VARCHAR, "
                "ProductCost NUMERIC(10,2), ProductPrice NUMERIC(10,2))"
            )
            # NOTE(review): OrderNumber alone is the primary key although the table
            # also has OrderLineItem; if an order can span several line items the
            # load will fail on duplicates -- confirm against the data.
            create_sales_table = (
                f"CREATE TABLE IF NOT EXISTS {tbl_sales} ("
                "OrderDate DATE, StockDate DATE, OrderNumber VARCHAR PRIMARY KEY, "
                f"ProductKey INT REFERENCES {tbl_products}(ProductKey), "
                f"CustomerKey INT REFERENCES {tbl_customers}(CustomerKey), "
                f"TerritoryKey INT REFERENCES {tbl_territories}(SalesTerritoryKey), "
                "OrderLineItem INT, OrderQuantity INT)"
            )

            # Referenced (parent) tables must exist before the tables that point at them.
            for create_statement in (
                create_territory_table,
                create_customers_table,
                create_product_sub_table,
                create_products_table,
                create_sales_table,
            ):
                cursor.execute(create_statement)

            # --- Load the CSV files -------------------------------------------
            # Plain comma-separated files: skip the header row and use copy_from().
            # The table name is lower-cased because PostgreSQL folds the unquoted
            # names used in CREATE TABLE to lower case, while recent psycopg2
            # versions quote the name passed to copy_from().
            for table in (tbl_territories, tbl_product_subcategories):
                with open(f'{path}/{table}.csv', 'r') as file:
                    next(file)  # skip the header row
                    cursor.copy_from(file, table.lower(), sep=',')

            # Files with fields enclosed in quotation marks (commas inside a field
            # are not delimiters) need COPY ... FORMAT CSV via copy_expert().
            # The open file is streamed directly instead of being read into memory
            # first. Order matters: sales rows reference products and customers.
            quoted_csv_loads = (
                (tbl_products, None),            # default text encoding
                (tbl_customers, 'iso-8859-1'),   # customers.csv is Latin-1 encoded
                (tbl_sales, None),
            )
            for table, encoding in quoted_csv_loads:
                with open(f'{path}/{table}.csv', 'r', encoding=encoding) as file:
                    cursor.copy_expert(
                        f"COPY {table} FROM STDIN WITH (FORMAT CSV, DELIMITER ',', HEADER TRUE, QUOTE '\"', ESCAPE '\"')",
                        file,
                    )
except psycopg2.OperationalError as e:
    print("Could not start a connection to the database")
    print(e)
except psycopg2.DatabaseError as e:
    # No explicit rollback needed: leaving `with conn:` through an exception has
    # already rolled the transaction back.
    print("An error occur while working with the database, Rolling back!")
    print(e)
finally:
    # Always release the connection, whether the load succeeded or failed.
    if conn is not None:
        conn.close()

In [29]:

# Scratch check of str.lower(): prints the lower-cased greeting
greeting = "HOLA"
print(str.lower(greeting))

hola
