In [None]:
# Create SQL database, schema, and table

import psycopg2
from psycopg2 import sql

# Create database block
parameters = {
    "dbname" : "postgres",
    "user" : "postgres",
    "password" : "postgres",
    "host" : "localhost"
}

# CREATE DATABASE cannot run inside a transaction block, so this connection
# is switched to autocommit. try/finally guarantees the connection is closed
# even when the existence check or the CREATE DATABASE statement fails.
connection_default = psycopg2.connect(**parameters)
try:
    connection_default.autocommit = True

    with connection_default.cursor() as cursor_default:
        # Check if the database already exists
        cursor_default.execute(
            sql.SQL("SELECT 1 FROM pg_catalog.pg_database WHERE datname = {}")
            .format(sql.Literal("wind_energy_psycopg"))
        )
        exists = cursor_default.fetchone()

        if not exists:
            cursor_default.execute("CREATE DATABASE wind_energy_psycopg;")
finally:
    connection_default.close()

# Create schema/table block
# Same credentials as above; only the target database differs.
wind_params = {**parameters, "dbname": "wind_energy_psycopg"}

connection = psycopg2.connect(**wind_params)
try:
    # `with connection` commits on success and rolls back on error, but it
    # does NOT close the connection, hence the surrounding try/finally.
    with connection:
        with connection.cursor() as cursor:
            # Create schema
            cursor.execute(
                "Create SCHEMA IF NOT EXISTS wind_sites;"
            )

            # Create table
            cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS wind_sites.upd_wind_site (
        id SERIAL PRIMARY KEY,
        date_time TIMESTAMP NOT NULL,
        wind_speed DECIMAL NOT NULL,
        gust_speed DECIMAL NOT NULL,
        wind_direction DECIMAL NOT NULL
    );
    """
            )
finally:
    connection.close()

In [None]:
# ETL Process. Extract and Transform data via Pandas. Load into SQL database.

import pandas as pd
import os
import psycopg2

def pandas_extract_transform(input_path : str, output_name : str = None) -> str:
    """
    Extract data from an Excel file, drop rows with null values, and write the result to a CSV.

    The CSV is always written to the same directory as the Excel file.
    If output_name is not provided, the CSV is named after the Excel file
    (same base name, ".csv" extension).

    Parameters:
    - input_path (str): The file path to the source Excel (.xlsx) file.
    - output_name (str, optional): The desired file name for the output CSV file.

    Raises:
    - FileNotFoundError: If the Excel file specified by 'input_path' does not exist.
    - ValueError: If the file does not have an .xlsx extension or cannot be read.

    Returns:
    - str: The file path to the created CSV file, or None if the target file
      already exists and the user declines to overwrite it.
    """

    # Check if file exists
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The file does not exist: {input_path}")

    # Check if valid extension
    file_extension = os.path.splitext(input_path)[1].lower()
    if file_extension != ".xlsx":
        raise ValueError("File is not an Excel file.")

    # Try to load excel file; `names` replaces the sheet's header row
    try:
        df = pd.read_excel(
            io=input_path,
            engine="openpyxl",
            names=["date_time", "wind_speed", "gust_speed", "wind_direction"],
        )
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback
        raise ValueError(f"Could not read excel file: {e}") from e

    # Remove rows with null values
    df = df.dropna().reset_index(drop=True)

    # Provide output_name if not given. Only the file name is used here:
    # the directory is added below, so keeping the directory part would
    # duplicate it for relative paths (e.g. "sub/sub/data.csv").
    if output_name is None:
        stem = os.path.splitext(os.path.basename(input_path))[0]
        output_name = f"{stem}.csv"

    # Assigns output path to be the same directory as the input file
    output_path = os.path.join(os.path.dirname(input_path), output_name)

    # Ask before clobbering an existing file
    if os.path.isfile(output_path):
        overwrite = input(f"File {output_name} already exists. Overwrite? y/n").strip().lower()
        if overwrite != "y":
            print("Operation cancelled by user")
            return None

    # Generates csv
    df.to_csv(path_or_buf=output_path, index=False, encoding="utf-8")

    return output_path

def sql_load(input_path : str, dbname : str = "wind_energy_psycopg",
             user: str = "postgres", password : str = "postgres", 
             host: str = "localhost", schema : str = "wind_sites", 
             table : str = "upd_wind_site") -> None:
    """
    Loads data from a CSV file into a PostgreSQL table using psycopg2.

    The CSV file is streamed to the server with a COPY ... FROM STDIN command
    targeting the columns date_time, wind_speed, gust_speed and wind_direction.
    The first line of the file is treated as a header (CSV HEADER) and the
    delimiter is a comma. The database connection is opened and closed within
    the function.

    Parameters:
    - input_path (str): Path to the CSV file to be loaded.
    - dbname (str): Name of the database to connect to. Default is "wind_energy_psycopg".
    - user (str): Username for authentication. Default is "postgres".
    - password (str): Password for authentication. Default is "postgres".
    - host (str): Host address of the database. Default is "localhost".
    - schema (str): Schema name of the target table. Default is "wind_sites".
    - table (str): Table name where data will be loaded. Default is "upd_wind_site".

    Returns:
    - None

    Raises:
    - FileNotFoundError: If the CSV file does not exist at the specified path.
    - ValueError: If the specified file is not a CSV file.
    - psycopg2.Error: If an error occurs during the database operation.

    Note:
    - The transaction is committed if the COPY command succeeds and rolled back
      if an exception occurs; the connection is closed in both cases.
    - schema and table are quoted as SQL identifiers, so they are matched
      case-sensitively and cannot be used to inject SQL.
    - The file is read as UTF-8.
    """

    # Check if file exists
    if not os.path.isfile(input_path):
        raise FileNotFoundError(f"The file does not exist: {input_path}")

    # Check if valid file extension
    file_extension = os.path.splitext(input_path)[1].lower()
    if file_extension != ".csv":
        raise ValueError("File is not a csv file")

    # Imported locally so the function does not depend on another cell
    # having already imported the sql helper module.
    from psycopg2 import sql

    parameters = {
        "dbname" : dbname,
        "user" : user,
        "password" : password,
        "host" : host
    }

    # Identifiers are composed with psycopg2.sql instead of string formatting
    # so that schema/table names are always safely quoted.
    copy_template = sql.SQL(
        """
        COPY {}.{} (date_time, wind_speed, gust_speed, wind_direction)
        FROM STDIN
        DELIMITER ','
        CSV HEADER
        """
    ).format(sql.Identifier(schema), sql.Identifier(table))

    # psycopg2 connection block that does the actual loading process
    connection = psycopg2.connect(**parameters)
    try:
        # `with connection` commits on success and rolls back on any
        # exception; it does not close the connection, so that is done
        # explicitly in the finally clause.
        with connection:
            with connection.cursor() as cursor:
                with open(input_path, "r", encoding="utf-8") as csv_file:
                    cursor.copy_expert(copy_template.as_string(connection), csv_file)
    finally:
        connection.close()


In [None]:
# Wind Site Object has diurnal variation, wind-rose behavior, frequency distribution
class WindSite():
    """
    Placeholder for a wind site model; no attributes or behavior are implemented yet.

    Intended to cover diurnal variation, wind-rose behavior, and frequency distribution.
    """

In [None]:
# Wind Turbine Object has power curve, wind shear
class WindTurbine():
    