In [1]:
%%file tran_total_energy.py

import pandas as pd

def transform_data(csv_file="Resources/organised_Gen.csv"):
    """Load the generation CSV and keep only the nationwide grand-total rows.

    Args:
        csv_file: Path (or any source accepted by ``pandas.read_csv``) of the
            raw generation data. Defaults to the project's
            ``Resources/organised_Gen.csv`` so existing callers are unaffected.

    Returns:
        pandas.DataFrame: columns ``year``, ``month``, ``state``, ``producer``,
        ``source`` and ``generated``, restricted to rows where state is
        ``US-TOTAL``, producer is ``Total Electric Power Industry`` and source
        is ``Total``, with a fresh 0..n-1 index.
    """
    energy = pd.read_csv(csv_file)

    tf_energy_data = (
        energy
        # 'Unnamed: 0' is just a saved index column; tolerate files without it.
        .drop(columns=['Unnamed: 0'], errors='ignore')
        .rename(columns={
            "YEAR": "year",
            "MONTH": "month",
            "STATE": "state",
            "TYPE OF PRODUCER": "producer",
            "ENERGY SOURCE": "source",
            "GENERATION (Megawatthours)": "generated"})
    )

    # Commas in producer names are swapped for '/'. The vectorised .str form
    # leaves missing values as NaN instead of raising like a per-row lambda.
    tf_energy_data['producer'] = tf_energy_data['producer'].str.replace(',', '/', regex=False)

    # Keep only the nationwide, all-producer, all-source total rows.
    is_grand_total = (
        (tf_energy_data['state'] == 'US-TOTAL')
        & (tf_energy_data['producer'] == 'Total Electric Power Industry')
        & (tf_energy_data['source'] == 'Total')
    )

    return tf_energy_data[is_grand_total].reset_index(drop=True)

Writing tran_total_energy.py


In [2]:
from tran_total_energy import transform_data

# Build the transformed frame and show it as the cell output.
x = transform_data()
x

Unnamed: 0,year,month,state,producer,source,generated
0,2001,1,US-TOTAL,Total Electric Power Industry,Total,332493160.0
1,2001,2,US-TOTAL,Total Electric Power Industry,Total,282940198.0
2,2001,3,US-TOTAL,Total Electric Power Industry,Total,300706544.0
3,2001,4,US-TOTAL,Total Electric Power Industry,Total,278078871.0
4,2001,5,US-TOTAL,Total Electric Power Industry,Total,300491621.0
...,...,...,...,...,...,...
252,2022,1,US-TOTAL,Total Electric Power Industry,Total,378966702.0
253,2022,2,US-TOTAL,Total Electric Power Industry,Total,327766846.0
254,2022,3,US-TOTAL,Total Electric Power Industry,Total,325952002.0
255,2022,4,US-TOTAL,Total Electric Power Industry,Total,304348709.0


In [6]:
%%file query_total_energy.py 

import os
import sys
from io import StringIO
from timeit import default_timer as timer

import numpy as np
import pandas as pd
import plotly.express as px
import psycopg2
import psycopg2.extras as extras
from psycopg2 import OperationalError, errorcodes, errors

from tran_total_energy import transform_data

# Filtered generation rows produced by transform_data().
edf = transform_data()

# Connection settings passed to psycopg2.connect() as keyword arguments.
# Each value can be overridden through the matching PG* environment variable
# so real credentials need not be committed; the fallbacks keep the previous
# local-development defaults.
params_dic = {
    "host"      : os.environ.get("PGHOST", "localhost"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD", "postgres"),
    "port"      : os.environ.get("PGPORT", "5432"),
}

def connect(params_dic):
    """Open a PostgreSQL connection from a dict of psycopg2 keyword arguments.

    Prints progress messages. If the connection attempt raises, the error is
    printed and the process exits with status 1; otherwise the new connection
    object is returned.
    """
    try:
        print('Connecting to the PostgreSQL database...')
        connection = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as exc:
        # Any failure while connecting is fatal for this script.
        print(exc)
        sys.exit(1)
    else:
        print("Connection successful")
        return connection
# Open a connection as soon as connect() is defined (this runs at import time).
conn = connect(params_dic)

# Install psycopg2-binary (e.g. on macOS) if psycopg2 is not already available:
#     pip install psycopg2-binary

# Report the details of an exception raised while talking to PostgreSQL
def show_psycopg2_exception(err):
    """Print diagnostic details for an exception raised during a database call.

    Works for psycopg2 errors and for any other exception: the psycopg2-only
    attributes (``diag``, ``pgerror``, ``pgcode``) are printed as ``None`` when
    the exception does not carry them, instead of raising ``AttributeError``
    from inside an error handler.

    Args:
        err: The exception instance to describe.
    """
    # Prefer the exception currently being handled; when called outside an
    # ``except`` block sys.exc_info() is all None, so fall back to ``err``.
    err_type, _, tb = sys.exc_info()
    if err_type is None:
        err_type = type(err)
        tb = getattr(err, "__traceback__", None)

    # line number where the exception occurred (None if no traceback exists)
    line_n = tb.tb_lineno if tb is not None else None

    print ("\npsycopg2 ERROR:", err, "on line number:", line_n)
    print ("psycopg2 traceback:", tb, "-- type:", err_type)
    # psycopg2 extensions.Diagnostics object attribute
    print ("\nextensions.Diagnostics:", getattr(err, "diag", None))
    # pgerror / pgcode are only set on psycopg2 errors
    print ("pgerror:", getattr(err, "pgerror", None))
    print ("pgcode:", getattr(err, "pgcode", None), "\n")

def create_table(cursor):
    """Drop and recreate the ``energy`` table using the given cursor.

    Args:
        cursor: An open database cursor; both statements are executed on it.

    An ``OperationalError`` is reported through ``show_psycopg2_exception``
    and not re-raised. The caller's connection is left untouched.
    """
    try:
        # Drop any previous copy so the CREATE below cannot hit an existing table.
        cursor.execute("DROP TABLE IF EXISTS energy;")
        sql = '''CREATE TABLE energy(
        year INT NOT NULL,
        month INT NOT NULL,
        state VARCHAR NOT NULL,
        producer TEXT NOT NULL,
        source VARCHAR NOT NULL,
        generated FLOAT NOT NULL
        )'''
        # Creating a table
        cursor.execute(sql)
        print("energy table is created successfully...............")
    except OperationalError as err:
        # pass exception to function
        show_psycopg2_exception(err)

# Bulk-load a dataframe into a table via an in-memory CSV buffer and copy_from()
def copy_from_dataFile_StringIO(conn, datafrm, table):
    """Copy every row of ``datafrm`` into ``table`` over ``conn``.

    The frame is serialised to comma-separated text (no header, no index) in
    memory and streamed to the server with ``cursor.copy_from``.

    Args:
        conn: Open database connection used to create the cursor.
        datafrm: pandas DataFrame whose column order matches the table.
        table: Name of the destination table.

    Any exception is reported through ``show_psycopg2_exception`` and not
    re-raised. The cursor is closed on both the success and the failure path.

    NOTE(review): copy_from presumably splits on the raw separator, so field
    values must not contain commas — confirm against the psycopg2 docs.
    """
    # save dataframe to an in memory buffer
    buffer = StringIO()
    datafrm.to_csv(buffer, header=False, index=False)
    buffer.seek(0)

    cursor = conn.cursor()
    try:
        cursor.copy_from(buffer, table, sep=",")
        print("Data inserted using copy_from_datafile_StringIO() successfully....")
    except Exception as err:
        # psycopg2.DatabaseError is an Exception subclass, so this covers it too.
        show_psycopg2_exception(err)
    finally:
        # Release the cursor whether or not the copy succeeded.
        cursor.close()

# Reuse the connection already opened right after connect() was defined;
# connecting a second time here would orphan that first connection unclosed.
# We set autocommit=True so every command we execute will produce results immediately.
conn.autocommit = True
cursor = conn.cursor()
create_table(cursor)

# Load the transformed rows into the freshly created table.
copy_from_dataFile_StringIO(conn, edf, 'energy')


def query_data():
    """Return the total electricity generated per year from the ``energy`` table.

    Runs an aggregate query on the module-level connection ``conn``.

    Returns:
        pandas.DataFrame: one row per year in ascending order, with columns
        ``Year`` and ``Total Generated(in millions)`` (the yearly sum of
        ``generated`` divided by 1,000,000).
    """
    # Autocommit: the SELECT leaves no open transaction, so no commit is needed.
    conn.autocommit = True

    # 2022 is excluded — presumably because the data covers only part of that
    # year; verify against the source data.
    # ORDER BY makes the row order deterministic; without it the database may
    # return the groups in any order.
    sql = '''SELECT year, (SUM(generated)/1000000)
            FROM energy
            WHERE producer = 'Total Electric Power Industry'
                AND source = 'Total'
                AND state = 'US-TOTAL'
                AND year != 2022
            GROUP BY year, state
            ORDER BY year
                ;'''

    # The context manager closes the cursor even if the query fails.
    with conn.cursor() as cursor:
        cursor.execute(sql)
        results = cursor.fetchall()

    # The frame can be plotted with e.g.
    # px.bar(df, x="Year", y="Total Generated(in millions)").
    return pd.DataFrame(results, columns=['Year', 'Total Generated(in millions)'])

Overwriting query_total_energy.py


In [7]:
from query_total_energy import query_data

# Show the yearly totals returned by query_data() as the cell output.
query_data()

Connecting to the PostgreSQL database...
Connection successful
Connecting to the PostgreSQL database...
Connection successful
energy table is created successfully...............
Data inserted using copy_from_datafile_StringIO() successfully....


Unnamed: 0,Year,Total Generated(in millions)
0,2001,3736.643649
1,2002,3858.452251
2,2003,3883.185204
3,2004,3970.555264
4,2005,4055.42275
5,2006,4064.702227
6,2007,4156.744725
7,2008,4119.387759
8,2009,3950.330927
9,2010,4125.059899


In [12]:
%%file test_total_energy.py

from query_total_energy import query_data
from tran_total_energy import transform_data

def test_transform_data():
    df = transform_data()
    assert len(df.columns) == 6

def test_transform_data_row_count():
    df = transform_data()
    assert len(df.index) == 257
    
def test_query_data_column_count():
    df = query_data()
    assert len(df.columns) == 2

def test_query_data_row_count():
    df = query_data()
    assert len(df.index) == 21

Overwriting test_total_energy.py


In [13]:
# Run the test_total_energy.py file with pytest.
!python -m pytest test_total_energy.py

platform darwin -- Python 3.9.7, pytest-6.2.4, py-1.10.0, pluggy-0.13.1
rootdir: /Users/slimykid/Group4_NFLX_MIDTERM
plugins: anyio-2.2.0
collected 4 items                                                              [0m[1m

test_total_energy.py [32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                [100%][0m

