<a href="https://colab.research.google.com/github/ahakobia/Group4_NFLX_MIDTERM/blob/main/Adehs_US_Energy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [58]:
import os
import sys
# import the connect library for psycopg2
import psycopg2
# import the error handling libraries for psycopg2
from psycopg2 import OperationalError, errorcodes, errors
import psycopg2.extras as extras
import pandas as pd
from io import StringIO
import numpy as np

import matplotlib.pyplot as plt

In [6]:
params_dic = {
    "host"      : "localhost",
    "user"      : "postgres",
    "password"  : "postgres",
    "port"      : "5432"
}

In [8]:
def connect(params_dic):
    """ Connect to the PostgreSQL database server """
    conn = None
    try:
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params_dic)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        sys.exit(1) 
    print("Connection successful")
    return conn
conn = connect(params_dic)

Connecting to the PostgreSQL database...
Connection successful


In [40]:
csv_file = "Resources/organised_Gen.csv"
energy_data = pd.read_csv(csv_file)
energy_data = energy_data.drop(columns=['Unnamed: 0'])

energy_df = energy_data.rename(columns={
    "YEAR": "year", 
    "MONTH": "month",
    "STATE": "state",
    "TYPE OF PRODUCER": "producer",
    "ENERGY SOURCE": "source",
    "GENERATION (Megawatthours)": "generated"})


energy_df['producer'] = energy_df['producer'].apply(lambda x: x.replace(',','/'))
    
energy_df.head(10)

Unnamed: 0,year,month,state,producer,source,generated
0,2001,1,AK,Total Electric Power Industry,Coal,46903.0
1,2001,1,AK,Total Electric Power Industry,Petroleum,71085.0
2,2001,1,AK,Total Electric Power Industry,Natural Gas,367521.0
3,2001,1,AK,Total Electric Power Industry,Hydroelectric Conventional,104549.0
4,2001,1,AK,Total Electric Power Industry,Wind,87.0
5,2001,1,AK,Total Electric Power Industry,Total,590145.0
6,2001,1,AK,Electric Generators/ Electric Utilities,Coal,18410.0
7,2001,1,AK,Electric Generators/ Electric Utilities,Petroleum,64883.0
8,2001,1,AK,Electric Generators/ Electric Utilities,Natural Gas,305277.0
9,2001,1,AK,Electric Generators/ Electric Utilities,Hydroelectric Conventional,104549.0


In [41]:
!pip install psycopg2-binary



In [42]:
# Define a function that handles and parses psycopg2 exceptions
def show_psycopg2_exception(err):
    # get details about the exception
    err_type, err_obj, traceback = sys.exc_info()    
    # get the line number when exception occured
    line_n = traceback.tb_lineno    
    # print the connect() error
    print ("\npsycopg2 ERROR:", err, "on line number:", line_n)
    print ("psycopg2 traceback:", traceback, "-- type:", err_type) 
    # psycopg2 extensions.Diagnostics object attribute
    print ("\nextensions.Diagnostics:", err.diag)    
    # print the pgcode and pgerror exceptions
    print ("pgerror:", err.pgerror)
    print ("pgcode:", err.pgcode, "\n")

In [43]:
def create_table(cursor):
    try:
        # Dropping table iris if exists
        cursor.execute("DROP TABLE IF EXISTS energy;")
        sql = '''CREATE TABLE energy(
        year INT NOT NULL, 
        month INT NOT NULL, 
        state VARCHAR NOT NULL, 
        producer TEXT NOT NULL,
        source VARCHAR NOT NULL,
        generated FLOAT NOT NULL
        )'''
        # Creating a table
        cursor.execute(sql);
        print("energy table is created successfully...............")  
    except OperationalError as err:
        # pass exception to function
        show_psycopg2_exception(err)
        # set the connection to 'None' in case of error
        conn = None

In [44]:
def run_method(n):
    for i in range(n):
        3 ** n
from timeit import default_timer as timer
start_time = timer()
run_method(10000)
end_time = timer()
elapsed = end_time-start_time
print('function took {:.3f} ms'.format((elapsed)*1000.0))

function took 661.312 ms


In [45]:
# Define function using copy_from() with StringIO to insert the dataframe
def copy_from_dataFile_StringIO(conn, datafrm, table):
    
  # save dataframe to an in memory buffer
    buffer = StringIO()
    datafrm.to_csv(buffer, header=False, index = False)
    buffer.seek(0)
    
    cursor = conn.cursor()
    try:
        cursor.copy_from(buffer, table, sep=",")
        print("Data inserted using copy_from_datafile_StringIO() successfully....")
    except (Exception, psycopg2.DatabaseError) as err:
        # pass exception to function
        show_psycopg2_exception(err)
        cursor.close()

In [46]:
conn = connect(params_dic)
# We set autocommit=True so every command we execute will produce results immediately.
conn.autocommit = True
cursor = conn.cursor()
create_table(cursor)

Connecting to the PostgreSQL database...
Connection successful
energy table is created successfully...............


In [47]:
copy_from_dataFile_StringIO(conn, energy_df, 'energy')

Data inserted using copy_from_datafile_StringIO() successfully....


In [72]:
conn.autocommit = True
cursor = conn.cursor()
  
sql = '''SELECT * 
            FROM energy 
            WHERE producer = 'Total Electric Power Industry' 
                AND source = 'Total' 
                AND state = 'US-TOTAL'
                ;'''  
cursor.execute(sql)
results = cursor.fetchall()
print(results)
  
conn.commit()
#conn.close()

[(2001, 1, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 332493160.0), (2001, 2, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 282940198.0), (2001, 3, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 300706544.0), (2001, 4, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 278078871.0), (2001, 5, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 300491621.0), (2001, 6, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 327693978.0), (2001, 7, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 357613700.0), (2001, 8, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 370532828.0), (2001, 9, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 306928866.0), (2001, 10, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 294733613.0), (2001, 11, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 278933942.0), (2001, 12, 'US-TOTAL', 'Total Electric Power Industry', 'Total', 305496328.0), (2002, 1, 'US-TOTAL', 'Total Electric Power Industry', 'Tota

In [66]:
energy_data.head(20158)

Unnamed: 0,YEAR,MONTH,STATE,TYPE OF PRODUCER,ENERGY SOURCE,GENERATION (Megawatthours)
0,2001,1,AK,Total Electric Power Industry,Coal,46903.0
1,2001,1,AK,Total Electric Power Industry,Petroleum,71085.0
2,2001,1,AK,Total Electric Power Industry,Natural Gas,367521.0
3,2001,1,AK,Total Electric Power Industry,Hydroelectric Conventional,104549.0
4,2001,1,AK,Total Electric Power Industry,Wind,87.0
...,...,...,...,...,...,...
20153,2001,12,US-TOTAL,Total Electric Power Industry,Geothermal,1190232.0
20154,2001,12,US-TOTAL,Total Electric Power Industry,Other Biomass,1252855.0
20155,2001,12,US-TOTAL,Total Electric Power Industry,Pumped Storage,-622574.0
20156,2001,12,US-TOTAL,Total Electric Power Industry,Other,1051329.0


In [69]:
# Create a temporary view and transform the temporary view to retrieve only the "School Organization" values from the "TYPE" column. 
def transform_data():
    energy_data.createOrReplaceTempView('energy')
    conn.autocommit = True
    cursor = conn.cursor()
  
    sql = '''SELECT * 
                FROM energy 
                WHERE year = 2001 
                    AND producer = 'Total Electric Power Industry' 
                    AND source = 'Total' 
                    AND state = 'US-TOTAL'
                    ;'''
  
    cursor.execute(sql)
    results = cursor.fetchall()
    print(results)
  
    conn.commit()
  
    return results

In [71]:
transform_data()

AttributeError: 'DataFrame' object has no attribute 'createOrReplaceTempView'

In [None]:
conn.close()