# Connect to POSTGRE and create tables

In [2]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT # <-- ADD THIS LINE

In [1]:
from configparser import ConfigParser
def config(filename='database.ini', section='postgresql'):
    # create a parser
    parser = ConfigParser()
    # read config file
    parser.read(filename)
 
    # get section, default to postgresql
    db = {}
    if parser.has_section(section):
        params = parser.items(section)
        for param in params:
            db[param[0]] = param[1]
    else:
        raise Exception('Section {0} not found in the {1} file'.format(section, filename))
 
    return db

In [3]:
def connect():
    """ Connect to the PostgreSQL database server returns connection """
    conn = None
    try:
        # read connection parameters
        params = config()
 
        # connect to the PostgreSQL server
        print('Connecting to the PostgreSQL database...')
        conn = psycopg2.connect(**params)
 
        # create a cursor
        cur = conn.cursor()
        
 # execute a statement
        print('PostgreSQL database version:')
        cur.execute('SELECT version()')
 
        # display the PostgreSQL database server version
        db_version = cur.fetchone()
        print(db_version)
       
     # close the communication with the PostgreSQL
        cur.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        return conn;

In [None]:
def create_tables(conn):
    """ create tables in the PostgreSQL database and load data from csv"""
    commands = (
        """
        CREATE TABLE adult (
            age INTEGER,
            workclass VARCHAR,
            fnlwgt integer,
            education varchar,
            education_num integer,
            marital_status varchar,
            occupation varchar,
            relationship varchar,
            race varchar,
            sex varchar,
            capital_gain integer,
            capital_loss integer,
            hours_per_week integer,
            native_country varchar,
            data_class varchar
        )
        """,
        """
        copy public.adult (
            age,
            workclass,
            fnlwgt,
            education,
            education_num,
            marital_status,
            occupation,
            relationship,
            race,
            sex,
            capital_gain,
            capital_loss,
            hours_per_week,
            native_country,
            data_class
        ) 
        FROM 'D:/UMass/Spring 18/645/SeeDB/adult_data.csv' 
        DELIMITER ',' CSV ENCODING 'UTF8';
        """)
    conn = None
    try:
        # read the connection parameters
        params = config()
        # connect to the PostgreSQL server
#         conn = psycopg2.connect(**params)
        cur = conn.cursor()
        # create table one by one
        for command in commands:
            cur.execute(command)
        # close communication with the PostgreSQL database server
        cur.close()
        # commit the changes
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

In [4]:
def create_views(conn):
    """ create views of married and unmarried, split into 10 subsets for phases"""
    commands = (
        """
        CREATE VIEW Married 
        AS SELECT * 
        FROM adult
        WHERE marital_status IN (' Married-AF-spouse', ' Married-civ-spouse', ' Married-spouse-absent', ' Separated')
        """,
        """
        CREATE VIEW Unmarried 
        AS SELECT * 
        FROM adult
        WHERE marital_status IN (' Never-married', ' Widowed', ' Divorced')
        """
    )
    conn = None
    try:
        # read the connection parameters
        params = config()
        # connect to the PostgreSQL server
#         conn = psycopg2.connect(**params)
        cur = conn.cursor()
        # create table one by one
        for command in commands:
            cur.execute(command)
        # close communication with the PostgreSQL database server
        cur.close()
        # commit the changes
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

In [5]:
def close_connection(conn):
    """ Close postgres connection"""
    try:
        if conn is not None:
            conn.close()
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)

In [8]:
a = ["workclass", "education", "occupation", "realtionship", "race", "sex", "native_country","data_class"]
m = ["age", "fnlwgt", "hours_per_week", "capital_gain", "capital_loss"]
f = ["avg", "sum", "min", "max", "count"]

In [9]:
tuples = [(x, m, f) for x in a]

In [40]:
conn = connect()
cur = conn.cursor()

#create 10 views of adult table
# for i in range(10):
commands = """CREATE TABLE a_0 (
            age INTEGER,
            workclass VARCHAR,
            fnlwgt integer,
            education varchar,
            education_num integer,
            marital_status varchar,
            occupation varchar,
            relationship varchar,
            race varchar,
            sex varchar,
            capital_gain integer,
            capital_loss integer,
            hours_per_week integer,
            native_country varchar,
            data_class varchar);"""
#        for command in commands:
res = cur.execute(commands)
print(res)

#find total rows in db then split into 10 phase - size of each phase
cur.execute("SELECT COUNT(age) FROM adult;")
num_of_rows = cur.fetchall()[0][0];
print("Number of rows in table ", num_of_rows)
phase = 10
size = num_of_rows/phase;
print("Number of phases ", phase)
print("Size of each phase ", size)

#query from adult
cur.execute("SELECT * FROM adult;")
# for i in ['a_'+`i` for i in range(10)]:
for i in range(10):
    commands = 'INSERT INTO a_'+ str(i) +' VALUES(%s, %s, %s,%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'
#     print(commands)
    while True:
        results = cur.fetchmany(size) #fetching every size-rows from adult
        if not results:
            break
        for result in results:
            #add to view i 
#             print(type(result))
            cur.execute(commands,result)

  


Connecting to the PostgreSQL database...
PostgreSQL database version:
('PostgreSQL 10.2, compiled by Visual C++ build 1800, 64-bit',)
None
None
None
None
None
None
None
None
None
None
('Number of rows in table ', 29933L)
('Number of phases ', 10)
('Size of each phase ', 2993L)


ProgrammingError: relation "a_0" does not exist
LINE 1: INSERT INTO a_0 VALUES(39, ' State-gov', 77516,' Bachelors',...
                    ^
