## Imports

In [None]:
import psycopg2
import numpy as np

## Setting up ENV and GLOBAL variables

In [56]:

# Connection parameters handed to psycopg2.connect when the database
# connection is opened.
dbname = "census"
user = "nightfury"
host = "localhost"
password = ""
# Name of the base relation; it also serves as the reference dataset.
db_relation = "adult"
reference_dataset = db_relation
# Name of the table that is (re)created from the user-supplied query.
target_dataset = "target_dataset"


# Full attribute/function lists, kept for reference. The active lists further
# down are subsets of these.
# measure_attributes=['age','fnlwgt', 'education_num','capital_gain','capital_loss','hours_per_week']
# groupby_attributes=['workclass','education','occupation','relationship','race','sex','native_country','salary_range']
# aggregate_functions=['sum','avg','max','min','count']

# Every (group-by attribute, measure attribute, aggregate function) triple
# drawn from these three lists yields one pair of GROUP BY queries.
measure_attributes=['age','capital_gain','hours_per_week']
groupby_attributes=['workclass','relationship','sex']
aggregate_functions=['avg','count']

# Connect to the local database.
# The parameters are passed as keyword arguments rather than interpolated into
# a hand-quoted DSN string, so values containing quotes, backslashes or spaces
# are escaped by psycopg2 instead of corrupting the connection string.
try:
    conn = psycopg2.connect(dbname=dbname, user=user, host=host, password=password)
    cur = conn.cursor()
except psycopg2.Error as e:
    # NOTE: when the connection fails, `conn` and `cur` are never bound, so
    # any later code that uses them will raise NameError.
    print (f"Unable to connect to the database. Error: {str(e)}")

## Util functions

In [57]:

def get_cursor(conn):
    """Return a new cursor obtained from ``conn``.

    Args:
        conn: Connection object exposing a ``cursor()`` method.

    Returns:
        The object returned by ``conn.cursor()``.
    """
    return conn.cursor()

def get_all_combinations(groupby_attributes, measure_attributes, aggregate_functions):
    """Build every (group-by, measure, aggregate) triple from the three inputs.

    Args:
        groupby_attributes: Sequence of group-by attribute names.
        measure_attributes: Sequence of measure attribute names.
        aggregate_functions: Sequence of aggregate function names.

    Returns:
        A 2-D numpy array with one row per combination and three columns in
        the order (group-by, measure, aggregate). Rows are ordered with the
        measure varying fastest, then the group-by attribute, then the
        aggregate function.
    """
    grids = np.meshgrid(groupby_attributes, measure_attributes, aggregate_functions)
    stacked = np.array(grids)
    # Transposing moves the "which input" axis last, so each row of the
    # flattened result holds one value from each of the three inputs.
    return stacked.T.reshape(-1, len(grids))

def query_generator(groupby_attribute, measure_attribute, aggregate_function, target_relation, reference_relation):
    """Build the same GROUP BY aggregate query for the target and reference relations.

    The arguments are interpolated into the SQL text as-is, with no quoting
    or escaping applied.

    Args:
        groupby_attribute: Column to group by (also the first selected column).
        measure_attribute: Column passed to the aggregate function.
        aggregate_function: Name of the SQL aggregate function.
        target_relation: Relation queried by the first returned statement.
        reference_relation: Relation queried by the second returned statement.

    Returns:
        A ``(target_query, reference_query)`` tuple of SQL strings.
    """
    def _view_query(relation):
        # Same statement for both relations; only the FROM clause differs.
        return (
            f"SELECT {groupby_attribute}, {aggregate_function}({measure_attribute}) "
            f"FROM {relation} GROUP BY {groupby_attribute}"
        )

    return _view_query(target_relation), _view_query(reference_relation)

def execute_get_query(cursor, query):
    """Execute ``query`` on ``cursor`` and return all rows it produced.

    Args:
        cursor: Object exposing ``execute(query)`` and ``fetchall()``.
        query: Statement passed unchanged to ``cursor.execute``.

    Returns:
        The value returned by ``cursor.fetchall()`` after execution.
    """
    cursor.execute(query)
    return cursor.fetchall()

## Getting user input and setting the target_db

In [38]:
# Example input: select * from adult where relationship =' Unmarried';
try:
    query = input("Enter a SELECT query")
    # Replace any previous target table with the result of the user's query.
    cur.execute(f"""DROP table IF EXISTS {target_dataset};""")
    print(f"""Target dataset create command: create table {target_dataset} as {query};""")
    # NOTE(review): `query` is executed verbatim inside CREATE TABLE ... AS;
    # only run this against a database where arbitrary SQL from the user is
    # acceptable.
    cur.execute(f"""create table {target_dataset} as {query};""")
    conn.commit()
except psycopg2.Error as e:
    # A failed statement leaves the open transaction aborted, and every later
    # statement on this connection would be rejected until it is rolled back.
    # Assuming the connection is in psycopg2's default non-autocommit mode,
    # the rollback also undoes the DROP, so a previously created target table
    # is kept.
    conn.rollback()
    print(f"Error in establishing target db. Error: {str(e)}")
except Exception as e:
    # Non-database failures (e.g. no connection was established earlier) are
    # still reported rather than raised, as before.
    print(f"Error in establishing target db. Error: {str(e)}")

print(f"The reference dataset is {reference_dataset}")

In [58]:
def run_views():
    """Print the target and reference query for every attribute/aggregate combination.

    Reads the module-level ``groupby_attributes``, ``measure_attributes``,
    ``aggregate_functions``, ``target_dataset`` and ``reference_dataset``.
    The queries are only printed here, not executed.
    """
    triples = get_all_combinations(groupby_attributes, measure_attributes, aggregate_functions)
    for group_col, measure_col, agg_fn in triples:
        query_pair = query_generator(group_col, measure_col, agg_fn, target_dataset, reference_dataset)
        # Target query first, then reference query, one per line.
        print(*query_pair, sep="\n")


run_views()

SELECT workclass, avg(age) FROM target_dataset GROUP BY workclass
SELECT workclass, avg(age) FROM adult GROUP BY workclass
SELECT workclass, avg(capital_gain) FROM target_dataset GROUP BY workclass
SELECT workclass, avg(capital_gain) FROM adult GROUP BY workclass
SELECT workclass, avg(hours_per_week) FROM target_dataset GROUP BY workclass
SELECT workclass, avg(hours_per_week) FROM adult GROUP BY workclass
SELECT relationship, avg(age) FROM target_dataset GROUP BY relationship
SELECT relationship, avg(age) FROM adult GROUP BY relationship
SELECT relationship, avg(capital_gain) FROM target_dataset GROUP BY relationship
SELECT relationship, avg(capital_gain) FROM adult GROUP BY relationship
SELECT relationship, avg(hours_per_week) FROM target_dataset GROUP BY relationship
SELECT relationship, avg(hours_per_week) FROM adult GROUP BY relationship
SELECT sex, avg(age) FROM target_dataset GROUP BY sex
SELECT sex, avg(age) FROM adult GROUP BY sex
SELECT sex, avg(capital_gain) FROM target_datas