In [1]:
import io
import psycopg2
import pandas

# Display options: show every column, keep each frame on a single line,
# and never truncate the text inside a cell.
pandas.set_option('display.max_columns', None)
pandas.set_option('display.expand_frame_repr', False)
# None means "no limit". The former sentinel -1 is deprecated since pandas 1.0
# and is rejected by the option validator in pandas 2.x.
pandas.set_option('display.max_colwidth', None)

# Small example frame: two subjects, two survey categories each, plus two
# constant columns that the later pipeline does not use.
d = pandas.DataFrame({
    'subjectID':[1, 1, 2, 2],
    'surveyCategory': [ "withdrawal behavior", "positive re-framing", "withdrawal behavior", "positive re-framing"],
    'assessmentTotal': [5, 2, 3, 4],
    'irrelevantCol1': ['irrel1']*4,
    'irrelevantCol2': ['irrel2']*4,
})

print(d)




   subjectID       surveyCategory  assessmentTotal irrelevantCol1  \
0          1  withdrawal behavior                5         irrel1   
1          1  positive re-framing                2         irrel1   
2          2  withdrawal behavior                3         irrel1   
3          2  positive re-framing                4         irrel1   

  irrelevantCol2  
0         irrel2  
1         irrel2  
2         irrel2  
3         irrel2  


In [None]:
# Settings for the local PostgreSQL instance this demo talks to.
connection_settings = dict(
    database="johnmount",
    user="johnmount",
    host="localhost",
    password="",
)
conn = psycopg2.connect(**connection_settings)
# With autocommit on, each statement takes effect immediately instead of
# waiting for an explicit commit.
conn.autocommit = True

# Cursor created once at notebook level.
cur = conn.cursor()

In [None]:
def is_numeric(col):
    """Report whether ``col`` supports addition with a float.

    The check is purely behavioral: ``0.0 + col`` is attempted and any
    exception it raises is taken to mean "not numeric".

    :param col: value to probe (a scalar or a pandas column).
    :return: True when the addition succeeds, False when it raises.
    """
    try:
        _ = 0.0 + col
    except Exception:
        return False
    else:
        return True


def _quote_identifier(name):
    """Return ``name`` as a double-quoted PostgreSQL identifier (embedded quotes doubled)."""
    return '"' + str(name).replace('"', '""') + '"'


def insert_table(conn, d, table_name):
    """Replace table ``table_name`` in the database with the contents of ``d``.

    Any existing table of that name is dropped, a new one is created with one
    column per DataFrame column, and the rows are bulk-loaded with COPY.

    Column names are double-quoted so their case is preserved; unquoted
    mixed-case names would be folded to lower case by PostgreSQL and would not
    match queries that quote them (e.g. ``"subjectID"``).

    :param conn: open DB-API connection (psycopg2) to the target database.
    :param d: pandas DataFrame to store. Columns for which ``is_numeric`` is
        true become ``double precision``; all others become ``VARCHAR``.
    :param table_name: name of the table to (re)create. It is interpolated
        into the SQL verbatim, so it must be a trusted, valid table reference.
    :return: None.
    """
    quoted_cols = [_quote_identifier(c) for c in d.columns]
    col_defs = [
        quoted + " " + ("double precision" if is_numeric(d[c]) else "VARCHAR")
        for quoted, c in zip(quoted_cols, d.columns)
    ]
    create_stmt = "CREATE TABLE " + table_name + " ( " + ", ".join(col_defs) + " )"
    column_list = ", ".join(quoted_cols)
    copy_stmt = "COPY " + table_name + " ( " + column_list + " ) FROM STDIN WITH (FORMAT csv)"
    # to_csv emits CSV quoting/escaping, so load it with COPY's csv format
    # (missing values are written as empty fields, which csv format reads as NULL).
    buf = io.StringIO(d.to_csv(index=False, header=False))
    cur = conn.cursor()
    try:
        cur.execute("DROP TABLE IF EXISTS " + table_name)
        cur.execute(create_stmt)
        # copy_expert lets us control identifier quoting ourselves, independent
        # of how a given psycopg2 version's copy_from treats names.
        cur.copy_expert(copy_stmt, buf)
        conn.commit()
    finally:
        cur.close()


def read_query(conn, q):
    """Run SQL query ``q`` on ``conn`` and return all result rows as a DataFrame.

    :param conn: open DB-API connection (psycopg2) to run the query on.
    :param q: SQL text of a row-returning query.
    :return: pandas DataFrame with one column per result column, named from
        the cursor description, and one row per fetched row.
    """
    # Take a cursor from the connection that was passed in, so the function
    # does not depend on a notebook-global cursor being defined.
    cur = conn.cursor()
    try:
        cur.execute(q)
        rows = cur.fetchall()
        colnames = [desc[0] for desc in cur.description]
    finally:
        cur.close()
    return pandas.DataFrame(columns=colnames, data=rows)


def read_table(conn, table_name):
    """Fetch every row and column of ``table_name`` as a pandas DataFrame.

    :param conn: database connection handed through to ``read_query``.
    :param table_name: table to read; appended verbatim to ``SELECT * FROM``.
    :return: whatever ``read_query`` returns for that statement.
    """
    select_all = "SELECT * FROM " + table_name
    return read_query(conn, select_all)
    

# Store the example frame in the database as table 'd'.
insert_table(conn, d, 'd')


# Read the table back; as the cell's last expression the result is displayed.
read_table(conn, 'd')

In [2]:
from data_algebra.data_ops import *
import data_algebra.env
import data_algebra.yaml
import data_algebra.PostgreSQL




# Placeholder bindings so names such as `_` (which appears inside the expression
# strings of the pipeline below, e.g. '_.row_number()') are defined at notebook level.
_, _1, _2, _get = [None, None, None, lambda x: x] # don't look unbound
data_algebra.yaml.fix_ordered_dict_yaml_rep()
# Register this namespace with data_algebra. NOTE(review): presumably this is how
# expression strings resolve outside names such as `scale` (the printed pipeline
# shows it replaced by 0.237) -- confirm against the data_algebra docs.
data_algebra.env.push_onto_namespace_stack(locals())

# SQL dialect model passed to ops.to_sql() when rendering the query.
db_model = data_algebra.PostgreSQL.PostgreSQLModel()

# Multiplier applied to assessmentTotal inside the first extend() step below.
scale = 0.237

# Pipeline over table 'd':
#   1. probability = exp(assessmentTotal * scale)
#   2. probability divided by its per-subject sum
#   3. row_number within each subject, ordered by probability (reversed)
#      and then surveyCategory
ops = (
    TableDescription(
        'd',
        [
            'subjectID',
            'surveyCategory',
            'assessmentTotal',
            'irrelevantCol1',
            'irrelevantCol2',
        ],
    )
    .extend({'probability': '(assessmentTotal * scale).exp()'})
    .extend(
        {'probability': 'probability/probability.sum()'},
        partition_by='subjectID',
    )
    .extend(
        {'row_number':'_.row_number()'},
        partition_by=['subjectID'],
        order_by=['probability', 'surveyCategory'],
        reverse=['probability'],
    )
)

# Show the pipeline rendered back as Python source.
print(ops.to_python(pretty=True))


TableDescription(
    table_name="d",
    column_names=[
        "subjectID",
        "surveyCategory",
        "assessmentTotal",
        "irrelevantCol1",
        "irrelevantCol2",
    ],
).extend({"probability": "(assessmentTotal * 0.237).exp()"}).extend(
    {"probability": "(probability / probability.sum())"}, partition_by=["subjectID"]
).extend(
    {"row_number": "_.row_number()"},
    partition_by=["subjectID"],
    order_by=["probability", "surveyCategory"],
    reverse=["probability"],
)



In [2]:
# Render the pipeline as a SQL string using the PostgreSQL model, then print it.
sql = ops.to_sql(db_model, pretty=True)
print(sql)

SELECT "probability",
       "surveyCategory",
       "irrelevantCol2",
       "subjectID",
       "irrelevantCol1",
       "assessmentTotal",
       ROW_NUMBER() OVER (PARTITION BY "subjectID"
                          ORDER BY "subjectID") AS "row_number"
FROM
  (SELECT "surveyCategory",
          "irrelevantCol2",
          "subjectID",
          "irrelevantCol1",
          "assessmentTotal", ("probability" / SUM("probability")) OVER (PARTITION BY "subjectID") AS "probability"
   FROM
     (SELECT "surveyCategory",
             "irrelevantCol2",
             "subjectID",
             "irrelevantCol1",
             "assessmentTotal",
             EXP(("assessmentTotal" * 0.237)) AS "probability"
      FROM
        (SELECT "surveyCategory",
                "irrelevantCol1",
                "assessmentTotal",
                "irrelevantCol2",
                "subjectID"
         FROM "d") "SQ_0") "SQ_1") "SQ_2"


In [None]:


# Execute the generated SQL on the database; the resulting DataFrame is displayed.
read_query(conn, sql)

In [None]:
# Close the database connection now that the notebook is done with it.
conn.close()