In [1]:
import pandas as pd
import sqlite3

In [2]:
# Location of the SQLite database, relative to the notebook's working directory.
DB_name = '../data/checking-logs.sqlite'

# The connection stays open after this cell: later cells query through `conn`.
conn = sqlite3.connect(DB_name)

# PRAGMA table_info yields one row per column of the `control` table
# (cid, name, type, notnull, dflt_value, pk).
schema_df = pd.read_sql("PRAGMA table_info(control)", con=conn)
schema_df

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,uid,TEXT,0,,0
1,1,labname,TEXT,0,,0
2,2,first_commit_ts,TIMESTAMP,0,,0
3,3,first_view_ts,TIMESTAMP,0,,0


In [3]:
# Generic "dump the whole table" statement; the table name is filled in below.
query_template = "SELECT * FROM {}"

# NOTE: despite their names, `test_query` / `test_results` hold the
# `deadlines` table here, not a table called `test`.
test_query, control_query = (
    query_template.format(table_name) for table_name in ('deadlines', 'control')
)

# Read both tables over the already-open connection, in the same order as
# the queries were built.
test_results, control_results = (
    pd.read_sql(sql, conn) for sql in (test_query, control_query)
)

# Only the deadlines frame is displayed; `control_results` is loaded but not shown.
test_results

Unnamed: 0,index,labs,deadlines
0,0,laba04,1587945599
1,1,laba04s,1587945599
2,2,laba05,1588550399
3,4,laba06,1590364799
4,5,laba06s,1590364799
5,3,project1,1589673599


In [4]:
# For every (user, lab) row of `{table}` except 'project1', compute how far the
# first commit was from the lab deadline, in hours (negative = before the
# deadline), and average that gap separately for commits made 'before' and
# 'after' the user's earliest first_view_ts.
#
# strftime('%s', ...) returns TEXT in SQLite, so every epoch value is CAST to
# INTEGER before it is aggregated or compared; otherwise MIN() and `<` would
# compare strings lexicographically, which is only correct while all epoch
# values happen to have the same number of digits.
#
# NOTE(review): a user whose first_view_ts is NULL makes the CASE comparison
# evaluate to NULL, so their rows fall into the ELSE branch ('after').
# Confirm this is the intended classification for the queried tables.
query = """
WITH user_first_views AS (
    SELECT uid, MIN(CAST(strftime('%s', first_view_ts) AS INTEGER)) AS first_view_ts
    FROM {table}
    GROUP BY uid
),
merged AS (
    SELECT
        t.uid,
        t.labname,
        CAST(strftime('%s', t.first_commit_ts) AS INTEGER) AS first_commit_ts,
        fv.first_view_ts,
        d.deadlines,
        CASE
            WHEN CAST(strftime('%s', t.first_commit_ts) AS INTEGER) < fv.first_view_ts THEN 'before'
            ELSE 'after'
        END AS time,
        (CAST(strftime('%s', t.first_commit_ts) AS INTEGER) - d.deadlines) / 3600.0 AS diff
    FROM {table} t
    JOIN user_first_views fv ON t.uid = fv.uid
    JOIN deadlines d ON t.labname = d.labs
    WHERE t.labname != 'project1'
)
SELECT time, AVG(diff) AS avg_diff
FROM merged
GROUP BY time;

"""

# The same statement is run against both experiment groups; only the table
# name differs.
test_query = query.format(table='test')
control_query = query.format(table='control')

# Close the connection even if one of the reads raises, so a failing query
# does not leave the database handle open. Because `conn` is closed here,
# re-running this cell requires re-running the cell that opens the connection.
try:
    test_results = pd.read_sql(test_query, conn)
    control_results = pd.read_sql(control_query, conn)
finally:
    conn.close()


print("Test Group Results:")
print(test_results)

print("\nControl Group Results:")
print(control_results)


Test Group Results:
     time    avg_diff
0   after -103.953446
1  before  -61.156632

Control Group Results:
     time    avg_diff
0   after -113.232346
1  before  -99.901448
