In [17]:
import pandas as pd
import sqlite3

## create a connection to the database using the library `sqlite3`

In [18]:
# Shared SQLite connection used by every query cell below.
con = sqlite3.connect(database='../../datasets/checking-logs.sqlite')

## for the `test` and `control` groups, find the `average` value of the delta between the first commit of the user and the deadline of the corresponding lab, before and after the user's first view (`first_view_ts`), using only one query per group
  * do this by joining the table with the table `deadlines`
  * the difference should be displayed in `hours`
  * do not take the lab `project1` into account, it has longer deadlines and will be
  an outlier
  * only users that have both `before` and `after` records are taken into account
  * the values should be stored in the dataframes `test_results` and `control_results`

In [19]:
# Average commit-to-deadline delta (in hours) for the `test` table, split by
# whether the first commit happened before or after the user's first view.
#
# * delta  = first_commit_ts - deadline, converted to hours; the CAST truncates
#            each delta to whole hours before it is averaged.
# * time   = 'before' when first_commit_ts < first_view_ts, otherwise 'after'
#            (rows where the comparison is not true, including NULL
#            timestamps, fall into 'after').
# * Only users that have both a 'before' and an 'after' row are kept.
#
# The labelled rows are defined once in a CTE and reused for both the
# aggregation and the user filter, so the two cannot drift apart.
# ORDER BY makes the row order of the result deterministic.
test_results = pd.read_sql(
    '''
        WITH labelled AS (
            SELECT
                t.uid,
                CAST((julianday(t.first_commit_ts) - julianday(d.deadlines, 'unixepoch'))
                 * 24 AS INTEGER) AS delta,
                CASE WHEN t.first_commit_ts < t.first_view_ts THEN 'before'
                    ELSE 'after' END AS time
            FROM test AS t
            LEFT JOIN deadlines AS d ON t.labname = d.labs
            WHERE t.labname <> 'project1'
        )
        SELECT
            time,
            AVG(delta) AS average
        FROM labelled
        WHERE uid IN (
            -- users that have both 'before' and 'after' rows
            SELECT uid
            FROM labelled
            GROUP BY uid
            HAVING COUNT(DISTINCT time) = 2
        )
        GROUP BY time
        ORDER BY time
    ''',
    con
)
test_results

Unnamed: 0,time,average
0,after,-104.6
1,before,-60.5625


In [20]:
# Average commit-to-deadline delta (in hours) for the `control` table, split by
# whether the first commit happened before or after the user's first view.
#
# * delta  = first_commit_ts - deadline, converted to hours; the CAST truncates
#            each delta to whole hours before it is averaged.
# * time   = 'before' when first_commit_ts < first_view_ts, otherwise 'after'
#            (rows where the comparison is not true, including NULL
#            timestamps, fall into 'after').
# * Only users that have both a 'before' and an 'after' row are kept.
#
# The labelled rows are defined once in a CTE and reused for both the
# aggregation and the user filter, so the two cannot drift apart.
# ORDER BY makes the row order of the result deterministic.
control_results = pd.read_sql(
    '''
        WITH labelled AS (
            SELECT
                c.uid,
                CAST((julianday(c.first_commit_ts) - julianday(d.deadlines, 'unixepoch'))
                 * 24 AS INTEGER) AS delta,
                CASE WHEN c.first_commit_ts < c.first_view_ts THEN 'before'
                    ELSE 'after' END AS time
            FROM control AS c
            LEFT JOIN deadlines AS d ON c.labname = d.labs
            WHERE c.labname <> 'project1'
        )
        SELECT
            time,
            AVG(delta) AS average
        FROM labelled
        WHERE uid IN (
            -- users that have both 'before' and 'after' rows
            SELECT uid
            FROM labelled
            GROUP BY uid
            HAVING COUNT(DISTINCT time) = 2
        )
        GROUP BY time
        ORDER BY time
    ''',
    con
)
control_results

Unnamed: 0,time,average
0,after,-117.636364
1,before,-99.464286


## Close the connection

In [21]:
# Release the SQLite connection now that all queries have been run.
con.close()