In [1]:
%defaultDatasource jdbc:h2:mem:db

In [2]:
-- Drop any previous copy so this cell can be re-run against the in-memory database.
DROP TABLE IF EXISTS DescEKGCases;

-- Wide-format results table: one row per (user, casen) pair, holding the
-- description-stage and EKG-stage values side by side.
-- NOTE(review): SELECT * presumably relies on the CSV columns arriving in the
-- same order as the column list below - confirm against the file.
-- NOTE(review): USER is a reserved word in newer H2 releases (2.x) - confirm the
-- H2 version in use before upgrading.
CREATE TABLE DescEKGCases (
  -- the queries in this notebook filter on the values DF and EF
  ctype VARCHAR(2),
  user INT,
  casen VARCHAR(7),
  -- description-stage answer, accuracy points, confidence and time
  -- (the notebook text treats points equal to 1 as a right answer)
  -- NOTE(review): unit of the time columns is not visible here - confirm
  answer_desc VARCHAR(200),
  points_desc DECIMAL(3,1),
  confidence_desc INT,
  time_desc DECIMAL(8,2),
  -- EKG-stage answer, accuracy points, confidence and time
  answer_ekg VARCHAR(200),
  points_ekg DECIMAL(3,1),
  confidence_ekg INT,
  time_ekg DECIMAL(8,2),
  PRIMARY KEY(user, casen)
) AS SELECT *
  FROM CSVREAD('../../resources/nl-experiment1/transformed-data/nl-experiment-01.csv');

In [3]:
-- Full dump of the loaded table (every column, in declared order), sorted by
-- user and case for a quick visual check of the import.
SELECT
    ctype,
    user,
    casen,
    answer_desc,
    points_desc,
    confidence_desc,
    time_desc,
    answer_ekg,
    points_ekg,
    confidence_ekg,
    time_ekg
FROM DescEKGCases
ORDER BY
    user,
    casen;

# Overall Average of the Accuracy

## For Description First

* AVG_POINTS_DESC - average points of the answers in the description stage (1st stage);
* STD_POINTS_DESC - standard deviation of the points for the answers in the description stage (1st stage);
* AVG_POINTS_EKG  - average points of the answers in the EKG stage (2nd stage);
* STD_POINTS_EKG - standard deviation of the points for the answers in the EKG stage (2nd stage).

In [4]:
-- Description-first group (ctype DF): mean and sample standard deviation of the
-- accuracy points in each stage, rounded to two decimals.
SELECT
    ROUND(AVG(points_desc), 2)         AS AVG_points_desc,
    ROUND(STDDEV_SAMP(points_desc), 2) AS STD_points_desc,
    ROUND(AVG(points_ekg), 2)          AS AVG_points_ekg,
    ROUND(STDDEV_SAMP(points_ekg), 2)  AS STD_points_ekg
FROM DescEKGCases
WHERE ctype = 'DF';

## For EKG First

* AVG_POINTS_EKG  - average points of the answers in the EKG stage (1st stage);
* STD_POINTS_EKG - standard deviation of the points for the answers in the EKG stage (1st stage);
* AVG_POINTS_DESC - average points of the answers in the description stage (2nd stage);
* STD_POINTS_DESC - standard deviation of the points for the answers in the description stage (2nd stage).

In [5]:
-- EKG-first group (ctype EF): mean and sample standard deviation of the
-- accuracy points in each stage, rounded to two decimals.
SELECT
    ROUND(AVG(points_ekg), 2)          AS AVG_points_ekg,
    ROUND(STDDEV_SAMP(points_ekg), 2)  AS STD_points_ekg,
    ROUND(AVG(points_desc), 2)         AS AVG_points_desc,
    ROUND(STDDEV_SAMP(points_desc), 2) AS STD_points_desc
FROM DescEKGCases
WHERE ctype = 'EF';

# Accuracy in the First and Second Task by Case
## For Description First

* AVG_POINTS_DESC - average points of the answer in the description stage (1st stage);
* STD_POINTS_DESC - standard deviation of the points for the answers in the description stage (1st stage);
* AVG_POINTS_EKG  - average points of the answer in the EKG stage (2nd stage);
* STD_POINTS_EKG - standard deviation of the points for the answers in the EKG stage (2nd stage).

In [6]:
-- Description-first group (ctype DF), broken down by case: mean and sample
-- standard deviation of the accuracy points in each stage, rounded to two decimals.
SELECT
    casen,
    ROUND(AVG(points_desc), 2)         AS AVG_points_desc,
    ROUND(STDDEV_SAMP(points_desc), 2) AS STD_points_desc,
    ROUND(AVG(points_ekg), 2)          AS AVG_points_ekg,
    ROUND(STDDEV_SAMP(points_ekg), 2)  AS STD_points_ekg
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

## For EKG First

* AVG_POINTS_EKG  - average points of the answer in the EKG stage (1st stage);
* STD_POINTS_EKG - standard deviation of the points for the answers in the EKG stage (1st stage);
* AVG_POINTS_DESC - average points of the answer in the description stage (2nd stage);
* STD_POINTS_DESC - standard deviation of the points for the answers in the description stage (2nd stage).

In [7]:
-- EKG-first group (ctype EF), broken down by case: mean and sample standard
-- deviation of the accuracy points in each stage, rounded to two decimals.
SELECT
    casen,
    ROUND(AVG(points_ekg), 2)          AS AVG_points_ekg,
    ROUND(STDDEV_SAMP(points_ekg), 2)  AS STD_points_ekg,
    ROUND(AVG(points_desc), 2)         AS AVG_points_desc,
    ROUND(STDDEV_SAMP(points_desc), 2) AS STD_points_desc
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

# Average According to the Success of Attempts
## Wrong to Right
### For Description First
Attempts that were wrong at the beginning (accuracy below 1) and became right (accuracy equal to 1).

* NUMBER_ATTEMPTS - number of attempts that were wrong in the beginning and became right;
* AVG_TIME_DESC - average time to answer in the description stage (1st stage);
* AVG_CONFIDENCE_DESC - average confidence of the answers given in the description stage (1st stage);
* AVG_TIME_EKG - average time to answer in the EKG stage (2nd stage);
* AVG_CONFIDENCE_EKG - average confidence of the answers given in the EKG stage (2nd stage).

In [8]:
-- Description-first attempts that were wrong in the description stage
-- (points below 1) and right in the EKG stage (points equal to 1):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG
FROM DescEKGCases
WHERE ctype = 'DF'
  AND points_desc < 1
  AND points_ekg = 1;

### For EKG First
Attempts that were wrong at the beginning (accuracy below 1) and became right (accuracy equal to 1).

* NUMBER_ATTEMPTS - number of attempts that were wrong in the beginning and became right;
* AVG_TIME_EKG - average time to answer in the EKG stage (1st stage);
* AVG_CONFIDENCE_EKG - average confidence of the answers given in the EKG stage (1st stage);
* AVG_TIME_DESC - average time to answer in the description stage (2nd stage);
* AVG_CONFIDENCE_DESC - average confidence of the answers given in the description stage (2nd stage).

In [9]:
-- EKG-first attempts that were wrong in the EKG stage (points below 1) and
-- right in the description stage (points equal to 1):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc
FROM DescEKGCases
WHERE ctype = 'EF'
  AND points_ekg < 1
  AND points_desc = 1;

## Wrong stays Wrong
### For Description First

Attempts that were wrong at the beginning (accuracy below 1) and stayed wrong (accuracy below 1).

* NUMBER_ATTEMPTS - number of attempts that were wrong in the beginning and stayed wrong.

In [10]:
-- Description-first attempts that were wrong in both stages (points below 1
-- in the description stage and in the EKG stage):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG
FROM DescEKGCases
WHERE ctype = 'DF'
  AND points_desc < 1
  AND points_ekg < 1;

### For EKG First

In [11]:
-- EKG-first attempts that were wrong in both stages (points below 1 in the
-- EKG stage and in the description stage):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc
FROM DescEKGCases
WHERE ctype = 'EF'
  AND points_ekg < 1
  AND points_desc < 1;

## Right stays Right
### For Description First

Attempts that were right at the beginning (accuracy equal to 1) and stayed right (accuracy equal to 1).

* NUMBER_ATTEMPTS - number of attempts that were right in the beginning and stayed right.

In [12]:
-- Description-first attempts that were right in both stages (points equal to 1
-- in the description stage and in the EKG stage):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG
FROM DescEKGCases
WHERE ctype = 'DF'
  AND points_desc = 1
  AND points_ekg = 1;

### For EKG First

In [13]:
-- EKG-first attempts that were right in both stages (points equal to 1 in the
-- EKG stage and in the description stage):
-- number of attempts plus mean time and mean confidence per stage.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, which truncates the mean to a whole number. Casting to DOUBLE
-- keeps the fractional part.
SELECT
    COUNT(*)                                       AS Number_Attempts,
    ROUND(AVG(time_ekg), 2)                        AS AVG_Time_EKG,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_Confidence_EKG,
    ROUND(AVG(time_desc), 2)                       AS AVG_Time_Desc,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Desc
FROM DescEKGCases
WHERE ctype = 'EF'
  AND points_ekg = 1
  AND points_desc = 1;

# Question 01
The order of the presentation affects the confidence.

## For Description First

* AVG_CONFIDENCE_DESC - average confidence of the answers given in the description stage (1st stage);
* AVG_CONFIDENCE_EKG - average confidence of the answers given in the EKG stage (2nd stage);
* AVG_OVERALL - average confidence of the answers in both stages.

In [14]:
-- Question 01, description-first group (ctype DF): mean confidence in each
-- stage and the mean of those two stage means.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, so both the stage means and the division by 2 would be truncated
-- to whole numbers. Casting to DOUBLE keeps the fractional part.
SELECT
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_confidence_ekg,
    ROUND(
        (AVG(CAST(confidence_desc AS DOUBLE)) + AVG(CAST(confidence_ekg AS DOUBLE))) / 2.0,
        2
    ) AS AVG_Overall
FROM DescEKGCases
WHERE ctype = 'DF';

## For EKG First

* AVG_CONFIDENCE_EKG - average confidence of the answers given in the EKG stage (1st stage);
* AVG_CONFIDENCE_DESC - average confidence of the answers given in the description stage (2nd stage);
* AVG_OVERALL - average confidence of the answers in both stages.

In [15]:
-- Question 01, EKG-first group (ctype EF): mean confidence in each stage and
-- the mean of those two stage means.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, so both the stage means and the division by 2 would be truncated
-- to whole numbers. Casting to DOUBLE keeps the fractional part.
SELECT
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2)  AS AVG_confidence_ekg,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc,
    ROUND(
        (AVG(CAST(confidence_desc AS DOUBLE)) + AVG(CAST(confidence_ekg AS DOUBLE))) / 2.0,
        2
    ) AS AVG_Overall
FROM DescEKGCases
WHERE ctype = 'EF';

# Question 02
The confidence score in the first stage affects the time it takes to accomplish the task in the second stage.

## For Description First
* AVG_CONFIDENCE_DESC - average confidence of the answers given in the description stage (1st stage) grouped per case;
* AVG_TIME_EKG - average time to give the answers in the EKG stage (2nd stage) grouped per case;


* AVG_CONFIDENCE_DESC_OVERALL - average confidence of the answers given in the description stage (1st stage) in all cases;
* AVG_TIME_EKG_OVERALL - average time to give the answers in the EKG stage (2nd stage) in all cases.

In [21]:
-- Question 02, description-first group (ctype DF), per case: mean confidence
-- in the description stage (1st) against mean time in the EKG stage (2nd).
-- confidence_desc is INT and H2 1.4.x returns AVG in the type of its argument,
-- which truncates the mean to a whole number. Casting to DOUBLE keeps the
-- fractional part.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc,
    ROUND(AVG(time_ekg), 2)                        AS AVG_time_ekg
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- Same two measures over all cases of the description-first group.
SELECT
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc_overall,
    ROUND(AVG(time_ekg), 2)                        AS AVG_time_ekg_overall
FROM DescEKGCases
WHERE ctype = 'DF';

## For EKG First
* AVG_CONFIDENCE_EKG - average confidence of the answers given in the EKG stage (1st stage) grouped per case;
* AVG_TIME_DESC - average time to give the answers in the description stage (2nd stage) grouped per case;


* AVG_CONFIDENCE_EKG_OVERALL - average confidence of the answers given in the EKG stage (1st stage) in all cases;
* AVG_TIME_DESC_OVERALL - average time to give the answers in the description stage (2nd stage) in all cases.

In [22]:
-- Question 02, EKG-first group (ctype EF), per case: mean confidence in the
-- EKG stage (1st) against mean time in the description stage (2nd).
-- confidence_ekg is INT and H2 1.4.x returns AVG in the type of its argument,
-- which truncates the mean to a whole number. Casting to DOUBLE keeps the
-- fractional part.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_confidence_ekg,
    ROUND(AVG(time_desc), 2)                      AS AVG_time_desc
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Same two measures over all cases of the EKG-first group.
SELECT
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_confidence_ekg_overall,
    ROUND(AVG(time_desc), 2)                      AS AVG_time_desc_overall
FROM DescEKGCases
WHERE ctype = 'EF';

# Question 3

The confidence score in the first stage will influence the probability of changing the hypothesis.

## Description First

In [None]:
-- Question 3, description-first group (ctype DF), per case: mean confidence in
-- the description stage (1st) and the number of attempts.
-- confidence_desc is INT and H2 1.4.x returns AVG in the type of its argument,
-- which truncates the mean to a whole number. Casting to DOUBLE keeps the
-- fractional part.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc,
    COUNT(*)
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- Mean first-stage confidence and mean second-stage time over all cases of
-- the description-first group.
SELECT
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence_desc_overall,
    ROUND(AVG(time_ekg), 2)                        AS AVG_time_ekg_overall
FROM DescEKGCases
WHERE ctype = 'DF';

### AVG confidence for same answer

In [None]:
-- Description-first attempts whose answer was the same in both stages:
-- mean first-stage (description) confidence and number of attempts per case.
-- Reads DescEKGCases directly: the First_Answer_DF and Second_Answer_DF views
-- are not created anywhere in the visible notebook, and the table already
-- holds both stages of an attempt on the same row.
-- confidence_desc is cast to DOUBLE because H2 1.4.x returns AVG in the type
-- of its argument, which would truncate the mean of an INT column.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence,
    COUNT(*)
FROM DescEKGCases
WHERE ctype = 'DF'
  AND answer_desc = answer_ekg
GROUP BY casen
ORDER BY casen;

### AVG confidence for different answer

In [None]:
-- Description-first attempts whose answer changed between the stages:
-- mean first-stage (description) confidence and number of attempts per case.
-- Reads DescEKGCases directly: the First_Answer_DF and Second_Answer_DF views
-- are not created anywhere in the visible notebook, and the table already
-- holds both stages of an attempt on the same row.
-- confidence_desc is cast to DOUBLE because H2 1.4.x returns AVG in the type
-- of its argument, which would truncate the mean of an INT column.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_confidence,
    COUNT(*)
FROM DescEKGCases
WHERE ctype = 'DF'
  AND answer_desc <> answer_ekg
GROUP BY casen
ORDER BY casen;

## EKG First

In [None]:
-- First answer (EKG stage) of every EKG-first attempt, with its confidence.
-- DescEKGCases has no stage, answer or confidence column. Each row carries
-- both stages, so the stage-specific columns are exposed under the names the
-- later cells expect.
DROP VIEW IF EXISTS First_Answer_EF;
CREATE VIEW First_Answer_EF AS
SELECT
    user,
    casen,
    answer_ekg     AS First_Answer,
    confidence_ekg AS confidence
FROM DescEKGCases
WHERE ctype = 'EF';

-- Second answer (description stage) of every EKG-first attempt.
DROP VIEW IF EXISTS Second_Answer_EF;
CREATE VIEW Second_Answer_EF AS
SELECT
    user,
    casen,
    answer_desc AS Second_Answer
FROM DescEKGCases
WHERE ctype = 'EF';

-- First and second answers side by side, one row per user and case.
SELECT
    E1.user,
    E1.casen,
    E1.First_Answer,
    E1.confidence,
    E2.Second_Answer
FROM First_Answer_EF AS E1
INNER JOIN Second_Answer_EF AS E2
    ON E1.user = E2.user
   AND E1.casen = E2.casen
ORDER BY
    E1.user,
    E1.casen;

### AVG confidence for same answer

In [None]:
-- EKG-first attempts whose answer was the same in both stages:
-- mean first-stage (EKG) confidence and number of attempts per case.
-- Reads DescEKGCases directly, which already holds both stages of an attempt
-- on the same row, so no self-join over the answer views is needed.
-- confidence_ekg is cast to DOUBLE because H2 1.4.x returns AVG in the type
-- of its argument, which would truncate the mean of an INT column.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_confidence,
    COUNT(*)
FROM DescEKGCases
WHERE ctype = 'EF'
  AND answer_ekg = answer_desc
GROUP BY casen
ORDER BY casen;

### AVG confidence for different answer

In [None]:
-- EKG-first attempts whose answer changed between the stages:
-- mean first-stage (EKG) confidence and number of attempts per case.
-- Reads DescEKGCases directly, which already holds both stages of an attempt
-- on the same row, so no self-join over the answer views is needed.
-- confidence_ekg is cast to DOUBLE because H2 1.4.x returns AVG in the type
-- of its argument, which would truncate the mean of an INT column.
SELECT
    casen,
    ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_confidence,
    COUNT(*)
FROM DescEKGCases
WHERE ctype = 'EF'
  AND answer_ekg <> answer_desc
GROUP BY casen
ORDER BY casen;

# Question 4

The time to accomplish the first task between the groups.

In [None]:
-- Question 4: mean time of the FIRST task per case, compared between groups.
-- DescEKGCases has no time or stage column. The stage is encoded in the
-- column name, so the first task is time_desc for the description-first group
-- and time_ekg for the EKG-first group.

-- Description-first group: mean time of the description stage per case.
DROP VIEW IF EXISTS Confidence_AVG_DF;
CREATE VIEW Confidence_AVG_DF AS
SELECT casen, ROUND(AVG(time_desc), 2) AS AVG_Time_Description_First
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- EKG-first group: mean time of the EKG stage per case.
DROP VIEW IF EXISTS Confidence_AVG_EF;
CREATE VIEW Confidence_AVG_EF AS
SELECT casen, ROUND(AVG(time_ekg), 2) AS AVG_Time_EKG_First
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both groups side by side for the cases present in both.
SELECT
    DF.casen,
    DF.AVG_Time_Description_First,
    EF.AVG_Time_EKG_First
FROM Confidence_AVG_DF AS DF
INNER JOIN Confidence_AVG_EF AS EF
    ON DF.casen = EF.casen
ORDER BY DF.casen;

# Time in the EKG

In [None]:
-- Mean time spent in the EKG stage per case, compared between groups.
-- DescEKGCases has no time or stage column. The EKG stage time is time_ekg
-- for both groups.

-- Description-first group: EKG is the second task.
DROP VIEW IF EXISTS Confidence_AVG_DF;
CREATE VIEW Confidence_AVG_DF AS
SELECT casen, ROUND(AVG(time_ekg), 2) AS AVG_Time_Description_First
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- EKG-first group: EKG is the first task.
DROP VIEW IF EXISTS Confidence_AVG_EF;
CREATE VIEW Confidence_AVG_EF AS
SELECT casen, ROUND(AVG(time_ekg), 2) AS AVG_Time_EKG_First
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both groups side by side for the cases present in both.
SELECT
    DF.casen,
    DF.AVG_Time_Description_First,
    EF.AVG_Time_EKG_First
FROM Confidence_AVG_DF AS DF
INNER JOIN Confidence_AVG_EF AS EF
    ON DF.casen = EF.casen
ORDER BY DF.casen;

# Time in the Description

In [None]:
-- Mean time spent in the description stage per case, compared between groups.
-- DescEKGCases has no time or stage column. The description stage time is
-- time_desc for both groups.

-- Description-first group: description is the first task.
DROP VIEW IF EXISTS Confidence_AVG_DF;
CREATE VIEW Confidence_AVG_DF AS
SELECT casen, ROUND(AVG(time_desc), 2) AS AVG_Time_Description_First
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- EKG-first group: description is the second task.
DROP VIEW IF EXISTS Confidence_AVG_EF;
CREATE VIEW Confidence_AVG_EF AS
SELECT casen, ROUND(AVG(time_desc), 2) AS AVG_Time_EKG_First
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both groups side by side for the cases present in both.
SELECT
    DF.casen,
    DF.AVG_Time_Description_First,
    EF.AVG_Time_EKG_First
FROM Confidence_AVG_DF AS DF
INNER JOIN Confidence_AVG_EF AS EF
    ON DF.casen = EF.casen
ORDER BY DF.casen;

# AVG confidence first task

In [None]:
-- Mean confidence in the FIRST task per case, compared between groups.
-- DescEKGCases has no confidence or stage column. The first task is
-- confidence_desc for the description-first group and confidence_ekg for the
-- EKG-first group.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, so they are cast to DOUBLE to avoid a truncated mean.

-- Description-first group: confidence in the description stage.
DROP VIEW IF EXISTS Confidence_AVG_DF;
CREATE VIEW Confidence_AVG_DF AS
SELECT casen, ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_Description_First
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- EKG-first group: confidence in the EKG stage.
DROP VIEW IF EXISTS Confidence_AVG_EF;
CREATE VIEW Confidence_AVG_EF AS
SELECT casen, ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_Confidence_EKG_First
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both groups side by side for the cases present in both.
SELECT
    DF.casen,
    DF.AVG_Confidence_Description_First,
    EF.AVG_Confidence_EKG_First
FROM Confidence_AVG_DF AS DF
INNER JOIN Confidence_AVG_EF AS EF
    ON DF.casen = EF.casen
ORDER BY DF.casen;

# AVG confidence second task

In [None]:
-- Mean confidence in the SECOND task per case, compared between groups.
-- DescEKGCases has no confidence or stage column. The second task is
-- confidence_ekg for the description-first group and confidence_desc for the
-- EKG-first group.
-- The confidence columns are INT and H2 1.4.x returns AVG in the type of its
-- argument, so they are cast to DOUBLE to avoid a truncated mean.

-- Description-first group: confidence in the EKG stage.
DROP VIEW IF EXISTS Confidence_AVG_DF;
CREATE VIEW Confidence_AVG_DF AS
SELECT casen, ROUND(AVG(CAST(confidence_ekg AS DOUBLE)), 2) AS AVG_Confidence_Description_First
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- EKG-first group: confidence in the description stage.
DROP VIEW IF EXISTS Confidence_AVG_EF;
CREATE VIEW Confidence_AVG_EF AS
SELECT casen, ROUND(AVG(CAST(confidence_desc AS DOUBLE)), 2) AS AVG_Confidence_EKG_First
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both groups side by side for the cases present in both.
SELECT
    DF.casen,
    DF.AVG_Confidence_Description_First,
    EF.AVG_Confidence_EKG_First
FROM Confidence_AVG_DF AS DF
INNER JOIN Confidence_AVG_EF AS EF
    ON DF.casen = EF.casen
ORDER BY DF.casen;

# Question 5

The time between the first and the second task.

## Description First

In [None]:
-- Question 5, description-first group (ctype DF): mean time of the first task
-- (description stage) against the second task (EKG stage), per case.
-- DescEKGCases has no time or stage column. The stage is encoded in the
-- column name (time_desc and time_ekg).

-- First task: description stage.
DROP VIEW IF EXISTS Confidence_AVG_Task1_DF;
CREATE VIEW Confidence_AVG_Task1_DF AS
SELECT casen, ROUND(AVG(time_desc), 2) AS AVG_Time_Task1
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- Second task: EKG stage.
DROP VIEW IF EXISTS Confidence_AVG_Task2_DF;
CREATE VIEW Confidence_AVG_Task2_DF AS
SELECT casen, ROUND(AVG(time_ekg), 2) AS AVG_Time_Task2
FROM DescEKGCases
WHERE ctype = 'DF'
GROUP BY casen
ORDER BY casen;

-- Both tasks side by side, one row per case.
SELECT
    T1.casen,
    T1.AVG_Time_Task1,
    T2.AVG_Time_Task2
FROM Confidence_AVG_Task1_DF AS T1
INNER JOIN Confidence_AVG_Task2_DF AS T2
    ON T1.casen = T2.casen
ORDER BY T1.casen;

## EKG First

In [None]:
-- Question 5, EKG-first group (ctype EF): mean time of the first task (EKG
-- stage) against the second task (description stage), per case.
-- DescEKGCases has no time or stage column. The stage is encoded in the
-- column name (time_ekg and time_desc).

-- First task: EKG stage.
DROP VIEW IF EXISTS Confidence_AVG_Task1_EF;
CREATE VIEW Confidence_AVG_Task1_EF AS
SELECT casen, ROUND(AVG(time_ekg), 2) AS AVG_Time_Task1
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Second task: description stage.
DROP VIEW IF EXISTS Confidence_AVG_Task2_EF;
CREATE VIEW Confidence_AVG_Task2_EF AS
SELECT casen, ROUND(AVG(time_desc), 2) AS AVG_Time_Task2
FROM DescEKGCases
WHERE ctype = 'EF'
GROUP BY casen
ORDER BY casen;

-- Both tasks side by side, one row per case.
SELECT
    T1.casen,
    T1.AVG_Time_Task1,
    T2.AVG_Time_Task2
FROM Confidence_AVG_Task1_EF AS T1
INNER JOIN Confidence_AVG_Task2_EF AS T2
    ON T1.casen = T2.casen
ORDER BY T1.casen;

# Question 6

The confidence level in the interpretation of EKG can indicate their proficiency.