In [1]:
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
# Show the Logs table that every query below reads from.
display_table('Logs')

+----+-----+
| id | num |
+----+-----+
| 1  |  1  |
| 2  |  1  |
| 3  |  1  |
| 4  |  2  |
| 5  |  1  |
| 6  |  2  |
| 7  |  2  |
+----+-----+


### Write an SQL query to find all numbers that appear at least three times consecutively.
```
+-----------------+
| ConsecutiveNums |
+-----------------+
| 1               |
+-----------------+
Explanation: 1 is the only number that appears at least three times consecutively.
```

# Using IN()

In [3]:
%%sql

SELECT DISTINCT
    num AS ConsecutiveNums
FROM Logs
-- the same num must also be stored under the next two ids
WHERE (id + 1, num) IN (SELECT id, num FROM Logs)
  AND (id + 2, num) IN (SELECT id, num FROM Logs)

ConsecutiveNums
1


# Using LAG() and LEAD()

In [4]:
%%sql

SELECT
    num,
    LAG(num)  OVER by_id AS previous_number,
    LEAD(num) OVER by_id AS next_number
FROM Logs
-- one shared id ordering for both the look-behind and the look-ahead
WINDOW by_id AS (ORDER BY id)

num,previous_number,next_number
1,,1.0
1,1.0,1.0
1,1.0,2.0
2,1.0,1.0
1,2.0,2.0
2,1.0,2.0
2,2.0,


In [5]:
%%sql

WITH neighbours AS (
    SELECT
        num,
        LAG(num)  OVER (ORDER BY id) AS previous_number,
        LEAD(num) OVER (ORDER BY id) AS next_number
    FROM Logs
)
-- keep a value only when the rows on both sides carry the same value
SELECT DISTINCT
    num AS ConsecutiveNums
FROM neighbours
WHERE previous_number = num
  AND next_number = num;

ConsecutiveNums
1


## Using SELF-JOIN

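#### *Note: writing the filter as a chained `a = b = c` fails the test case where num is -1, because MySQL evaluates it left to right as `(a = b) = c` and compares the 0/1 result with `c`.*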

In [6]:
%%sql

SELECT
    cur.num
FROM Logs AS cur
-- pair every row with the row stored under the next id, keeping equal values only
INNER JOIN Logs AS nxt
    ON cur.id = nxt.id - 1
   AND cur.num = nxt.num


num
1
1
2


In [7]:
%%sql

SELECT
    first_row.num
FROM Logs AS first_row
INNER JOIN Logs AS second_row
    ON first_row.id = second_row.id - 1
INNER JOIN Logs AS third_row
    ON second_row.id = third_row.id - 1
-- only the first two rows are compared here, third_row.num is not filtered yet
WHERE second_row.num = first_row.num

num
1
1


In [8]:
%%sql

SELECT
    DISTINCT log1.num AS ConsecutiveNums
FROM Logs log1
JOIN Logs log2 ON log1.id = log2.id - 1
JOIN Logs log3 ON log2.id = log3.id - 1
-- Compare the rows pairwise. A chained a = b = c is evaluated as (a = b) = c,
-- which tests the 0 or 1 result of the first comparison against c.
WHERE log1.num = log2.num
  AND log2.num = log3.num

ConsecutiveNums
1


## Using LAG()

In [9]:
%%sql

SELECT
    num,
    LAG(num, 1) OVER by_id AS prev1,
    LAG(num, 2) OVER by_id AS prev2
FROM Logs
-- both look-behinds walk the rows in the same id order
WINDOW by_id AS (ORDER BY id)

num,prev1,prev2
1,,
1,1.0,
1,1.0,1.0
2,1.0,1.0
1,2.0,1.0
2,1.0,2.0
2,2.0,1.0


In [10]:
%%sql

SELECT
    lagged.num AS ConsecutiveNums
FROM (
    SELECT
        num,
        LAG(num, 1) OVER by_id AS prev1,
        LAG(num, 2) OVER by_id AS prev2
    FROM Logs
    WINDOW by_id AS (ORDER BY id)
) AS lagged
-- no DISTINCT here, so a longer run yields one row per qualifying position
WHERE lagged.prev1 = lagged.num
  AND lagged.prev2 = lagged.num

ConsecutiveNums
1


In [11]:
%%sql

WITH lagged AS (
    SELECT
        num,
        LAG(num, 1) OVER (ORDER BY id) AS prev1,
        LAG(num, 2) OVER (ORDER BY id) AS prev2
    FROM Logs
)
-- DISTINCT collapses repeated hits from runs longer than three rows
SELECT DISTINCT
    num AS ConsecutiveNums
FROM lagged
WHERE prev1 = num
  AND prev2 = num

ConsecutiveNums
1


# Using Pandas

In [12]:
logs_query = %sql SELECT * FROM Logs # type: ignore 
logs_df = logs_query.DataFrame()

display(logs_df)

Unnamed: 0,id,num
0,1,1
1,2,1
2,3,1
3,4,2
4,5,1
5,6,2
6,7,2


### The `shift()` method shifts the values in a column up or down by a specified number of periods.

In [13]:
# Move every value one row down; the first row has no predecessor and becomes NaN.
logs_df['num'].shift(periods=1)

0    NaN
1    1.0
2    1.0
3    1.0
4    2.0
5    1.0
6    2.0
Name: num, dtype: float64

In [14]:
# Move every value two rows down; the first two rows become NaN.
logs_df['num'].shift(periods=2)

0    NaN
1    NaN
2    1.0
3    1.0
4    1.0
5    2.0
6    1.0
Name: num, dtype: float64

In [15]:
# Store the previous row's value next to the current one.
logs_df['lagged_num_1'] = logs_df['num'].shift(periods=1)
logs_df

Unnamed: 0,id,num,lagged_num_1
0,1,1,
1,2,1,1.0
2,3,1,1.0
3,4,2,1.0
4,5,1,2.0
5,6,2,1.0
6,7,2,2.0


In [16]:
# Store the value from two rows back next to the current one.
logs_df['lagged_num_2'] = logs_df['num'].shift(periods=2)
logs_df

Unnamed: 0,id,num,lagged_num_1,lagged_num_2
0,1,1,,
1,2,1,1.0,
2,3,1,1.0,1.0
3,4,2,1.0,1.0
4,5,1,2.0,1.0
5,6,2,1.0,2.0
6,7,2,2.0,1.0


In [17]:
# shift() fills the vacated rows with NaN, so the lagged columns were
# automatically upcast to a floating-point dtype ('float64').
# Cast `num` to float as well so the three compared columns share one dtype.
logs_df['num'] = logs_df['num'].astype(float)
logs_df

Unnamed: 0,id,num,lagged_num_1,lagged_num_2
0,1,1.0,,
1,2,1.0,1.0,
2,3,1.0,1.0,1.0
3,4,2.0,1.0,1.0
4,5,1.0,2.0,1.0
5,6,2.0,1.0,2.0
6,7,2.0,2.0,1.0


In [18]:
# Keep the rows whose value equals the values of both preceding rows.
logs_df.query("num == lagged_num_1 == lagged_num_2")

Unnamed: 0,id,num,lagged_num_1,lagged_num_2
2,3,1.0,1.0,1.0


In [19]:
# The comparison ran on floats; show the matching `num` values as integers again.
(
    logs_df
    .query("num == lagged_num_1 == lagged_num_2")
    .loc[:, ['num']]
    .astype(int)
)

Unnamed: 0,num
2,1


In [20]:
# Final answer: an integer column named as the problem statement expects.
# drop_duplicates() mirrors the DISTINCT in the SQL solutions, because a run
# longer than three rows matches the filter more than once.
(
    logs_df
    .query("num == lagged_num_1 == lagged_num_2")[['num']]
    .drop_duplicates()
    .astype(int)
    .rename(columns={'num': 'ConsecutiveNums'})
)

Unnamed: 0,ConsecutiveNums
2,1
