In [1]:
%run helper/setup_notebook.ipynb import display_table

Successfully connected to leetcode50 database.


In [2]:
# Preview the `weather` table; `display_table` is expected to come from the setup notebook run above.
display_table('weather')

+----+------------+-------------+
| id | recordDate | temperature |
+----+------------+-------------+
| 1  | 2015-01-01 |      10     |
| 2  | 2015-01-02 |      25     |
| 3  | 2015-01-03 |      20     |
| 4  | 2015-01-04 |      30     |
+----+------------+-------------+


### Write an SQL query to find the `id` of every date whose temperature is higher than that of the previous day (yesterday).

```
Weather table:
+----+------------+-------------+
| id | recordDate | temperature |
+----+------------+-------------+
| 1  | 2015-01-01 | 10          |
| 2  | 2015-01-02 | 25          |
| 3  | 2015-01-03 | 20          |
| 4  | 2015-01-04 | 30          |
+----+------------+-------------+
Output: 
+----+
| id |
+----+
| 2  |
| 4  |
+----+
Explanation: 
In 2015-01-02, the temperature was higher than the previous day (10 -> 25).
In 2015-01-04, the temperature was higher than the previous day (20 -> 30).
```

# Using a CTE

In [3]:
%%sql 
WITH yesterday AS (
    -- Copy of the weather table that plays the role of the previous day in the self-join.
    SELECT 
        id, recordDate, temperature
    FROM weather
)
SELECT weather.id
FROM yesterday
-- Pair each day only with the record dated exactly one day earlier.
-- A row can never be one day apart from itself, so no separate id inequality is needed.
JOIN weather ON DATEDIFF(weather.recordDate, yesterday.recordDate) = 1
-- Keep the days that were warmer than the day before.
WHERE weather.temperature > yesterday.temperature

id
2
4


# Using INNER JOIN

In [4]:
%%sql 

SELECT *
FROM weather AS today
-- Step 1 of the build-up. Self-join that pairs every row with every other row
-- (all ordered pairs whose ids differ).
INNER JOIN weather AS yesterday
    ON today.id != yesterday.id

id,recordDate,temperature,id_1,recordDate_1,temperature_1
4,2015-01-04,30,1,2015-01-01,10
3,2015-01-03,20,1,2015-01-01,10
2,2015-01-02,25,1,2015-01-01,10
4,2015-01-04,30,2,2015-01-02,25
3,2015-01-03,20,2,2015-01-02,25
1,2015-01-01,10,2,2015-01-02,25
4,2015-01-04,30,3,2015-01-03,20
2,2015-01-02,25,3,2015-01-03,20
1,2015-01-01,10,3,2015-01-03,20
3,2015-01-03,20,4,2015-01-04,30


In [5]:
%%sql 

SELECT *
FROM weather AS today
INNER JOIN weather AS yesterday
    ON today.id != yesterday.id
-- Step 2. Of all the pairs, keep those where the yesterday row is dated
-- exactly one day before the today row.
WHERE DATEDIFF(today.recordDate, yesterday.recordDate) = 1

id,recordDate,temperature,id_1,recordDate_1,temperature_1
2,2015-01-02,25,1,2015-01-01,10
3,2015-01-03,20,2,2015-01-02,25
4,2015-01-04,30,3,2015-01-03,20


In [6]:
%%sql 

SELECT *
FROM weather AS today
INNER JOIN weather AS yesterday
    ON today.id != yesterday.id
-- Step 3. Consecutive-day pairs only, and the later day must be the warmer one.
WHERE today.temperature > yesterday.temperature
  AND DATEDIFF(today.recordDate, yesterday.recordDate) = 1

id,recordDate,temperature,id_1,recordDate_1,temperature_1
2,2015-01-02,25,1,2015-01-01,10
4,2015-01-04,30,3,2015-01-03,20


In [7]:
%%sql 

SELECT today.id
FROM weather AS today
INNER JOIN weather AS yesterday
    ON today.id != yesterday.id
-- Final answer. Report just the id of each day that was warmer than the day before it.
WHERE today.temperature > yesterday.temperature
  AND DATEDIFF(today.recordDate, yesterday.recordDate) = 1

id
2
4


# Using Pandas

In [8]:
import pandas as pd 

In [9]:
# Run the query through the %sql line magic and keep the returned result object.
# NOTE(review): the query has no ORDER BY, so the row order of the result is not guaranteed.
weather_query = %sql SELECT * FROM weather # type: ignore 
# Convert the query result into a pandas DataFrame for the pandas-based solutions below.
weather_df = weather_query.DataFrame()
weather_df

Unnamed: 0,id,recordDate,temperature
0,1,2015-01-01,10
1,2,2015-01-02,25
2,3,2015-01-03,20
3,4,2015-01-04,30


In [10]:
# Inspect the column dtypes: 'recordDate' must be a datetime type (not object)
# before any date arithmetic is done on it.
weather_df.dtypes

id              int64
recordDate     object
temperature     int64
dtype: object

In [11]:
# Parse the 'recordDate' values into pandas datetimes so that Timedelta
# arithmetic can be applied to the column later on.
parsed_record_dates = pd.to_datetime(weather_df['recordDate'])
weather_df['recordDate'] = parsed_record_dates

In [12]:
# Re-check the dtypes to confirm that the conversion of 'recordDate' took effect.
weather_df.dtypes

id                      int64
recordDate     datetime64[ns]
temperature             int64
dtype: object

In [13]:
# Make a copy of weather_df (DataFrame.copy() is deep by default).
# NOTE(review): this copy is never used as-is; the next cell assigns a shifted
# frame to the same name, replacing it.
yesterday_weather_df = weather_df.copy()

## shift()

In [14]:
# Build a lagged version of weather_df: row i holds the record that comes just
# before row i of weather_df in date order (NaN/NaT for the earliest date).
#
# The frame is sorted by 'recordDate' before shifting so the lag does not depend
# on the order in which the query happened to return the rows (it has no ORDER BY).
# Reindexing afterwards restores weather_df's own row order, which keeps the
# element-wise comparisons in the following cells label-aligned.
yesterday_weather_df = (
    weather_df
    .sort_values('recordDate')
    .shift(1)
    .reindex(weather_df.index)
)
yesterday_weather_df

Unnamed: 0,id,recordDate,temperature
0,,NaT,
1,1.0,2015-01-01,10.0
2,2.0,2015-01-02,25.0
3,3.0,2015-01-03,20.0


In [15]:

# Step 1: keep rows whose 'id' differs from the 'id' found at the same position
# in the lagged (previous-day) frame.
has_different_id = weather_df['id'] != yesterday_weather_df['id']
weather_df.loc[has_different_id]


Unnamed: 0,id,recordDate,temperature
0,1,2015-01-01,10
1,2,2015-01-02,25
2,3,2015-01-03,20
3,4,2015-01-04,30


In [16]:
# Step 2: on top of the 'id' check, require the current day's 'temperature'
# to be greater than the previous day's 'temperature'.
has_different_id = weather_df['id'] != yesterday_weather_df['id']
is_warmer = weather_df['temperature'] > yesterday_weather_df['temperature']
weather_df.loc[has_different_id & is_warmer]

Unnamed: 0,id,recordDate,temperature
1,2,2015-01-02,25
3,4,2015-01-04,30


In [17]:
# Step 3: all three conditions together.
#   - the 'id' differs from the previous day's 'id'
#   - the 'temperature' is greater than the previous day's 'temperature'
#   - the 'recordDate' is exactly one day after the previous day's 'recordDate'
one_day = pd.Timedelta(days=1)
has_different_id = weather_df['id'] != yesterday_weather_df['id']
is_warmer = weather_df['temperature'] > yesterday_weather_df['temperature']
is_next_day = weather_df['recordDate'] == yesterday_weather_df['recordDate'] + one_day
weather_df.loc[has_different_id & is_warmer & is_next_day]

Unnamed: 0,id,recordDate,temperature
1,2,2015-01-02,25
3,4,2015-01-04,30


In [18]:
# Final answer for the shift() approach: apply the same three conditions, keep
# only the 'id' column, and move it into the index so that no separate
# positional index is displayed (to match the single-column MySQL output).
one_day = pd.Timedelta(days=1)
is_rising_day = (
    (weather_df['id'] != yesterday_weather_df['id'])
    & (weather_df['temperature'] > yesterday_weather_df['temperature'])
    & (weather_df['recordDate'] == yesterday_weather_df['recordDate'] + one_day)
)
weather_df.loc[is_rising_day, ['id']].set_index('id')

2
4


## merge()

In [19]:
# Prepare the right-hand side of the merge: the lagged frame with 'recordDate'
# moved forward by one day, so each previous-day record carries the date of the
# day it should be compared against.
#
# The lagged frame is rebuilt from weather_df first, which makes this cell
# idempotent: re-running it always yields "previous record + 1 day" instead of
# adding another day on every execution.
yesterday_weather_df = (
    weather_df
    .sort_values('recordDate')
    .shift(1)
    .reindex(weather_df.index)
)
yesterday_weather_df['recordDate'] = yesterday_weather_df['recordDate'] + pd.Timedelta(days=1)
yesterday_weather_df

Unnamed: 0,id,recordDate,temperature
0,,NaT,
1,1.0,2015-01-02,10.0
2,2.0,2015-01-03,25.0
3,3.0,2015-01-04,20.0


In [20]:
# Inner-join each day with the lagged record that carries the same 'recordDate';
# the overlapping 'id' / 'temperature' columns get the default _x / _y suffixes.
merged_df = weather_df.merge(
    right=yesterday_weather_df,
    how='inner',
    on='recordDate',
)
merged_df

Unnamed: 0,id_x,recordDate,temperature_x,id_y,temperature_y
0,2,2015-01-02,25,1.0,10.0
1,3,2015-01-03,20,2.0,25.0
2,4,2015-01-04,30,3.0,20.0


In [21]:
# Same inner join on 'recordDate', but leave the current-day columns unsuffixed
# and tag the lagged columns with '_yesterday' for readability.
merged_df = weather_df.merge(
    right=yesterday_weather_df,
    how='inner',
    on='recordDate',
    suffixes=('', '_yesterday'),
)
merged_df

Unnamed: 0,id,recordDate,temperature,id_yesterday,temperature_yesterday
0,2,2015-01-02,25,1.0,10.0
1,3,2015-01-03,20,2.0,25.0
2,4,2015-01-04,30,3.0,20.0


In [22]:
# Keep merged rows that pair two different records and where the current day
# is warmer than the previous day.
is_other_record = merged_df['id'] != merged_df['id_yesterday']
is_warmer = merged_df['temperature'] > merged_df['temperature_yesterday']
merged_df.loc[is_other_record & is_warmer]

Unnamed: 0,id,recordDate,temperature,id_yesterday,temperature_yesterday
0,2,2015-01-02,25,1.0,10.0
2,4,2015-01-04,30,3.0,20.0


In [23]:
# Same filter as before, but show only the 'id' column of the matching rows.
is_other_record = merged_df['id'] != merged_df['id_yesterday']
is_warmer = merged_df['temperature'] > merged_df['temperature_yesterday']
merged_df.loc[is_other_record & is_warmer, ['id']]

Unnamed: 0,id
0,2
2,4


In [24]:
# Final answer for the merge() approach: filter, keep only 'id', and move it
# into the index so that no separate positional index is displayed.
is_rising_day = (
    (merged_df['id'] != merged_df['id_yesterday'])
    & (merged_df['temperature'] > merged_df['temperature_yesterday'])
)
(
    merged_df
    .loc[is_rising_day, ['id']]
    .set_index('id')
)

2
4
