# SQL Practice

The dataset and exercises are from [kiwidamien's SQL_practice repository](https://github.com/kiwidamien/SQL_practice).

In [3]:
import pandas as pd
from pandasql import sqldf
import sqlite3
import math

In [4]:
def pysqldf(q):
    """Run the SQL query string ``q`` with pandasql against this notebook's globals.

    ``sqldf`` receives ``globals()`` as its environment, so names defined at
    notebook level are what the query is evaluated against.
    Returns whatever ``sqldf`` returns.
    """
    return sqldf(q, globals())

## 1. Seattle Weather

In [6]:
# Read the cleaned Seattle weather CSV into a DataFrame.
weather = pd.read_csv("01_seattle_weather/cleaned_weather.csv")

# Copy the DataFrame into a fresh in-memory SQLite database as table `weather`
# (without the pandas index) so the exercises below can be written in plain SQL.
conn_1 = sqlite3.connect(":memory:")
weather.to_sql(name='weather', con=conn_1, index=False)


25548

**1. Select all rows from December 1st, 2000 to December 15th, 2000 (inclusive)**

In [8]:
# String literals are single-quoted: SQLite parses double-quoted tokens as
# identifiers first and only falls back to a string when no such column exists.
# BETWEEN is inclusive on both ends, matching the ">= ... AND <= ..." intent.
query = """
SELECT *
FROM weather
WHERE DATE BETWEEN '2000-12-01' AND '2000-12-15'
"""

result = pd.read_sql_query(query, conn_1)
print(result)

          DATE  PRCP  TMAX  TMIN  RAIN
0   2000-12-01  0.04    55    39     1
1   2000-12-02  0.18    51    37     1
2   2000-12-03  0.00    44    34     0
3   2000-12-04  0.00    51    37     0
4   2000-12-05  0.00    50    36     0
5   2000-12-06  0.00    50    35     0
6   2000-12-07  0.00    40    34     0
7   2000-12-08  0.02    45    30     1
8   2000-12-09  0.06    43    36     1
9   2000-12-10  0.00    40    30     0
10  2000-12-11  0.00    37    28     0
11  2000-12-12  0.00    37    28     0
12  2000-12-13  0.00    37    30     0
13  2000-12-14  0.26    46    28     1
14  2000-12-15  0.00    42    35     0


**2. Get the average maximum temperature for every year from the year 2000 onward. Order the results by year (ascending)**

In [10]:
# Filter rows with WHERE before grouping (HAVING is meant for conditions on
# aggregates), use a single-quoted string literal, and add the explicit
# ORDER BY the exercise asks for instead of relying on GROUP BY's incidental order.
query = """
SELECT STRFTIME('%Y', DATE) AS YEAR,
       AVG(TMAX)
FROM weather
WHERE STRFTIME('%Y', DATE) >= '2000'
GROUP BY YEAR
ORDER BY YEAR ASC
"""

result = pd.read_sql_query(query, conn_1)
print(result)

    YEAR  AVG(TMAX)
0   2000  58.674863
1   2001  58.473973
2   2002  58.893151
3   2003  60.441096
4   2004  60.622951
5   2005  60.148352
6   2006  61.038356
7   2007  59.202740
8   2008  58.494536
9   2009  59.912329
10  2010  59.663014
11  2011  58.139726
12  2012  59.502732
13  2013  60.901370
14  2014  62.594521
15  2015  63.369863
16  2016  62.546448
17  2017  61.727011


**3. Get the standard deviation of the maximum temperature per year, from 2000 onward. Order by year (ascending)**

In [12]:
def _sql_sqrt(value):
    """Square root for use as a SQLite scalar function.

    SQL NULL arrives as ``None``; return ``None`` (NULL) for it instead of
    letting ``math.sqrt`` raise ``TypeError`` and abort the whole query.
    """
    if value is None:
        return None
    return math.sqrt(value)


# Register a one-argument SQL function named SQRT on this connection.
conn_1.create_function("SQRT", 1, _sql_sqrt)

In [13]:
# Population variance via E[X^2] - E[X]^2; the standard deviation is its
# square root, computed by the SQRT function registered on conn_1.
# Rows are filtered with WHERE (single-quoted literal) before grouping, and the
# result is explicitly ordered by year as the exercise requires.
query = """
SELECT
    STRFTIME('%Y', DATE) AS YEAR,
    AVG(TMAX) AS mean_TMAX,
    AVG(TMAX * TMAX) - AVG(TMAX) * AVG(TMAX) AS variance_TMAX,
    SQRT(AVG(TMAX * TMAX) - AVG(TMAX) * AVG(TMAX)) AS standard_deviation_TMAX
FROM weather
WHERE STRFTIME('%Y', DATE) >= '2000'
GROUP BY YEAR
ORDER BY YEAR ASC
"""

result = pd.read_sql_query(query, conn_1)
print(result)

    YEAR  mean_TMAX  variance_TMAX  standard_deviation_TMAX
0   2000  58.674863     131.574614                11.470598
1   2001  58.473973     124.545213                11.159983
2   2002  58.893151     151.026939                12.289302
3   2003  60.441096     165.238311                12.854505
4   2004  60.622951     158.606468                12.593906
5   2005  60.148352     141.054915                11.876654
6   2006  61.038356     169.790310                13.030361
7   2007  59.202740     166.391773                12.899294
8   2008  58.494536     168.463085                12.979333
9   2009  59.912329     202.019711                14.213364
10  2010  59.663014     124.196029                11.144327
11  2011  58.139726     154.635271                12.435243
12  2012  59.502732     161.954911                12.726151
13  2013  60.901370     184.516299                13.583678
14  2014  62.594521     170.723258                13.066111
15  2015  63.369863     173.285119      

**4. What are the 10 hottest days on record? Take hottest to mean 'highest maximum temperature'**

In [29]:
# Several days share the same TMAX, so DATE is added as a tie-breaker to make
# both the row order and the LIMIT cutoff deterministic.
query = """
SELECT *
FROM weather
ORDER BY TMAX DESC, DATE ASC
LIMIT 10
"""

result = pd.read_sql_query(query, conn_1)
print(result)

         DATE  PRCP  TMAX  TMIN  RAIN
0  2009-07-29   0.0   103    71     0
1  1994-07-20   0.0   100    65     0
2  1960-08-09   0.0    99    59     0
3  1981-08-09   0.0    99    68     0
4  1991-07-23   0.0    99    65     0
5  1960-08-08   0.0    98    66     0
6  1967-08-16   0.0    98    59     0
7  1979-07-16   0.0    98    63     0
8  1981-08-10   0.0    98    67     0
9  1988-09-02   0.0    98    59     0


**5. In 2016, what fraction of days did it rain?**

In [38]:
# AVG(RAIN) is the fraction of days in the range whose RAIN flag is 1 — this
# assumes RAIN is a 0/1 indicator, as the printed tables suggest.
# Date literals are single-quoted (double quotes denote identifiers in SQL).
query = """
SELECT AVG(RAIN) AS rain_fraction_2016
FROM weather
WHERE DATE BETWEEN '2016-01-01' AND '2016-12-31'
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   rain_fraction_2016
0            0.469945


**6. What is the 75th percentile for the amount of rain that fell on a day where there was some rain in 2016?**

In [67]:
# 75th percentile by position: sort the rainy-day amounts and skip the first
# 75% of the rows. OFFSET must be an integer (SQLite raises an error for a
# value that cannot be losslessly converted), so the fractional row count is
# rounded and cast explicitly instead of relying on COUNT(*) being a multiple
# of 4. The redundant outer subquery has been removed; the result is unchanged.
query = """
SELECT PRCP
FROM weather
WHERE PRCP > 0
  AND STRFTIME('%Y', DATE) = '2016'
ORDER BY PRCP
LIMIT 1 OFFSET (
    SELECT CAST(ROUND(COUNT(*) * 0.75) AS INTEGER)
    FROM weather
    WHERE PRCP > 0
      AND STRFTIME('%Y', DATE) = '2016'
)
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   PRCP
0  0.33


**7. What is the 75th percentile for the amount of rain that fell on any day in 2016?**

In [127]:
# 75th percentile of daily precipitation over every 2016 row: order by PRCP
# and take the single row located 75% of the way through (offset rounded).
query = """
SELECT PRCP
    FROM weather
    WHERE 
        STRFTIME('%Y', DATE) = '2016'
    ORDER BY PRCP
    LIMIT 1 OFFSET ROUND((SELECT COUNT(*) * 0.75 
         FROM weather
         WHERE STRFTIME('%Y', DATE) = '2016'))
    
"""

# Execute on the in-memory connection and print the resulting DataFrame.
result = pd.read_sql_query(sql=query, con=conn_1)
print(result)

   PRCP
0  0.15


**8. Get the 10 years with the hottest average maximum temperature in July. Order from hottest to coolest**

In [168]:
# Keep only July rows ('%m' yields a zero-padded month), average TMAX per
# year and return the ten highest averages. The month literal is single-quoted
# so SQLite cannot interpret it as an identifier.
query = """
SELECT
    STRFTIME('%Y', DATE) AS YEAR,
    AVG(TMAX) AS avg_july_high_temp
FROM weather
WHERE STRFTIME('%m', DATE) = '07'
GROUP BY YEAR
ORDER BY avg_july_high_temp DESC
LIMIT 10
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   YEAR  avg_july_high_temp
0  2015           82.580645
1  1958           81.419355
2  2009           80.967742
3  1985           80.935484
4  2014           80.419355
5  1960           79.645161
6  1965           79.451613
7  1990           79.193548
8  2013           78.967742
9  2003           78.967742


**9. Get the 10 years with the coldest average minimum temperature in December. Order from coolest to hottest**

In [175]:
# Keep only December rows, average TMIN per year and return the ten lowest
# averages. The month literal is single-quoted (double quotes are for
# identifiers in SQL).
query = """
SELECT
    STRFTIME('%Y', DATE) AS YEAR,
    AVG(TMIN) AS avg_dec_min_temp
FROM weather
WHERE STRFTIME('%m', DATE) = '12'
GROUP BY YEAR
ORDER BY avg_dec_min_temp ASC
LIMIT 10
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   YEAR  avg_dec_min_temp
0  1990         30.387097
1  1948         30.806452
2  1985         30.935484
3  1951         31.225806
4  1964         31.483871
5  1983         31.516129
6  1968         32.032258
7  1984         32.096774
8  2009         32.096774
9  1978         32.161290


**10. Repeat the last question, but round the temperatures to 3 decimal places**

In [181]:
# Same ranking as the previous exercise, with the displayed average rounded to
# 3 decimals. Ordering uses the unrounded AVG(TMIN) so rounding cannot change
# which years make the top 10 or their relative order. The month literal is
# single-quoted (double quotes are for identifiers in SQL).
query = """
SELECT
    STRFTIME('%Y', DATE) AS YEAR,
    ROUND(AVG(TMIN), 3) AS avg_dec_min_temp
FROM weather
WHERE STRFTIME('%m', DATE) = '12'
GROUP BY YEAR
ORDER BY AVG(TMIN) ASC
LIMIT 10
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   YEAR  avg_dec_min_temp
0  1990            30.387
1  1948            30.806
2  1985            30.935
3  1951            31.226
4  1964            31.484
5  1983            31.516
6  1968            32.032
7  1984            32.097
8  2009            32.097
9  1978            32.161


**11. Give the average inches of rain that fell per day for each month, where the average is taken over 2000 - 2010 (inclusive)**

In [234]:
# Average daily precipitation per calendar month, pooled over the years
# 2000-2010 inclusive. Year bounds are single-quoted string literals compared
# against the 4-digit year text produced by STRFTIME('%Y', ...).
query = """
SELECT
    STRFTIME('%m', DATE) AS month,
    AVG(PRCP) AS avg_daily_inches_rain
FROM weather
WHERE STRFTIME('%Y', DATE) BETWEEN '2000' AND '2010'
GROUP BY month
ORDER BY month ASC
"""

result = pd.read_sql_query(query, conn_1)
print(result)

   month  avg_daily_inches_rain
0     01               0.191613
1     02               0.094277
2     03               0.113578
3     04               0.085364
4     05               0.068035
5     06               0.050182
6     07               0.016129
7     08               0.034370
8     09               0.056930
9     10               0.115543
10    11               0.216242
11    12               0.180059
