In [None]:
# One-time setup: uncomment the next line to install ipython-sql, the package behind the
# `%load_ext sql` call and the %sql / %%sql magics used throughout this notebook.
# !pip3 install ipython-sql

In [60]:
import os
import pandas as pd

# NOTE(review): `Vizro._reset()` is a private call; presumably it clears state left by a
# previous run so this cell can be re-executed in the same kernel — confirm against Vizro docs.
from vizro import Vizro; Vizro._reset()
import vizro.models as vm
from vizro.models.types import capture
# import vizro.plotly.express as px
import plotly.graph_objects as go
import plotly.express as px

In [None]:
# Load the `sql` IPython extension so the %sql and %%sql magics used below are available.
%load_ext sql

In [None]:
# Create the wind_energy_magic database only when pg_database has no row for it yet;
# otherwise just print a message.
# NOTE(review): the connection URL embeds the postgres user name and password in plain text.
!psql postgresql://postgres:postgres@localhost:5432/postgres -tc \
"SELECT 1 FROM pg_database WHERE datname = 'wind_energy_magic'" | grep -q 1 \
&& echo "Database already exists. Doing nothing" \
|| (echo "Creating database..."; \
psql postgresql://postgres:postgres@localhost:5432/postgres -c "CREATE DATABASE wind_energy_magic;")

This command works as follows:

The first psql command (-tc "SELECT 1 FROM pg_database WHERE datname = 'wind_energy_magic'") checks if a database named wind_energy_magic exists. The -t flag makes psql print rows only (no column headers or row-count footer), and -c runs the given SQL command and then exits.
The grep -q 1 part checks if the output of the previous command contains 1 (which indicates that the database exists). The -q flag makes grep quiet; it doesn't output the lines, it just returns 0 (true) if the string was found, and 1 (false) if not.
The || is a logical OR operator in shell scripting. If the grep command returns false (meaning the database does not exist), the command after || is executed.
The second psql command after || (-c "CREATE DATABASE wind_energy_magic;") creates the database.
This is a non-standard but effective way to conditionally create a database in PostgreSQL when working from a command line environment like a Jupyter Notebook. Remember, always exercise caution when executing shell commands, especially when they involve database operations.

The command also prints a message in both cases, so the cell output shows which branch ran. In a shell command, echo is used for printing messages; the command above has one echo on the "already exists" branch and one on the "create" branch.

This command works as follows:

It first checks if the wind_energy_magic database exists.
If the database exists (grep -q 1 returns true), it executes echo "Database already exists. Doing nothing" after the &&.
If the database does not exist (grep -q 1 returns false), it executes the commands after ||, which are:
echo "Creating database..." to print a message indicating that the database is being created.
The psql command to actually create the database.
This way, you will get a clear message in your Jupyter Notebook output indicating whether the database was created or if it already existed.

In [None]:
# Establish connection with database
# Connects the %sql magic to the wind_energy_magic database through the psycopg2 driver.
# NOTE(review): user name and password are hard-coded in the URL.
%sql postgresql+psycopg2://postgres:postgres@localhost:5432/wind_energy_magic

In [None]:
%%sql
-- Create the wind_sites schema and the observations table when they do not exist yet.
CREATE SCHEMA IF NOT EXISTS wind_sites;

-- One row per timestamped reading. Units are not stated in this notebook
-- (NOTE(review) wind_direction is presumably in degrees, given the 0 to 360 ranges
-- used by the wind rose queries, confirm).
CREATE TABLE IF NOT EXISTS wind_sites.upd_wind_site (
    id SERIAL PRIMARY KEY,
    date_time TIMESTAMP NOT NULL,
    wind_speed DECIMAL NOT NULL,
    gust_speed DECIMAL NOT NULL,
    wind_direction DECIMAL NOT NULL
);

In [None]:
import os
os.environ["PGPASSWORD"] = "postgres"

!psql -h localhost -U postgres -d wind_energy_magic -tc \
"SELECT count(*) FROM wind_sites.upd_wind_site" | grep -q '^0$' \
&& psql -h localhost -U postgres -d wind_energy_magic -c \
"\\copy wind_sites.upd_wind_site (date_time,wind_speed,gust_speed,wind_direction) \
FROM '/Users/gioabeleda/Desktop/wind-energy-dashboard/data/wind_energy.csv' \
DELIMITER ',' CSV HEADER;" \
|| echo "Data already exists in the table. Doing nothing."

In [None]:
%%sql monthly_data_availability <<

-- Number of distinct calendar days that have at least one reading, per year and month.
SELECT
    EXTRACT(YEAR FROM date_time) AS year,
    EXTRACT(MONTH FROM date_time) AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    COUNT(DISTINCT EXTRACT(DAY FROM date_time)) AS days_count
FROM
    wind_sites.upd_wind_site
GROUP BY 1, 2, 3
-- Explicit ordering so rows arrive chronologically. Without ORDER BY the row order
-- of a grouped query is unspecified.
ORDER BY 1, 2
;

In [None]:
%%sql diurnal_variation_daily <<

-- Average wind speed for every hour of every day.
-- The hour is shifted by one, so it runs from 1 to 24 instead of 0 to 23.
SELECT
    EXTRACT(YEAR FROM date_time) AS year,
    EXTRACT(MONTH FROM date_time) AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    EXTRACT(DAY FROM date_time) AS day,
    EXTRACT(HOUR FROM date_time) + 1 AS hour,
    ROUND(AVG(wind_speed), 3) AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1, 2, 3, 4, 5
-- Chronological order (year, month, day, hour). Without ORDER BY the row order is
-- unspecified, which would scramble any line plotted against hour.
ORDER BY 1, 2, 4, 5
;

In [None]:
%%sql diurnal_variation_monthly <<

-- Average wind speed for every hour of the day, per year and month.
-- The hour is shifted by one, so it runs from 1 to 24 instead of 0 to 23.
SELECT
    EXTRACT(YEAR FROM date_time) AS year,
    EXTRACT(MONTH FROM date_time) AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    EXTRACT(HOUR FROM date_time) + 1 AS hour,
    ROUND(AVG(wind_speed), 3) AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1, 2, 3, 4
-- Chronological order (year, month, hour). Without ORDER BY the row order is
-- unspecified, which would scramble any line plotted against hour.
ORDER BY 1, 2, 4
;

In [None]:
%%sql diurnal_variation_yearly <<

-- Average wind speed for every hour of the day, per year.
-- The hour is shifted by one, so it runs from 1 to 24 instead of 0 to 23.
SELECT
    EXTRACT(YEAR FROM date_time) AS year,
    EXTRACT(HOUR FROM date_time) + 1 AS hour,
    ROUND(AVG(wind_speed), 3) AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1, 2
-- Order by year then hour. Without ORDER BY the row order is unspecified,
-- which would scramble any line plotted against hour.
ORDER BY 1, 2
;

In [None]:
%%sql frequency_distribution_monthly <<

-- Wind speed histogram per month. Readings are placed in equal bins between 0 and the
-- overall maximum speed rounded up, using as many bins as that rounded maximum, so each
-- bin is one speed unit wide. Each row gives the bin count and its share of the month.
WITH speed_ceiling AS (
    SELECT CEIL(MAX(wind_speed)) AS max_speed
    FROM wind_sites.upd_wind_site
),
binned AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        width_bucket(
            s.wind_speed::double precision,
            0,
            c.max_speed::integer,
            c.max_speed::integer
        ) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    -- speed_ceiling always holds exactly one row, so this only attaches max_speed.
    CROSS JOIN speed_ceiling AS c
),
monthly_counts AS (
    SELECT
        year,
        month,
        year_month,
        speed_bin,
        COUNT(*) AS frequency,
        -- Window over the grouped rows gives the total number of readings in the month.
        SUM(COUNT(*)) OVER (PARTITION BY year, month) AS monthly_total
    FROM binned
    GROUP BY year, month, year_month, speed_bin
)
SELECT
    year,
    month,
    year_month,
    speed_bin,
    frequency,
    ROUND((frequency / monthly_total) * 100, 3) AS percent_frequency
FROM monthly_counts
ORDER BY year, month, year_month, speed_bin
;

In [None]:
%%sql frequency_distribution_yearly <<

-- Wind speed histogram per year. Readings are placed in equal bins between 0 and the
-- overall maximum speed rounded up, using as many bins as that rounded maximum, so each
-- bin is one speed unit wide. Each row gives the bin count and its share of the year.
WITH speed_ceiling AS (
    SELECT CEIL(MAX(wind_speed)) AS max_speed
    FROM wind_sites.upd_wind_site
),
binned AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        width_bucket(
            s.wind_speed::double precision,
            0,
            c.max_speed::double precision,
            c.max_speed::integer
        ) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    -- speed_ceiling always holds exactly one row, so this only attaches max_speed.
    CROSS JOIN speed_ceiling AS c
),
yearly_counts AS (
    SELECT
        year,
        speed_bin,
        COUNT(*) AS frequency,
        -- Window over the grouped rows gives the total number of readings in the year.
        SUM(COUNT(*)) OVER (PARTITION BY year) AS yearly_total
    FROM binned
    GROUP BY year, speed_bin
)
SELECT
    year,
    speed_bin,
    frequency,
    ROUND((frequency / yearly_total) * 100, 3) AS percent_frequency
FROM yearly_counts
ORDER BY year, speed_bin
;

In [None]:
%%sql wind_rose_by_hour_daily <<

-- Wind rose built from hourly averages, one rose per calendar day.
-- Each (day, compass sector, speed bin) row carries its count, its share of that day
-- in percent, and the running share across speed bins within the sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed), 3) AS avg_wind_speed,
        -- Directions are angles, so they are averaged as unit vectors (circular mean)
        -- and mapped back into the 0 to 360 range. A plain AVG would turn readings of
        -- 350 and 10 degrees into 180 (south) instead of 0 (north).
        ROUND(
            MOD(
                (DEGREES(ATAN2(
                    AVG(SIN(RADIANS(wind_direction))),
                    AVG(COS(RADIANS(wind_direction)))
                )) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1, 2, 3, 4, 5
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
BinnedSpeed AS (
    SELECT
        year,
        month,
        year_month,
        day,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(
            avg_wind_speed,
            0,
            (SELECT max_speed FROM MaxWindSpeed)::integer,
            (SELECT max_speed FROM MaxWindSpeed)::integer
        ) AS speed_bin
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    SELECT
        year,
        month,
        year_month,
        day,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- Eight 45 degree sectors centred on the compass points. Ranges share their
        -- end points and the first matching branch wins. Values outside 0 to 360 give NULL.
        CASE
            WHEN avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        speed_bin
    FROM BinnedSpeed
    -- Hours with no wind are left out of the rose.
    WHERE avg_wind_speed > 0
),
Frequency AS (
    SELECT
        year,
        month,
        year_month,
        day,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin
    FROM CardinalDirections
    GROUP BY 1, 2, 3, 4, 5, 6
),
TotalCounts AS (
    SELECT
        year,
        month,
        year_month,
        day,
        COUNT(*) AS count_total
    FROM CardinalDirections
    GROUP BY 1, 2, 3, 4
),
PercentFrequency AS (
    SELECT
        f.year,
        f.month,
        f.year_month,
        f.day,
        f.cardinal_direction,
        f.speed_bin,
        f.count_speed_bin,
        tc.count_total,
        CASE
            WHEN tc.count_total > 0 THEN ROUND((f.count_speed_bin * 100.0) / tc.count_total, 3)
            ELSE 0
        END AS percent_frequency
    FROM Frequency AS f
    JOIN TotalCounts AS tc
    ON
        f.year = tc.year AND
        f.month = tc.month AND
        f.year_month = tc.year_month AND
        f.day = tc.day
    ORDER BY 1, 2, 3, 4, 5, 6
)
SELECT
    year,
    month,
    year_month,
    day,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, day, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1, 2, 3, 4, 5, 6
;

In [None]:
%%sql wind_rose_by_hour_monthly <<

-- Wind rose built from hour-of-day averages, one rose per month.
-- Each (month, compass sector, speed bin) row carries its count, its share of that
-- month in percent, and the running share across speed bins within the sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed), 3) AS avg_wind_speed,
        -- Directions are angles, so they are averaged as unit vectors (circular mean)
        -- and mapped back into the 0 to 360 range. A plain AVG would turn readings of
        -- 350 and 10 degrees into 180 (south) instead of 0 (north).
        ROUND(
            MOD(
                (DEGREES(ATAN2(
                    AVG(SIN(RADIANS(wind_direction))),
                    AVG(COS(RADIANS(wind_direction)))
                )) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1, 2, 3, 4
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
BinnedSpeed AS (
    SELECT
        year,
        month,
        year_month,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(
            avg_wind_speed,
            0,
            (SELECT max_speed FROM MaxWindSpeed)::integer,
            (SELECT max_speed FROM MaxWindSpeed)::integer
        ) AS speed_bin
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    SELECT
        year,
        month,
        year_month,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- Eight 45 degree sectors centred on the compass points. Ranges share their
        -- end points and the first matching branch wins. Values outside 0 to 360 give NULL.
        CASE
            WHEN avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        speed_bin
    FROM BinnedSpeed
    -- Hours with no wind are left out of the rose.
    WHERE avg_wind_speed > 0
),
Frequency AS (
    SELECT
        year,
        month,
        year_month,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin
    FROM CardinalDirections
    GROUP BY 1, 2, 3, 4, 5
),
TotalCounts AS (
    SELECT
        year,
        month,
        year_month,
        COUNT(*) AS count_total
    FROM CardinalDirections
    GROUP BY 1, 2, 3
),
PercentFrequency AS (
    SELECT
        f.year,
        f.month,
        f.year_month,
        f.cardinal_direction,
        f.speed_bin,
        f.count_speed_bin,
        tc.count_total,
        CASE
            WHEN tc.count_total > 0 THEN ROUND((f.count_speed_bin * 100.0) / tc.count_total, 3)
            ELSE 0
        END AS percent_frequency
    FROM Frequency AS f
    JOIN TotalCounts AS tc
    ON
        f.year = tc.year AND
        f.month = tc.month AND
        f.year_month = tc.year_month
    ORDER BY 1, 2, 3, 4, 5
)
SELECT
    year,
    month,
    year_month,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1, 2, 3, 4, 5
;

In [None]:
%%sql wind_rose_by_hour_yearly <<

-- Wind rose built from hour-of-day averages, one rose per year.
-- Each (year, compass sector, speed bin) row carries its count, its share of that
-- year in percent, and the running share across speed bins within the sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed), 3) AS avg_wind_speed,
        -- Directions are angles, so they are averaged as unit vectors (circular mean)
        -- and mapped back into the 0 to 360 range. A plain AVG would turn readings of
        -- 350 and 10 degrees into 180 (south) instead of 0 (north).
        ROUND(
            MOD(
                (DEGREES(ATAN2(
                    AVG(SIN(RADIANS(wind_direction))),
                    AVG(COS(RADIANS(wind_direction)))
                )) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1, 2
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
BinnedSpeed AS (
    SELECT
        year,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(
            avg_wind_speed,
            0,
            (SELECT max_speed FROM MaxWindSpeed)::integer,
            (SELECT max_speed FROM MaxWindSpeed)::integer
        ) AS speed_bin
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    SELECT
        year,
        hour,
        avg_wind_speed,
        avg_wind_direction,
        -- Eight 45 degree sectors centred on the compass points. Ranges share their
        -- end points and the first matching branch wins. Values outside 0 to 360 give NULL.
        CASE
            WHEN avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        speed_bin
    FROM BinnedSpeed
    -- NOTE(review) the zero-speed filter is disabled in this yearly variant only,
    -- unlike the daily and monthly by-hour queries, confirm that this is intentional.
    -- WHERE avg_wind_speed > 0
),
Frequency AS (
    SELECT
        year,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin
    FROM CardinalDirections
    GROUP BY 1, 2, 3
),
TotalCounts AS (
    SELECT
        year,
        COUNT(*) AS count_total
    FROM CardinalDirections
    GROUP BY 1
),
PercentFrequency AS (
    SELECT
        f.year,
        f.cardinal_direction,
        f.speed_bin,
        f.count_speed_bin,
        tc.count_total,
        CASE
            WHEN tc.count_total > 0 THEN ROUND((f.count_speed_bin * 100.0) / tc.count_total, 3)
            ELSE 0
        END AS percent_frequency
    FROM Frequency AS f
    JOIN TotalCounts AS tc
    ON
        f.year = tc.year
    ORDER BY 1, 2, 3
)
SELECT
    year,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1, 2, 3
;

In [None]:
%%sql wind_rose_raw_daily <<

-- Wind rose from the raw readings, one rose per calendar day. For every compass sector
-- and speed bin it reports the count, its share of the day in percent, and the running
-- share across speed bins within the sector. Readings with zero speed are excluded.
WITH speed_ceiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS max_speed
    FROM wind_sites.upd_wind_site
),
classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM s.date_time) AS day,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(s.wind_speed, 0, c.max_speed, c.max_speed) AS speed_bin,
        -- Eight 45 degree sectors centred on the compass points. Upper bounds are
        -- inclusive, and anything outside 0 to 360 stays NULL.
        CASE
            WHEN s.wind_direction < 0 OR s.wind_direction > 360 THEN NULL
            WHEN s.wind_direction <= 22.5 THEN 'N'
            WHEN s.wind_direction <= 67.5 THEN 'NE'
            WHEN s.wind_direction <= 112.5 THEN 'E'
            WHEN s.wind_direction <= 157.5 THEN 'SE'
            WHEN s.wind_direction <= 202.5 THEN 'S'
            WHEN s.wind_direction <= 247.5 THEN 'SW'
            WHEN s.wind_direction <= 292.5 THEN 'W'
            WHEN s.wind_direction <= 337.5 THEN 'NW'
            WHEN s.wind_direction <= 360 THEN 'N'
        END AS cardinal_direction
    FROM wind_sites.upd_wind_site AS s
    -- speed_ceiling always holds exactly one row, so this only attaches max_speed.
    CROSS JOIN speed_ceiling AS c
    WHERE s.wind_speed > 0
),
sector_bin_counts AS (
    SELECT
        year, month, year_month, day, cardinal_direction, speed_bin,
        COUNT(*) AS count_speed_bin
    FROM classified
    GROUP BY year, month, year_month, day, cardinal_direction, speed_bin
),
period_totals AS (
    SELECT
        year, month, year_month, day,
        COUNT(*) AS count_total
    FROM classified
    GROUP BY year, month, year_month, day
),
shares AS (
    SELECT
        b.year,
        b.month,
        b.year_month,
        b.day,
        b.cardinal_direction,
        b.speed_bin,
        b.count_speed_bin,
        t.count_total,
        -- Every joined total counts at least the rows of its own bin, so it is never zero.
        ROUND((b.count_speed_bin * 100.0) / t.count_total, 3) AS percent_frequency
    FROM sector_bin_counts AS b
    JOIN period_totals AS t
        ON b.year = t.year
        AND b.month = t.month
        AND b.year_month = t.year_month
        AND b.day = t.day
)
SELECT
    year,
    month,
    year_month,
    day,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, day, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM shares
ORDER BY year, month, year_month, day, cardinal_direction, speed_bin
;

In [None]:
%%sql wind_rose_raw_monthly <<

-- Wind rose from the raw readings, one rose per month. For every compass sector and
-- speed bin it reports the count, its share of the month in percent, and the running
-- share across speed bins within the sector. Readings with zero speed are excluded.
WITH speed_ceiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS max_speed
    FROM wind_sites.upd_wind_site
),
classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(s.wind_speed, 0, c.max_speed, c.max_speed) AS speed_bin,
        -- Eight 45 degree sectors centred on the compass points. Upper bounds are
        -- inclusive, and anything outside 0 to 360 stays NULL.
        CASE
            WHEN s.wind_direction < 0 OR s.wind_direction > 360 THEN NULL
            WHEN s.wind_direction <= 22.5 THEN 'N'
            WHEN s.wind_direction <= 67.5 THEN 'NE'
            WHEN s.wind_direction <= 112.5 THEN 'E'
            WHEN s.wind_direction <= 157.5 THEN 'SE'
            WHEN s.wind_direction <= 202.5 THEN 'S'
            WHEN s.wind_direction <= 247.5 THEN 'SW'
            WHEN s.wind_direction <= 292.5 THEN 'W'
            WHEN s.wind_direction <= 337.5 THEN 'NW'
            WHEN s.wind_direction <= 360 THEN 'N'
        END AS cardinal_direction
    FROM wind_sites.upd_wind_site AS s
    -- speed_ceiling always holds exactly one row, so this only attaches max_speed.
    CROSS JOIN speed_ceiling AS c
    WHERE s.wind_speed > 0
),
sector_bin_counts AS (
    SELECT
        year, month, year_month, cardinal_direction, speed_bin,
        COUNT(*) AS count_speed_bin
    FROM classified
    GROUP BY year, month, year_month, cardinal_direction, speed_bin
),
period_totals AS (
    SELECT
        year, month, year_month,
        COUNT(*) AS count_total
    FROM classified
    GROUP BY year, month, year_month
),
shares AS (
    SELECT
        b.year,
        b.month,
        b.year_month,
        b.cardinal_direction,
        b.speed_bin,
        b.count_speed_bin,
        t.count_total,
        -- Every joined total counts at least the rows of its own bin, so it is never zero.
        ROUND((b.count_speed_bin * 100.0) / t.count_total, 3) AS percent_frequency
    FROM sector_bin_counts AS b
    JOIN period_totals AS t
        ON b.year = t.year
        AND b.month = t.month
        AND b.year_month = t.year_month
)
SELECT
    year,
    month,
    year_month,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM shares
ORDER BY year, month, year_month, cardinal_direction, speed_bin
;

In [None]:
%%sql wind_rose_raw_yearly <<

-- Wind rose from the raw readings, one rose per year. For every compass sector and
-- speed bin it reports the count, its share of the year in percent, and the running
-- share across speed bins within the sector. Readings with zero speed are excluded.
WITH speed_ceiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS max_speed
    FROM wind_sites.upd_wind_site
),
classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        -- As many equal bins as the rounded-up maximum, so each bin is one speed unit wide.
        width_bucket(s.wind_speed, 0, c.max_speed, c.max_speed) AS speed_bin,
        -- Eight 45 degree sectors centred on the compass points. Upper bounds are
        -- inclusive, and anything outside 0 to 360 stays NULL.
        CASE
            WHEN s.wind_direction < 0 OR s.wind_direction > 360 THEN NULL
            WHEN s.wind_direction <= 22.5 THEN 'N'
            WHEN s.wind_direction <= 67.5 THEN 'NE'
            WHEN s.wind_direction <= 112.5 THEN 'E'
            WHEN s.wind_direction <= 157.5 THEN 'SE'
            WHEN s.wind_direction <= 202.5 THEN 'S'
            WHEN s.wind_direction <= 247.5 THEN 'SW'
            WHEN s.wind_direction <= 292.5 THEN 'W'
            WHEN s.wind_direction <= 337.5 THEN 'NW'
            WHEN s.wind_direction <= 360 THEN 'N'
        END AS cardinal_direction
    FROM wind_sites.upd_wind_site AS s
    -- speed_ceiling always holds exactly one row, so this only attaches max_speed.
    CROSS JOIN speed_ceiling AS c
    WHERE s.wind_speed > 0
),
sector_bin_counts AS (
    SELECT
        year, cardinal_direction, speed_bin,
        COUNT(*) AS count_speed_bin
    FROM classified
    GROUP BY year, cardinal_direction, speed_bin
),
period_totals AS (
    SELECT
        year,
        COUNT(*) AS count_total
    FROM classified
    GROUP BY year
),
shares AS (
    SELECT
        b.year,
        b.cardinal_direction,
        b.speed_bin,
        b.count_speed_bin,
        t.count_total,
        -- Every joined total counts at least the rows of its own bin, so it is never zero.
        ROUND((b.count_speed_bin * 100.0) / t.count_total, 3) AS percent_frequency
    FROM sector_bin_counts AS b
    JOIN period_totals AS t
        ON b.year = t.year
)
SELECT
    year,
    cardinal_direction,
    speed_bin,
    count_speed_bin,
    count_total,
    percent_frequency,
    SUM(percent_frequency) OVER (
        PARTITION BY year, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS cumulative_percent_frequency
FROM shares
ORDER BY year, cardinal_direction, speed_bin
;

In [None]:
%%sql wind_stats_monthly <<

-- Highest, lowest and mean wind speed for each year and month, in chronological order.
SELECT
    EXTRACT(YEAR FROM date_time) AS year,
    EXTRACT(MONTH FROM date_time) AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    MAX(wind_speed) AS monthly_max_speed,
    MIN(wind_speed) AS monthly_min_speed,
    ROUND(AVG(wind_speed), 3) AS monthly_avg_speed
FROM wind_sites.upd_wind_site
GROUP BY
    EXTRACT(YEAR FROM date_time),
    EXTRACT(MONTH FROM date_time),
    TO_CHAR(date_time, 'YYYY-FMMonth')
ORDER BY year, month, year_month
;

In [None]:
%%sql wind_stats_max <<

-- Every reading whose speed equals the maximum of its month, with the day and hour
-- (0 to 23) at which it occurred. Ties within a month are all returned.
WITH readings AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) AS hour,
        wind_speed,
        -- Month-level maximum attached to each reading, keyed on the year_month label.
        MAX(wind_speed) OVER (
            PARTITION BY TO_CHAR(date_time, 'YYYY-FMMonth')
        ) AS monthly_max_speed
    FROM wind_sites.upd_wind_site
)
SELECT
    year,
    month,
    year_month,
    day,
    hour,
    wind_speed
FROM readings
WHERE wind_speed = monthly_max_speed
ORDER BY year, month, year_month, day, hour
;

In [None]:
%%sql wind_stats_min <<

-- Every reading whose speed equals the minimum of its month, with the day and hour
-- (0 to 23) at which it occurred. Ties within a month are all returned.
WITH readings AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) AS hour,
        wind_speed,
        -- Month-level minimum attached to each reading, keyed on the year_month label.
        MIN(wind_speed) OVER (
            PARTITION BY TO_CHAR(date_time, 'YYYY-FMMonth')
        ) AS monthly_min_speed
    FROM wind_sites.upd_wind_site
)
SELECT
    year,
    month,
    year_month,
    day,
    hour,
    wind_speed
FROM readings
WHERE wind_speed = monthly_min_speed
ORDER BY year, month, year_month, day, hour
;

In [None]:
%%sql yey <<

-- Per-month yield table. For each month the mean wind speed is scaled by a power law,
-- crossed with a grid of turbine speeds from 0 to 25 in steps of 0.5, and each grid
-- speed gets a power curve value and a density f_v.
-- NOTE(review) the constants 109, 86 and 0.34 are not explained in this notebook,
-- presumably target height, measurement height and shear exponent, confirm.
-- NOTE(review) f_v is a density per unit of speed and the grid step is 0.5, so adding
-- up f_v times power over the grid without a step factor may double the total, confirm.
WITH monthly_mean AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM wind_sites.upd_wind_site
    GROUP BY 1, 2, 3
),
scaled AS (
    -- monthly_mean already has one row per month, so no further grouping is needed.
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        ROUND(avg_wind_speed * POWER(CAST(109 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM monthly_mean
),
curve AS (
    SELECT
        s.year,
        s.month,
        s.year_month,
        s.avg_wind_speed,
        s.wind_shear,
        gs.num AS wind_turbine_speeds,
        -- Turbine power lookup by grid speed. Zero below 5, peak of 109 from 17.5 to 20.5.
        CASE
            WHEN gs.num < 5 THEN 0
            WHEN gs.num = 5 THEN 5
            WHEN gs.num = 5.5 THEN 8
            WHEN gs.num = 6 THEN 13
            WHEN gs.num = 6.5 THEN 19
            WHEN gs.num = 7 THEN 26
            WHEN gs.num = 7.5 THEN 32
            WHEN gs.num = 8 THEN 39
            WHEN gs.num = 8.5 THEN 46
            WHEN gs.num = 9 THEN 53
            WHEN gs.num = 9.5 THEN 59
            WHEN gs.num = 10 THEN 65
            WHEN gs.num = 10.5 THEN 71
            WHEN gs.num = 11 THEN 76
            WHEN gs.num = 11.5 THEN 80
            WHEN gs.num = 12 THEN 84
            WHEN gs.num = 12.5 THEN 88
            WHEN gs.num = 13 THEN 92
            WHEN gs.num = 13.5 THEN 95
            WHEN gs.num = 14 THEN 97
            WHEN gs.num = 14.5 THEN 100
            WHEN gs.num = 15 THEN 102
            WHEN gs.num = 15.5 THEN 104
            WHEN gs.num = 16 THEN 105
            WHEN gs.num = 16.5 THEN 107
            WHEN gs.num = 17 THEN 108
            WHEN gs.num BETWEEN 17.5 AND 20.5 THEN 109
            WHEN gs.num BETWEEN 21 AND 21.5 THEN 108
            WHEN gs.num = 22 THEN 107
            WHEN gs.num = 22.5 THEN 106
            WHEN gs.num = 23 THEN 105
            WHEN gs.num = 23.5 THEN 104
            WHEN gs.num = 24 THEN 103
            WHEN gs.num BETWEEN 24.5 AND 25 THEN 102
        END AS power_curve
    FROM scaled AS s
    CROSS JOIN generate_series(0, 25, 0.5) AS gs(num)
),
density AS (
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        wind_shear,
        wind_turbine_speeds,
        power_curve,
        -- Rayleigh density (Weibull with shape 2) whose mean is wind_shear.
        -- Months with a zero mean get a density of 0 to avoid dividing by zero.
        CASE
            WHEN wind_shear > 0 THEN
                ((PI() * wind_turbine_speeds) / (2 * POWER(wind_shear, 2)))
                * EXP((-PI() / 4) * POWER((wind_turbine_speeds / wind_shear), 2))
            ELSE
                0
        END AS f_v
    FROM curve
)
SELECT
    year,
    month,
    year_month,
    avg_wind_speed,
    wind_shear,
    wind_turbine_speeds,
    power_curve,
    f_v,
    -- 24 hours per day and 8760 hours per year.
    f_v * power_curve * 24 AS daily_yey,
    f_v * power_curve * 8760 AS yearly_yey
FROM density
ORDER BY 1, 2, 3, 4, 5, 6
;

In [None]:
%%sql sum_yey <<

-- Monthly totals of the yield table. Builds the same per-speed rows as the yey query
-- and sums daily_yey and yearly_yey over the turbine speed grid for each month.
-- NOTE(review) the constants 109, 86 and 0.34 are not explained in this notebook,
-- presumably target height, measurement height and shear exponent, confirm.
-- NOTE(review) f_v is a density per unit of speed and the grid step is 0.5, so these
-- sums carry no step factor and may be twice the intended value, confirm.
WITH MonthlyAverages AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM
        wind_sites.upd_wind_site
    GROUP BY 1, 2, 3
),
WindShear AS (
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        ROUND(avg_wind_speed * POWER(CAST(109 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM
        MonthlyAverages
    GROUP BY 1, 2, 3, 4
),
TurbinePowerCurve AS (
    SELECT
        ws.year,
        ws.month,
        ws.year_month,
        ws.avg_wind_speed,
        ws.wind_shear,
        gs.num AS wind_turbine_speeds,
        -- Turbine power lookup by grid speed. Zero below 5, peak of 109 from 17.5 to 20.5.
        CASE
            WHEN gs.num < 5 THEN 0
            WHEN gs.num = 5 THEN 5
            WHEN gs.num = 5.5 THEN 8
            WHEN gs.num = 6 THEN 13
            WHEN gs.num = 6.5 THEN 19
            WHEN gs.num = 7 THEN 26
            WHEN gs.num = 7.5 THEN 32
            WHEN gs.num = 8 THEN 39
            WHEN gs.num = 8.5 THEN 46
            WHEN gs.num = 9 THEN 53
            WHEN gs.num = 9.5 THEN 59
            WHEN gs.num = 10 THEN 65
            WHEN gs.num = 10.5 THEN 71
            WHEN gs.num = 11 THEN 76
            WHEN gs.num = 11.5 THEN 80
            WHEN gs.num = 12 THEN 84
            WHEN gs.num = 12.5 THEN 88
            WHEN gs.num = 13 THEN 92
            WHEN gs.num = 13.5 THEN 95
            WHEN gs.num = 14 THEN 97
            WHEN gs.num = 14.5 THEN 100
            WHEN gs.num = 15 THEN 102
            WHEN gs.num = 15.5 THEN 104
            WHEN gs.num = 16 THEN 105
            WHEN gs.num = 16.5 THEN 107
            WHEN gs.num = 17 THEN 108
            WHEN gs.num BETWEEN 17.5 AND 20.5 THEN 109
            WHEN gs.num BETWEEN 21 AND 21.5 THEN 108
            WHEN gs.num = 22 THEN 107
            WHEN gs.num = 22.5 THEN 106
            WHEN gs.num = 23 THEN 105
            WHEN gs.num = 23.5 THEN 104
            WHEN gs.num = 24 THEN 103
            WHEN gs.num BETWEEN 24.5 AND 25 THEN 102
        END AS power_curve
    FROM
        WindShear AS ws
    JOIN generate_series(0, 25, 0.5) AS gs(num) ON true
),
Weibull AS (
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        wind_shear,
        wind_turbine_speeds,
        power_curve,
        -- Rayleigh density (Weibull with shape 2) whose mean is wind_shear.
        -- Months with a zero mean get a density of 0 to avoid dividing by zero.
        CASE
            WHEN wind_shear > 0 THEN
                ((PI() * wind_turbine_speeds) / (2 * POWER(wind_shear, 2))) * EXP((-PI()/4) * POWER((wind_turbine_speeds/wind_shear), 2))
            ELSE
                0
        END AS f_v
    FROM
        TurbinePowerCurve
),
YEY AS (
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        wind_shear,
        wind_turbine_speeds,
        power_curve,
        f_v,
        -- 24 hours per day and 8760 hours per year.
        f_v * power_curve * 24 AS daily_yey,
        f_v * power_curve * 8760 AS yearly_yey
    FROM
        Weibull
)
SELECT
    year,
    month,
    year_month,
    wind_shear,
    ROUND(SUM(daily_yey::NUMERIC), 4) AS daily_yey_for_month,
    ROUND(SUM(yearly_yey::NUMERIC), 4) AS yearly_yey_for_month
FROM YEY
GROUP BY 1, 2, 3, 4
-- Explicit chronological order. The ORDER BY clauses that used to sit inside the CTEs
-- gave no guarantee about the order of this grouped result.
ORDER BY 1, 2
;

In [None]:
%%sql estimate_k <<

-- Per-month density table. Same pipeline as the yey query (scaled monthly mean,
-- turbine speed grid from 0 to 25 in steps of 0.5, power curve, density f_v)
-- but without the yield columns.
-- NOTE(review) this cell scales by 111 over 86 while the yey and sum_yey cells use
-- 109 over 86, confirm whether the difference is intentional.
WITH monthly_mean AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM wind_sites.upd_wind_site
    GROUP BY 1, 2, 3
),
scaled AS (
    -- monthly_mean already has one row per month, so no further grouping is needed.
    SELECT
        year,
        month,
        year_month,
        avg_wind_speed,
        ROUND(avg_wind_speed * POWER(CAST(111 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM monthly_mean
),
curve AS (
    SELECT
        s.year,
        s.month,
        s.year_month,
        s.avg_wind_speed,
        s.wind_shear,
        gs.num AS wind_turbine_speeds,
        -- Turbine power lookup by grid speed. Zero below 5, peak of 109 from 17.5 to 20.5.
        CASE
            WHEN gs.num < 5 THEN 0
            WHEN gs.num = 5 THEN 5
            WHEN gs.num = 5.5 THEN 8
            WHEN gs.num = 6 THEN 13
            WHEN gs.num = 6.5 THEN 19
            WHEN gs.num = 7 THEN 26
            WHEN gs.num = 7.5 THEN 32
            WHEN gs.num = 8 THEN 39
            WHEN gs.num = 8.5 THEN 46
            WHEN gs.num = 9 THEN 53
            WHEN gs.num = 9.5 THEN 59
            WHEN gs.num = 10 THEN 65
            WHEN gs.num = 10.5 THEN 71
            WHEN gs.num = 11 THEN 76
            WHEN gs.num = 11.5 THEN 80
            WHEN gs.num = 12 THEN 84
            WHEN gs.num = 12.5 THEN 88
            WHEN gs.num = 13 THEN 92
            WHEN gs.num = 13.5 THEN 95
            WHEN gs.num = 14 THEN 97
            WHEN gs.num = 14.5 THEN 100
            WHEN gs.num = 15 THEN 102
            WHEN gs.num = 15.5 THEN 104
            WHEN gs.num = 16 THEN 105
            WHEN gs.num = 16.5 THEN 107
            WHEN gs.num = 17 THEN 108
            WHEN gs.num BETWEEN 17.5 AND 20.5 THEN 109
            WHEN gs.num BETWEEN 21 AND 21.5 THEN 108
            WHEN gs.num = 22 THEN 107
            WHEN gs.num = 22.5 THEN 106
            WHEN gs.num = 23 THEN 105
            WHEN gs.num = 23.5 THEN 104
            WHEN gs.num = 24 THEN 103
            WHEN gs.num BETWEEN 24.5 AND 25 THEN 102
        END AS power_curve
    FROM scaled AS s
    CROSS JOIN generate_series(0, 25, 0.5) AS gs(num)
)
SELECT
    year,
    month,
    year_month,
    avg_wind_speed,
    wind_shear,
    wind_turbine_speeds,
    power_curve,
    -- Rayleigh density (Weibull with shape 2) whose mean is wind_shear.
    -- Months with a zero mean get a density of 0 to avoid dividing by zero.
    CASE
        WHEN wind_shear > 0 THEN
            ((PI() * wind_turbine_speeds) / (2 * POWER(wind_shear, 2)))
            * EXP((-PI() / 4) * POWER((wind_turbine_speeds / wind_shear), 2))
        ELSE
            0
    END AS f_v
FROM curve
ORDER BY 1, 2, 3, 4, 5, 6
;

In [76]:
# Build the frame from the `yey` query result here instead of relying on the `yey_df`
# assignment that only happens in a later cell; on a fresh top-to-bottom run the bare
# name would raise NameError.
yey_df = pd.DataFrame(yey)
yey_df

Unnamed: 0,year,month,year_month,avg_wind_speed,wind_shear,wind_turbine_speeds,power_curve,f_v,daily_yey,yearly_yey
0,2010,1,2010-January,1.6571,1.8073,0,0,0.000000,0.0,0.0
1,2010,1,2010-January,1.6571,1.8073,0.5,0,0.226424,0.0,0.0
2,2010,1,2010-January,1.6571,1.8073,1.0,0,0.378122,0.0,0.0
3,2010,1,2010-January,1.6571,1.8073,1.5,0,0.419942,0.0,0.0
4,2010,1,2010-January,1.6571,1.8073,2.0,0,0.367604,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
607,2011,1,2011-January,0.0000,0.0000,23.0,105,0.000000,0.0,0.0
608,2011,1,2011-January,0.0000,0.0000,23.5,104,0.000000,0.0,0.0
609,2011,1,2011-January,0.0000,0.0000,24.0,103,0.000000,0.0,0.0
610,2011,1,2011-January,0.0000,0.0000,24.5,102,0.000000,0.0,0.0


In [78]:
# Convert the three diurnal-variation query results into pandas DataFrames.
diurnal_daily, diurnal_monthly, diurnal_yearly = map(
    pd.DataFrame,
    (diurnal_variation_daily, diurnal_variation_monthly, diurnal_variation_yearly),
)

In [79]:
# Convert the hourly-average wind rose query results into pandas DataFrames.
rose_hour_daily, rose_hour_monthly, rose_hour_yearly = map(
    pd.DataFrame,
    (wind_rose_by_hour_daily, wind_rose_by_hour_monthly, wind_rose_by_hour_yearly),
)

In [80]:
# Convert the raw-reading wind rose query results into pandas DataFrames.
rose_raw_daily_df, rose_raw_monthly_df, rose_raw_yearly_df = map(
    pd.DataFrame,
    (wind_rose_raw_daily, wind_rose_raw_monthly, wind_rose_raw_yearly),
)

In [81]:
# Convert the frequency-distribution query results into pandas DataFrames.
distribution_monthly_df, distribution_yearly_df = map(
    pd.DataFrame,
    (frequency_distribution_monthly, frequency_distribution_yearly),
)

In [82]:
# Convert the yield query results (per-speed rows and monthly sums) into pandas DataFrames.
yey_df, sum_yey_df = map(pd.DataFrame, (yey, sum_yey))

In [83]:
# Convert the estimate_k query result into a pandas DataFrame.
estimate_k_df = pd.DataFrame(data=estimate_k)