In [150]:
# !pip3 install ipython-sql

In [151]:
import os
import pandas as pd
import numpy as np

from vizro import Vizro; Vizro._reset()
import vizro.models as vm
from vizro.models.types import capture
import vizro.plotly.express as px
import plotly.graph_objects as go
import plotly.express as pxo

from scipy.optimize import curve_fit

In [152]:
# Load the ipython-sql extension so the %sql / %%sql magics are available in this kernel.
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [153]:
# Create the wind_energy_magic database only when it is missing: look the name up in
# pg_database, print a notice when grep finds a "1" in the result, otherwise run CREATE DATABASE.
# NOTE(review): credentials are embedded in the connection URL — fine for a local sandbox only.
!psql postgresql://postgres:postgres@localhost:5432/postgres -tc \
"SELECT 1 FROM pg_database WHERE datname = 'wind_energy_magic'" | grep -q 1 \
&& echo "Database already exists. Doing nothing" \
|| (echo "Creating database..."; \
psql postgresql://postgres:postgres@localhost:5432/postgres -c "CREATE DATABASE wind_energy_magic;")

Database already exists. Doing nothing


This command works as follows:

The first psql command (-tc "SELECT 1 FROM pg_database WHERE datname = 'wind_energy_magic'") checks if a database named wind_energy_magic exists. The -t flag makes psql print tuples only (no column headers or row-count footer), and -c runs the given SQL command.
The grep -q 1 part checks if the output of the previous command contains 1 (which indicates that the database exists). The -q flag makes grep quiet; it doesn't output the lines, it just returns 0 (true) if the string was found, and 1 (false) if not.
The || is a logical OR operator in shell scripting. If the grep command returns false (meaning the database does not exist), the command after || is executed.
The second psql command after || (-c "CREATE DATABASE wind_energy_magic;") creates the database.
This is a non-standard but effective way to conditionally create a database in PostgreSQL when working from a command line environment like a Jupyter Notebook. Remember, always exercise caution when executing shell commands, especially when they involve database operations.

The command also uses echo to report which branch was taken, so the notebook output always states whether the database was created or already existed. Step by step:

It first checks if the wind_energy_magic database exists.
If the database exists (grep -q 1 returns true), it executes echo "Database already exists. Doing nothing" after the &&.
If the database does not exist (grep -q 1 returns false), it executes the commands after ||, which are:
echo "Creating database..." to print a message indicating that the database is being created.
The psql command to actually create the database.
This way, you will get a clear message in your Jupyter Notebook output indicating whether the database was created or if it already existed.

In [154]:
# Point the %sql / %%sql magics at the wind_energy_magic database on the local PostgreSQL server.
# NOTE(review): user and password are hardcoded in the URL — acceptable for a local sandbox only.
%sql postgresql+psycopg2://postgres:postgres@localhost:5432/wind_energy_magic

In [155]:
%%sql
-- Schema and table that hold the raw wind readings. Both statements are safe to re-run.
CREATE SCHEMA IF NOT EXISTS wind_sites;

CREATE TABLE IF NOT EXISTS wind_sites.upd_wind_site (
    id              serial      PRIMARY KEY,
    date_time       timestamp   NOT NULL,
    wind_speed      numeric     NOT NULL,
    gust_speed      numeric     NOT NULL,
    wind_direction  numeric     NOT NULL
);

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
Done.
Done.


[]

In [156]:
import os
os.environ["PGPASSWORD"] = "postgres"

!psql -h localhost -U postgres -d wind_energy_magic -tc \
"SELECT count(*) FROM wind_sites.upd_wind_site" | grep -q '^0$' \
&& psql -h localhost -U postgres -d wind_energy_magic -c \
"\\copy wind_sites.upd_wind_site (date_time,wind_speed,gust_speed,wind_direction) \
FROM '/Users/gioabeleda/Desktop/wind-energy-dashboard/data/wind_energy.csv' \
DELIMITER ',' CSV HEADER;" \
|| echo "Data already exists in the table. Doing nothing."

Data already exists in the table. Doing nothing.


In [157]:
%%sql monthly_data_availability <<

-- Number of distinct calendar days that have at least one reading, per month.
-- Rows are returned in chronological order so the result no longer depends on the plan chosen for GROUP BY.
SELECT
    EXTRACT(YEAR FROM date_time)::integer AS year,
    EXTRACT(MONTH FROM date_time)::integer AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    COUNT(DISTINCT EXTRACT(DAY FROM date_time))::integer AS days_count
FROM
    wind_sites.upd_wind_site
GROUP BY 1,2,3
ORDER BY 1,2
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
12 rows affected.
Returning data to local variable monthly_data_availability


In [158]:
%%sql diurnal_variation_daily <<

-- Mean wind speed for every hour of every day. The hour is reported as 1 to 24 (clock hour plus one).
-- Rows are ordered by year, month, day and hour so a line chart drawn in row order follows the clock.
SELECT
    EXTRACT(YEAR FROM date_time)::integer AS year,
    EXTRACT(MONTH FROM date_time)::integer AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    EXTRACT(DAY FROM date_time)::integer AS day,
    (EXTRACT(HOUR FROM date_time) + 1)::integer AS hour,
    ROUND(AVG(wind_speed),3)::float AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1,2,3,4,5
ORDER BY 1,2,4,5
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
7022 rows affected.
Returning data to local variable diurnal_variation_daily


In [159]:
%%sql diurnal_variation_monthly <<

-- Mean wind speed per hour of day within each month. The hour is reported as 1 to 24 (clock hour plus one).
-- Rows are ordered by year, month and hour so a line chart drawn in row order follows the clock.
SELECT
    EXTRACT(YEAR FROM date_time)::integer AS year,
    EXTRACT(MONTH FROM date_time)::integer AS month,
    TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
    (EXTRACT(HOUR FROM date_time) + 1)::integer AS hour,
    ROUND(AVG(wind_speed),3)::float AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1,2,3,4
ORDER BY 1,2,4
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
265 rows affected.
Returning data to local variable diurnal_variation_monthly


In [160]:
%%sql diurnal_variation_yearly <<

-- Mean wind speed per hour of day within each year. The hour is reported as 1 to 24 (clock hour plus one).
-- Rows are ordered by year and hour so a line chart drawn in row order follows the clock.
SELECT
    EXTRACT(YEAR FROM date_time)::integer AS year,
    (EXTRACT(HOUR FROM date_time) + 1)::integer AS hour,
    ROUND(AVG(wind_speed),3)::float AS avg_wind_speed
FROM wind_sites.upd_wind_site
GROUP BY 1,2
ORDER BY 1,2
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
25 rows affected.
Returning data to local variable diurnal_variation_yearly


In [161]:
%%sql frequency_distribution_monthly <<

-- Wind speed histogram per month. Readings are placed in equal-width bins that span
-- zero up to the highest speed in the table rounded up, with as many bins as that
-- rounded maximum. Each bin is reported with its count and its share of the month in percent.
WITH SpeedCeiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS top_speed
    FROM wind_sites.upd_wind_site
),
Binned AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        width_bucket(s.wind_speed::float, 0, c.top_speed, c.top_speed) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    CROSS JOIN SpeedCeiling AS c
),
Tally AS (
    SELECT
        year,
        month,
        year_month,
        speed_bin,
        COUNT(*) AS frequency,
        -- total number of readings in the month, repeated on every bin row
        SUM(COUNT(*)) OVER (PARTITION BY year, month) AS monthly_total
    FROM Binned
    GROUP BY 1,2,3,4
)
SELECT
    year::integer,
    month::integer,
    year_month,
    speed_bin::integer,
    frequency::integer,
    ROUND((frequency / monthly_total) * 100, 3)::float AS percent_frequency
FROM Tally
ORDER BY 1,2,3,4
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
84 rows affected.
Returning data to local variable frequency_distribution_monthly


In [162]:
%%sql frequency_distribution_yearly <<

-- Wind speed histogram per year. Readings are placed in equal-width bins that span
-- zero up to the highest speed in the table rounded up, with as many bins as that
-- rounded maximum. Each bin is reported with its count and its share of the year in percent.
WITH SpeedCeiling AS (
    SELECT CEIL(MAX(wind_speed)) AS top_speed
    FROM wind_sites.upd_wind_site
),
Binned AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        width_bucket(
            s.wind_speed::double precision,
            0,
            c.top_speed::double precision,
            c.top_speed::integer
        ) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    CROSS JOIN SpeedCeiling AS c
),
Tally AS (
    SELECT
        year,
        speed_bin,
        COUNT(*) AS frequency,
        -- total number of readings in the year, repeated on every bin row
        SUM(COUNT(*)) OVER (PARTITION BY year) AS yearly_total
    FROM Binned
    GROUP BY 1,2
)
SELECT
    year::integer,
    speed_bin::integer,
    frequency::integer,
    ROUND((frequency / yearly_total) * 100, 3)::float AS percent_frequency
FROM Tally
ORDER BY 1,2
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
13 rows affected.
Returning data to local variable frequency_distribution_yearly


In [163]:
%%sql wind_rose_by_hour_daily <<

-- Wind rose per calendar day, built from hourly means instead of raw readings.
-- Every hour of every day is reduced to one mean speed and one mean direction, then
-- classified into a speed bin and a compass sector. Percentages are relative to the
-- number of classified hours of that day, and the cumulative column accumulates them
-- over increasing speed bins inside each sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed),3) AS avg_wind_speed,
        -- Directions are angles, so a plain AVG is wrong across north (350 and 10
        -- would average to 180). Average the unit vectors instead and map the
        -- resulting angle back into the 0 to 360 range.
        ROUND(
            MOD(
                (ATAN2D(
                    AVG(SIND(wind_direction::double precision)),
                    AVG(COSD(wind_direction::double precision))
                ) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1,2,3,4,5
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    SELECT
        h.year,
        h.month,
        h.year_month,
        h.day,
        h.hour,
        h.avg_wind_speed,
        h.avg_wind_direction,
        -- equal-width bins from zero to the rounded-up maximum hourly mean
        width_bucket(
            h.avg_wind_speed,
            0,
            m.max_speed::integer,
            m.max_speed::integer
        ) AS speed_bin,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN h.avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN h.avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN h.avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN h.avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN h.avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN h.avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN h.avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN h.avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN h.avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction
    FROM AvgHourlyWindSpeed AS h
    CROSS JOIN MaxWindSpeed AS m
    -- hours with a zero mean speed are left out of the rose
    WHERE h.avg_wind_speed > 0
),
Frequency AS (
    SELECT
        year,
        month,
        year_month,
        day,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of classified hours of the day, repeated on every row of that day
        SUM(COUNT(*)) OVER (PARTITION BY year, month, year_month, day) AS count_total
    FROM CardinalDirections
    GROUP BY 1,2,3,4,5,6
),
PercentFrequency AS (
    SELECT
        f.*,
        ROUND((f.count_speed_bin * 100.0) / f.count_total, 3) AS percent_frequency
    FROM Frequency AS f
)
SELECT
    year::integer,
    month::integer,
    year_month,
    day::integer,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, day, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1,2,3,4,5,6
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
3243 rows affected.
Returning data to local variable wind_rose_by_hour_daily


In [164]:
%%sql wind_rose_by_hour_monthly <<

-- Wind rose per month, built from the mean speed and mean direction of each hour of day
-- within that month. Every hourly mean is classified into a speed bin and a compass
-- sector. Percentages are relative to the number of classified hours of the month, and
-- the cumulative column accumulates them over increasing speed bins inside each sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed),3) AS avg_wind_speed,
        -- Directions are angles, so a plain AVG is wrong across north (350 and 10
        -- would average to 180). Average the unit vectors instead and map the
        -- resulting angle back into the 0 to 360 range.
        ROUND(
            MOD(
                (ATAN2D(
                    AVG(SIND(wind_direction::double precision)),
                    AVG(COSD(wind_direction::double precision))
                ) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1,2,3,4
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    SELECT
        h.year,
        h.month,
        h.year_month,
        h.hour,
        h.avg_wind_speed,
        h.avg_wind_direction,
        -- equal-width bins from zero to the rounded-up maximum hourly mean
        width_bucket(
            h.avg_wind_speed,
            0,
            m.max_speed::integer,
            m.max_speed::integer
        ) AS speed_bin,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN h.avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN h.avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN h.avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN h.avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN h.avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN h.avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN h.avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN h.avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN h.avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction
    FROM AvgHourlyWindSpeed AS h
    CROSS JOIN MaxWindSpeed AS m
    -- hours with a zero mean speed are left out of the rose
    WHERE h.avg_wind_speed > 0
),
Frequency AS (
    SELECT
        year,
        month,
        year_month,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of classified hours of the month, repeated on every row of that month
        SUM(COUNT(*)) OVER (PARTITION BY year, month, year_month) AS count_total
    FROM CardinalDirections
    GROUP BY 1,2,3,4,5
),
PercentFrequency AS (
    SELECT
        f.*,
        ROUND((f.count_speed_bin * 100.0) / f.count_total, 3) AS percent_frequency
    FROM Frequency AS f
)
SELECT
    year::integer,
    month::integer,
    year_month,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1,2,3,4,5
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
75 rows affected.
Returning data to local variable wind_rose_by_hour_monthly


In [165]:
%%sql wind_rose_by_hour_yearly <<

-- Wind rose per year, built from the mean speed and mean direction of each hour of day
-- within that year. Every hourly mean is classified into a speed bin and a compass
-- sector. Percentages are relative to the number of hourly means of the year, and the
-- cumulative column accumulates them over increasing speed bins inside each sector.
WITH AvgHourlyWindSpeed AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(HOUR FROM date_time) + 1 AS hour,
        ROUND(AVG(wind_speed),3) AS avg_wind_speed,
        -- Directions are angles, so a plain AVG is wrong across north (350 and 10
        -- would average to 180). Average the unit vectors instead and map the
        -- resulting angle back into the 0 to 360 range.
        ROUND(
            MOD(
                (ATAN2D(
                    AVG(SIND(wind_direction::double precision)),
                    AVG(COSD(wind_direction::double precision))
                ) + 360)::numeric,
                360
            ),
            3
        ) AS avg_wind_direction
    FROM wind_sites.upd_wind_site
    GROUP BY 1,2
),
MaxWindSpeed AS (
    SELECT CEIL(MAX(avg_wind_speed)) AS max_speed
    FROM AvgHourlyWindSpeed
),
CardinalDirections AS (
    -- No speed filter is applied in this query, so an hour with a zero mean speed is kept.
    SELECT
        h.year,
        h.hour,
        h.avg_wind_speed,
        h.avg_wind_direction,
        -- equal-width bins from zero to the rounded-up maximum hourly mean
        width_bucket(
            h.avg_wind_speed,
            0,
            m.max_speed::integer,
            m.max_speed::integer
        ) AS speed_bin,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN h.avg_wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN h.avg_wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN h.avg_wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN h.avg_wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN h.avg_wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN h.avg_wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN h.avg_wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN h.avg_wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN h.avg_wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction
    FROM AvgHourlyWindSpeed AS h
    CROSS JOIN MaxWindSpeed AS m
),
Frequency AS (
    SELECT
        year,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of hourly means of the year, repeated on every row of that year
        SUM(COUNT(*)) OVER (PARTITION BY year) AS count_total
    FROM CardinalDirections
    GROUP BY 1,2,3
),
PercentFrequency AS (
    SELECT
        f.*,
        ROUND((f.count_speed_bin * 100.0) / f.count_total, 3) AS percent_frequency
    FROM Frequency AS f
)
SELECT
    year::integer,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM PercentFrequency
ORDER BY 1,2,3
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
5 rows affected.
Returning data to local variable wind_rose_by_hour_yearly


In [166]:
%%sql wind_rose_raw_daily <<

-- Wind rose per calendar day from the raw readings. Each reading with a positive speed
-- is classified into a speed bin and a compass sector. Percentages are relative to the
-- number of classified readings of that day, and the cumulative column accumulates them
-- over increasing speed bins inside each sector.
WITH SpeedCeiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS top_speed
    FROM wind_sites.upd_wind_site
),
Classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM s.date_time) AS day,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN s.wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN s.wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN s.wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN s.wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN s.wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN s.wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN s.wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN s.wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN s.wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        -- equal-width bins from zero to the rounded-up maximum speed
        width_bucket(s.wind_speed, 0, c.top_speed, c.top_speed) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    CROSS JOIN SpeedCeiling AS c
    WHERE s.wind_speed > 0
),
Counted AS (
    SELECT
        year,
        month,
        year_month,
        day,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of classified readings of the day, repeated on every row of that day
        SUM(COUNT(*)) OVER (PARTITION BY year, month, year_month, day) AS count_total
    FROM Classified
    GROUP BY 1,2,3,4,5,6
),
Shares AS (
    SELECT
        k.*,
        ROUND((k.count_speed_bin * 100.0) / k.count_total, 3) AS percent_frequency
    FROM Counted AS k
)
SELECT
    year::integer,
    month::integer,
    year_month,
    day::integer,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, day, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM Shares
ORDER BY 1,2,3,4,5,6
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
6739 rows affected.
Returning data to local variable wind_rose_raw_daily


In [167]:
%%sql wind_rose_raw_monthly <<

-- Wind rose per month from the raw readings. Each reading with a positive speed is
-- classified into a speed bin and a compass sector. Percentages are relative to the
-- number of classified readings of that month, and the cumulative column accumulates
-- them over increasing speed bins inside each sector.
WITH SpeedCeiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS top_speed
    FROM wind_sites.upd_wind_site
),
Classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        EXTRACT(MONTH FROM s.date_time) AS month,
        TO_CHAR(s.date_time, 'YYYY-FMMonth') AS year_month,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN s.wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN s.wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN s.wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN s.wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN s.wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN s.wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN s.wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN s.wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN s.wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        -- equal-width bins from zero to the rounded-up maximum speed
        width_bucket(s.wind_speed, 0, c.top_speed, c.top_speed) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    CROSS JOIN SpeedCeiling AS c
    WHERE s.wind_speed > 0
),
Counted AS (
    SELECT
        year,
        month,
        year_month,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of classified readings of the month, repeated on every row of that month
        SUM(COUNT(*)) OVER (PARTITION BY year, month, year_month) AS count_total
    FROM Classified
    GROUP BY 1,2,3,4,5
),
Shares AS (
    SELECT
        k.*,
        ROUND((k.count_speed_bin * 100.0) / k.count_total, 3) AS percent_frequency
    FROM Counted AS k
)
SELECT
    year::integer,
    month::integer,
    year_month,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, month, year_month, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM Shares
ORDER BY 1,2,3,4,5
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
497 rows affected.
Returning data to local variable wind_rose_raw_monthly


In [168]:
%%sql wind_rose_raw_yearly <<

-- Wind rose per year from the raw readings. Each reading with a positive speed is
-- classified into a speed bin and a compass sector. Percentages are relative to the
-- number of classified readings of that year, and the cumulative column accumulates
-- them over increasing speed bins inside each sector.
WITH SpeedCeiling AS (
    SELECT CEIL(MAX(wind_speed))::integer AS top_speed
    FROM wind_sites.upd_wind_site
),
Classified AS (
    SELECT
        EXTRACT(YEAR FROM s.date_time) AS year,
        -- BETWEEN is inclusive and the first matching branch wins, so a boundary
        -- value such as 22.5 is labelled with the earlier sector
        CASE
            WHEN s.wind_direction BETWEEN 0 AND 22.5 THEN 'N'
            WHEN s.wind_direction BETWEEN 22.5 AND 67.5 THEN 'NE'
            WHEN s.wind_direction BETWEEN 67.5 AND 112.5 THEN 'E'
            WHEN s.wind_direction BETWEEN 112.5 AND 157.5 THEN 'SE'
            WHEN s.wind_direction BETWEEN 157.5 AND 202.5 THEN 'S'
            WHEN s.wind_direction BETWEEN 202.5 AND 247.5 THEN 'SW'
            WHEN s.wind_direction BETWEEN 247.5 AND 292.5 THEN 'W'
            WHEN s.wind_direction BETWEEN 292.5 AND 337.5 THEN 'NW'
            WHEN s.wind_direction BETWEEN 337.5 AND 360 THEN 'N'
        END AS cardinal_direction,
        -- equal-width bins from zero to the rounded-up maximum speed
        width_bucket(s.wind_speed, 0, c.top_speed, c.top_speed) AS speed_bin
    FROM wind_sites.upd_wind_site AS s
    CROSS JOIN SpeedCeiling AS c
    WHERE s.wind_speed > 0
),
Counted AS (
    SELECT
        year,
        cardinal_direction,
        speed_bin,
        COUNT(*) AS count_speed_bin,
        -- number of classified readings of the year, repeated on every row of that year
        SUM(COUNT(*)) OVER (PARTITION BY year) AS count_total
    FROM Classified
    GROUP BY 1,2,3
),
Shares AS (
    SELECT
        k.*,
        ROUND((k.count_speed_bin * 100.0) / k.count_total, 3) AS percent_frequency
    FROM Counted AS k
)
SELECT
    year::integer,
    cardinal_direction,
    speed_bin::integer,
    count_speed_bin::integer,
    count_total::integer,
    percent_frequency::float,
    SUM(percent_frequency) OVER (
        PARTITION BY year, cardinal_direction
        ORDER BY speed_bin
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )::float AS cumulative_percent_frequency
FROM Shares
ORDER BY 1,2,3
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
67 rows affected.
Returning data to local variable wind_rose_raw_yearly


In [169]:
%%sql wind_stats_monthly <<

-- Highest, lowest and mean wind speed for every calendar month present in the table.
SELECT
    r.year::integer,
    r.month::integer,
    r.year_month,
    MAX(r.wind_speed)::float AS monthly_max_speed,
    MIN(r.wind_speed)::float AS monthly_min_speed,
    ROUND(AVG(r.wind_speed), 3)::float AS monthly_avg_speed
FROM (
    -- derive the month keys once, next to the value being aggregated
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        wind_speed
    FROM wind_sites.upd_wind_site
) AS r
GROUP BY r.year, r.month, r.year_month
ORDER BY 1,2,3
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
12 rows affected.
Returning data to local variable wind_stats_monthly


In [170]:
%%sql wind_stats_max <<

-- Every reading whose speed equals the highest speed recorded in its month, with the
-- day and clock hour at which it occurred. A month can return more than one row when
-- the maximum was reached several times.
WITH Readings AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) AS hour,
        wind_speed,
        -- monthly maximum attached to every reading of that month
        MAX(wind_speed) OVER (
            PARTITION BY TO_CHAR(date_time, 'YYYY-FMMonth')
        ) AS monthly_max_speed
    FROM wind_sites.upd_wind_site
)
SELECT
    year::integer,
    month::integer,
    year_month,
    day::integer,
    hour::integer,
    wind_speed::float
FROM Readings
WHERE wind_speed = monthly_max_speed
ORDER BY 1,2,3,4,5
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
13 rows affected.
Returning data to local variable wind_stats_max


In [171]:
%%sql wind_stats_min <<

-- Every reading whose speed equals the lowest speed recorded in its month, with the
-- day and clock hour at which it occurred. A month returns one row per reading that
-- ties with the minimum, so the result can be large.
WITH Readings AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        EXTRACT(DAY FROM date_time) AS day,
        EXTRACT(HOUR FROM date_time) AS hour,
        wind_speed,
        -- monthly minimum attached to every reading of that month
        MIN(wind_speed) OVER (
            PARTITION BY TO_CHAR(date_time, 'YYYY-FMMonth')
        ) AS monthly_min_speed
    FROM wind_sites.upd_wind_site
)
SELECT
    year::integer,
    month::integer,
    year_month,
    day::integer,
    hour::integer,
    wind_speed::float
FROM Readings
WHERE wind_speed = monthly_min_speed
ORDER BY 1,2,3,4,5
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
11847 rows affected.
Returning data to local variable wind_stats_min


In [172]:
%%sql wind_shear <<

-- Monthly mean wind speed together with that mean scaled by (109/86)^0.34.
-- NOTE(review) the factor looks like a power-law height extrapolation from 86 to 109 with exponent 0.34. Confirm the heights and the exponent with the author.
-- wind_shear is cast to float like every other numeric output column in this notebook,
-- so it reaches Python as a float and not as a Decimal.
WITH MonthlyAverages AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed)::numeric, 4) AS avg_wind_speed
    FROM
        wind_sites.upd_wind_site
    GROUP BY 1,2,3
)
SELECT
    year::integer,
    month::integer,
    year_month,
    avg_wind_speed::float,
    ROUND((avg_wind_speed * POWER(CAST(109 AS NUMERIC) / 86, 0.34))::numeric, 4)::float AS wind_shear
FROM
    MonthlyAverages
ORDER BY 1,2,3
;


 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
12 rows affected.
Returning data to local variable wind_shear


In [173]:
%%sql yey <<

-- Energy yield estimate per month and per turbine wind speed step.
-- For every month the mean wind speed is scaled by (109/86)^0.34 (column wind_shear),
-- paired with every speed from 0 to 25 in steps of 0.5, looked up in the hard-coded
-- turbine power curve and weighted by the density f_v evaluated at that speed.
-- NOTE(review) f_v has the form of a Rayleigh density whose mean is wind_shear. Confirm with the author.
WITH MonthlyMean AS (
    SELECT
        EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
        TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM wind_sites.upd_wind_site
    GROUP BY 1,2,3
),
ScaledMean AS (
    -- MonthlyMean already holds one row per month, so no further grouping is needed
    SELECT
        mm.year,
        mm.month,
        mm.year_month,
        mm.avg_wind_speed,
        ROUND(mm.avg_wind_speed * POWER(CAST(109 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM MonthlyMean AS mm
),
PowerCurve AS (
    SELECT
        sm.year,
        sm.month,
        sm.year_month,
        sm.avg_wind_speed,
        sm.wind_shear,
        step.speed AS wind_turbine_speeds,
        -- turbine output for each tabulated speed, zero below 5
        CASE
            WHEN step.speed < 5 THEN 0
            WHEN step.speed = 5 THEN 5
            WHEN step.speed = 5.5 THEN 8
            WHEN step.speed = 6 THEN 13
            WHEN step.speed = 6.5 THEN 19
            WHEN step.speed = 7 THEN 26
            WHEN step.speed = 7.5 THEN 32
            WHEN step.speed = 8 THEN 39
            WHEN step.speed = 8.5 THEN 46
            WHEN step.speed = 9 THEN 53
            WHEN step.speed = 9.5 THEN 59
            WHEN step.speed = 10 THEN 65
            WHEN step.speed = 10.5 THEN 71
            WHEN step.speed = 11 THEN 76
            WHEN step.speed = 11.5 THEN 80
            WHEN step.speed = 12 THEN 84
            WHEN step.speed = 12.5 THEN 88
            WHEN step.speed = 13 THEN 92
            WHEN step.speed = 13.5 THEN 95
            WHEN step.speed = 14 THEN 97
            WHEN step.speed = 14.5 THEN 100
            WHEN step.speed = 15 THEN 102
            WHEN step.speed = 15.5 THEN 104
            WHEN step.speed = 16 THEN 105
            WHEN step.speed = 16.5 THEN 107
            WHEN step.speed = 17 THEN 108
            WHEN step.speed BETWEEN 17.5 AND 20.5 THEN 109
            WHEN step.speed BETWEEN 21 AND 21.5 THEN 108
            WHEN step.speed = 22 THEN 107
            WHEN step.speed = 22.5 THEN 106
            WHEN step.speed = 23 THEN 105
            WHEN step.speed = 23.5 THEN 104
            WHEN step.speed = 24 THEN 103
            WHEN step.speed BETWEEN 24.5 AND 25 THEN 102
        END AS power_curve
    FROM ScaledMean AS sm
    -- every month is paired with every speed step
    CROSS JOIN generate_series(0, 25, 0.5) AS step(speed)
),
Density AS (
    SELECT
        pc.year,
        pc.month,
        pc.year_month,
        pc.avg_wind_speed,
        pc.wind_shear,
        pc.wind_turbine_speeds,
        pc.power_curve,
        -- guard against division by zero when the scaled mean is not positive
        CASE
            WHEN pc.wind_shear > 0 THEN
                ((PI() * pc.wind_turbine_speeds) / (2 * POWER(pc.wind_shear, 2))) * EXP((-PI()/4) * POWER((pc.wind_turbine_speeds/pc.wind_shear), 2))
            ELSE
                0
        END AS f_v
    FROM PowerCurve AS pc
)
SELECT
    year::integer,
    month::integer,
    year_month,
    avg_wind_speed::float,
    wind_shear::float,
    wind_turbine_speeds::float,
    power_curve::integer,
    f_v::float,
    (f_v * power_curve)::float * 24 AS daily_yey,
    (f_v * power_curve * 8760)::float AS yearly_yey
FROM Density
ORDER BY 1,2,3,4,5,6
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
612 rows affected.
Returning data to local variable yey


In [174]:
%%sql sum_yey <<

-- Monthly energy yield summary, one row per month.
-- Sums the per-speed-bin daily and yearly yield terms built in the CTEs below.
WITH MonthlyAverages AS (
    -- Mean recorded wind speed for each calendar month.
    SELECT
		EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
		TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM
        wind_sites.upd_wind_site
    GROUP BY 1,2,3
),
WindShear AS (
    -- Scale the monthly mean by (109 / 86) raised to 0.34, presumably a power-law
    -- height correction from the measurement height to the rotor height.
    -- NOTE(review) the estimate_k query uses 111 where this one uses 109. Confirm which is intended.
    SELECT
		year,
        month,
		year_month,
        avg_wind_speed,
        ROUND(avg_wind_speed * POWER(CAST(109 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM 
        MonthlyAverages
    GROUP BY 1,2,3,4
    ORDER BY 1,2,3,4
),
TurbinePowerCurve AS (
	-- Pair every month with the speeds 0 to 25 in steps of 0.5 and look up the
	-- turbine power for each speed. The lookup depends on the speed only.
	SELECT 
		ws.year,
		ws.month,
		ws.year_month,
		ws.avg_wind_speed,
		ws.wind_shear,
		gs.num AS wind_turbine_speeds,
		CASE
			WHEN gs.num < 5 THEN 0
			WHEN gs.num = 5 THEN 5
			WHEN gs.num = 5.5 THEN 8
			WHEN gs.num = 6 THEN 13
			WHEN gs.num = 6.5 THEN 19
			WHEN gs.num = 7 THEN 26
			WHEN gs.num = 7.5 THEN 32
			WHEN gs.num = 8 THEN 39
			WHEN gs.num = 8.5 THEN 46
			WHEN gs.num = 9 THEN 53
			WHEN gs.num = 9.5 THEN 59
			WHEN gs.num = 10 THEN 65
			WHEN gs.num = 10.5 THEN 71
			WHEN gs.num = 11 THEN 76
			WHEN gs.num = 11.5 THEN 80
			WHEN gs.num = 12 THEN 84
			WHEN gs.num = 12.5 THEN 88
			WHEN gs.num = 13 THEN 92
			WHEN gs.num = 13.5 THEN 95
			WHEN gs.num = 14 THEN 97
			WHEN gs.num = 14.5 THEN 100
			WHEN gs.num = 15 THEN 102
			WHEN gs.num = 15.5 THEN 104
			WHEN gs.num = 16 THEN 105
			WHEN gs.num = 16.5 THEN 107
			WHEN gs.num = 17 THEN 108
			WHEN gs.num BETWEEN 17.5 AND 20.5 THEN 109
			WHEN gs.num BETWEEN 21 AND 21.5 THEN 108
			WHEN gs.num = 22 THEN 107
			WHEN gs.num = 22.5 THEN 106
			WHEN gs.num = 23 THEN 105
			WHEN gs.num = 23.5 THEN 104
			WHEN gs.num = 24 THEN 103
			WHEN gs.num BETWEEN 24.5 AND 25 THEN 102
		END AS power_curve
	FROM 
		WindShear AS ws
	JOIN generate_series(0, 25,0.5) AS gs(num) ON true
), Weibull AS (
	-- Density f_v for each speed, with wind_shear as the mean speed. The expression
	-- is the Rayleigh form (Weibull with k = 2). Months whose wind_shear is not
	-- positive get 0 so the division is never attempted.
	SELECT
		year,
		month,
		year_month,
		avg_wind_speed,
		wind_shear,
		wind_turbine_speeds,
		power_curve,
		CASE 	
			WHEN wind_shear > 0 THEN
				((PI() * wind_turbine_speeds) / (2 * POWER(wind_shear, 2))) * EXP((-PI()/4) * POWER((wind_turbine_speeds/wind_shear), 2))
			ELSE
				0
		END AS f_v
	FROM
		TurbinePowerCurve
),
YEY AS (
	-- Per-speed yield terms, density times power times 24 hours and times 8760 hours.
	-- NOTE(review) the speeds step by 0.5 but the terms are not weighted by a bin
	-- width, so the monthly sums may be overstated. Confirm against the method used.
	SELECT 
		year,
		month,
		year_month,
		avg_wind_speed,
		wind_shear,
		wind_turbine_speeds,
		power_curve,
		f_v,
		f_v * power_curve * 24 AS daily_yey,
		f_v * power_curve * 8760 AS yearly_yey
	FROM
		Weibull
	ORDER BY 1,2,3,4,5,6	
)
SELECT
	year::integer,
	month::integer,
	year_month,
	wind_shear::float,
	ROUND(SUM(daily_yey::NUMERIC),2)::float AS daily_yey_for_month,
	ROUND(SUM(yearly_yey::NUMERIC),2)::float AS yearly_yey_for_month
FROM YEY
GROUP BY 1,2,3,4
-- Explicit chronological order. An ORDER BY inside a CTE does not order the final result.
ORDER BY 1,2
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
12 rows affected.
Returning data to local variable sum_yey


In [175]:
%%sql estimate_k <<

-- Per-month, per-speed density table used as the fitting target for the Weibull k estimate.
-- Same pipeline as the yield queries, but it stops after computing f_v.
WITH MonthlyAverages AS (
    -- Mean recorded wind speed for each calendar month.
    SELECT
		EXTRACT(YEAR FROM date_time) AS year,
        EXTRACT(MONTH FROM date_time) AS month,
		TO_CHAR(date_time, 'YYYY-FMMonth') AS year_month,
        ROUND(AVG(wind_speed), 4) AS avg_wind_speed
    FROM
        wind_sites.upd_wind_site
    GROUP BY 1,2,3
),
WindShear AS (
    -- Scale the monthly mean by (111 / 86) raised to 0.34, presumably a power-law
    -- height correction from the measurement height to the rotor height.
    -- NOTE(review) the sum_yey query uses 109 where this one uses 111. Confirm which is intended.
    SELECT
		year,
        month,
		year_month,
        avg_wind_speed,
        ROUND(avg_wind_speed * POWER(CAST(111 AS NUMERIC) / 86, 0.34), 4) AS wind_shear
    FROM 
        MonthlyAverages
    GROUP BY 1,2,3,4
    ORDER BY 1,2,3,4
),
TurbinePowerCurve AS (
	-- Pair every month with the speeds 0 to 25 in steps of 0.5 and look up the
	-- turbine power for each speed. The lookup depends on the speed only.
	SELECT 
		ws.year,
		ws.month,
		ws.year_month,
		ws.avg_wind_speed,
		ws.wind_shear,
		gs.num AS wind_turbine_speeds,
		CASE
			WHEN gs.num < 5 THEN 0
			WHEN gs.num = 5 THEN 5
			WHEN gs.num = 5.5 THEN 8
			WHEN gs.num = 6 THEN 13
			WHEN gs.num = 6.5 THEN 19
			WHEN gs.num = 7 THEN 26
			WHEN gs.num = 7.5 THEN 32
			WHEN gs.num = 8 THEN 39
			WHEN gs.num = 8.5 THEN 46
			WHEN gs.num = 9 THEN 53
			WHEN gs.num = 9.5 THEN 59
			WHEN gs.num = 10 THEN 65
			WHEN gs.num = 10.5 THEN 71
			WHEN gs.num = 11 THEN 76
			WHEN gs.num = 11.5 THEN 80
			WHEN gs.num = 12 THEN 84
			WHEN gs.num = 12.5 THEN 88
			WHEN gs.num = 13 THEN 92
			WHEN gs.num = 13.5 THEN 95
			WHEN gs.num = 14 THEN 97
			WHEN gs.num = 14.5 THEN 100
			WHEN gs.num = 15 THEN 102
			WHEN gs.num = 15.5 THEN 104
			WHEN gs.num = 16 THEN 105
			WHEN gs.num = 16.5 THEN 107
			WHEN gs.num = 17 THEN 108
			WHEN gs.num BETWEEN 17.5 AND 20.5 THEN 109
			WHEN gs.num BETWEEN 21 AND 21.5 THEN 108
			WHEN gs.num = 22 THEN 107
			WHEN gs.num = 22.5 THEN 106
			WHEN gs.num = 23 THEN 105
			WHEN gs.num = 23.5 THEN 104
			WHEN gs.num = 24 THEN 103
			WHEN gs.num BETWEEN 24.5 AND 25 THEN 102
		END AS power_curve
	FROM 
		WindShear AS ws
	JOIN generate_series(0, 25,0.5) AS gs(num) ON true
)
SELECT
    year::integer,
    month::integer,
    year_month,
    avg_wind_speed::float,
    wind_shear::float,
    wind_turbine_speeds::float,
    power_curve::float,
    -- Density f_v with wind_shear as the mean speed. The expression is the Rayleigh
    -- form (Weibull with k = 2). Months whose wind_shear is not positive get 0.
    -- The trailing float cast binds to the EXP factor only, because the cast
    -- operator binds tighter than multiplication.
    CASE 	
        WHEN wind_shear > 0 THEN
            ((PI() * wind_turbine_speeds) / (2 * POWER(wind_shear, 2))) * EXP((-PI()/4) * POWER((wind_turbine_speeds/wind_shear), 2))::float
        ELSE
            0
    END AS f_v
FROM
    TurbinePowerCurve
ORDER BY 1,2,3,4,5,6
;

 * postgresql+psycopg2://postgres:***@localhost:5432/wind_energy_magic
612 rows affected.
Returning data to local variable estimate_k


In [176]:
# Convert the data-availability result (presumably produced by an earlier
# %%sql cell) into a DataFrame for the dashboard.
data_availability = pd.DataFrame(data=monthly_data_availability)

In [177]:
# One DataFrame per diurnal-variation result, in daily / monthly / yearly order.
diurnal_daily, diurnal_monthly, diurnal_yearly = map(
    pd.DataFrame,
    (diurnal_variation_daily, diurnal_variation_monthly, diurnal_variation_yearly),
)

In [178]:
# One DataFrame per hourly wind-rose result, in daily / monthly / yearly order.
rose_hour_daily, rose_hour_monthly, rose_hour_yearly = map(
    pd.DataFrame,
    (wind_rose_by_hour_daily, wind_rose_by_hour_monthly, wind_rose_by_hour_yearly),
)

In [179]:
# One DataFrame per raw wind-rose result, in daily / monthly / yearly order.
rose_raw_daily_df, rose_raw_monthly_df, rose_raw_yearly_df = map(
    pd.DataFrame,
    (wind_rose_raw_daily, wind_rose_raw_monthly, wind_rose_raw_yearly),
)

In [180]:
# DataFrames for the monthly and yearly frequency-distribution results.
distribution_monthly_df, distribution_yearly_df = map(
    pd.DataFrame,
    (frequency_distribution_monthly, frequency_distribution_yearly),
)

In [181]:
# DataFrames for the monthly statistics and the min / max occurrence results.
wind_stats, stats_min, stats_max = map(
    pd.DataFrame,
    (wind_stats_monthly, wind_stats_min, wind_stats_max),
)

In [182]:
# The wind_shear column is converted to float explicitly (presumably it arrives
# as a non-float numeric type from the database; confirm against the query).
wind_shear_df = pd.DataFrame(data=wind_shear).astype({'wind_shear': float})
yey_df, sum_yey_df = map(pd.DataFrame, (yey, sum_yey))

In [183]:
estimate_k_df = pd.DataFrame(data=estimate_k)
# Keep only the rows for month 8 (August); this subset is the fitting sample.
aug_k = estimate_k_df.loc[estimate_k_df['month'].eq(8)]
aug_k

Unnamed: 0,year,month,year_month,avg_wind_speed,wind_shear,wind_turbine_speeds,power_curve,f_v
306,2010,8,2010-August,1.3583,1.4814,0.0,0.0,0.0
307,2010,8,2010-August,1.3583,1.4814,0.5,0.0,0.3272564
308,2010,8,2010-August,1.3583,1.4814,1.0,0.0,0.5004343
309,2010,8,2010-August,1.3583,1.4814,1.5,0.0,0.4799028
310,2010,8,2010-August,1.3583,1.4814,2.0,0.0,0.3420525
311,2010,8,2010-August,1.3583,1.4814,2.5,0.0,0.1911127
312,2010,8,2010-August,1.3583,1.4814,3.0,0.0,0.0857124
313,2010,8,2010-August,1.3583,1.4814,3.5,0.0,0.03124994
314,2010,8,2010-August,1.3583,1.4814,4.0,0.0,0.009332237
315,2010,8,2010-August,1.3583,1.4814,4.5,0.0,0.00229387


In [184]:
from scipy.optimize import curve_fit
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

def fit_weibull(df : pd.DataFrame, k_initial : float, mse_limit : float, max_iterations : int = 50) -> tuple:
    """Fit the Weibull shape parameter ``k`` to a tabulated density curve.

    The scale parameter ``A`` is not fitted. It is fixed from the first
    ``wind_shear`` value in ``df`` as ``A = 2 / sqrt(pi) * wind_shear``.
    ``k`` is only adjusted while the mean squared error (MSE) between
    ``df['f_v']`` and the Weibull curve is above ``mse_limit``.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``wind_turbine_speeds`` (speeds at which the density is
        tabulated), ``wind_shear`` (only the first row is read) and ``f_v``
        (the density values to fit).
    k_initial : float
        Starting value for ``k``.
    mse_limit : float
        Fitting stops once the MSE is less than or equal to this value.
    max_iterations : int, default 50
        Upper bound on the number of ``curve_fit`` calls.

    Returns
    -------
    tuple
        ``(k, mse, f_v_exp, f_v_calc)``: the final shape parameter, its MSE,
        the tabulated density and the density computed with the final ``k``.

    Progress is printed for the initial guess and for every fit iteration.
    """

    def weibull_function_k(v : float, k : float, A : float):
        # Weibull probability density with shape k and scale A.
        return (k / A) * (v / A) ** (k - 1) * np.exp(-(v / A) ** k)

    def mean_squared(observed, predicted):
        # Plain MSE; avoids depending on an import made elsewhere in the notebook.
        return np.mean((np.asarray(observed, dtype=float) - np.asarray(predicted, dtype=float)) ** 2)

    v = df['wind_turbine_speeds'].to_numpy(dtype=float)
    # Named mean_speed rather than vm so the vizro.models alias is not shadowed.
    mean_speed = df['wind_shear'].iloc[0]
    f_v_exp = df['f_v'].to_numpy(dtype=float)

    A = (2 / np.sqrt(np.pi)) * mean_speed

    k = k_initial

    mse = mean_squared(f_v_exp, weibull_function_k(v=v, k=k_initial, A=A))
    print(f"Initial k : {k_initial}, Initial MSE : {mse}")

    for iteration in range(max_iterations):
        if mse <= mse_limit:
            break

        # curve_fit infers the number of free parameters from p0, so the model
        # handed to it must take exactly (v, k). A is bound through the closure;
        # passing the three-argument function directly raises TypeError.
        optimized_parameters, _ = curve_fit(
            lambda speeds, shape: weibull_function_k(speeds, shape, A),
            v,
            f_v_exp,
            p0=[k],
        )
        previous_k = k
        k = optimized_parameters[0]

        mse = mean_squared(f_v_exp, weibull_function_k(v, k, A))

        print(f"Iteration {iteration + 1}: k = {k}, MSE = {mse}")

        # Restarting the fit from its own optimum cannot improve it further,
        # so stop instead of repeating identical iterations.
        if np.isclose(k, previous_k, rtol=1e-9, atol=0.0):
            break

    f_v_calc = weibull_function_k(v, k, A)

    return k, mse, f_v_exp, f_v_calc

In [185]:
# Fit k on the August subset, starting from k = 2 with an MSE limit of 0.001.
k_analysis = fit_weibull(
    df=aug_k,
    k_initial=2,
    mse_limit=0.001,
)

Initial k : 2, Initial MSE : 1.4067254439277478e-34


In [186]:
# Side-by-side table of the tabulated density and the fitted curve.
# fit_weibull returns (k, mse, f_v_exp, f_v_calc), so items 2 and 3 are the two curves.
k_viz_df = pd.DataFrame(
    data={
        'v': aug_k['wind_turbine_speeds'],
        'f(v) experimental': k_analysis[2],
        'f(v) calculated': k_analysis[3],
    }
)

In [187]:
# Display the comparison table as the cell output.
k_viz_df

Unnamed: 0,v,f(v) experimental,f(v) calculated
306,0.0,0.0,0.0
307,0.5,0.3272564,0.3272564
308,1.0,0.5004343,0.5004343
309,1.5,0.4799028,0.4799028
310,2.0,0.3420525,0.3420525
311,2.5,0.1911127,0.1911127
312,3.0,0.0857124,0.0857124
313,3.5,0.03124994,0.03124994
314,4.0,0.009332237,0.009332237
315,4.5,0.00229387,0.00229387


# Dashboard

In [188]:
# Calls Vizro's private _reset() before the page models below are defined.
# Presumably this clears models registered by a previous run so that re-running
# the cells does not collide on component ids — confirm against the Vizro docs.
Vizro._reset()

In [189]:
# Landing page: banner image, data acknowledgement, project overview and author details.
landing_page = vm.Page(
    title="Wind Resource Assessment Dashboard",
    layout=vm.Layout(
        # Grid cells hold component indices: the banner (0) and the
        # acknowledgement (1) each take a full row; the overview (2) and the
        # author card (3) share the last row.
        grid=[
            [0,0],
            [1,1],
            [2,3],

        ],
        row_min_height="500px"
    ),
    components=[
        vm.Card(
            id="Banner",
            text=
            """ 
            ![Banner](assets/dalle-wind-farm.png#banner)
            """
        ),
        vm.Card(
            id="Data Acknowledgement",
            text=
            """ 
            # Data Declaration and Acknowledgement
            The data used in this dashboard was provided via the Geodetic Engineering 
            Component of the Energy Engineering 205 Laboratory class presided over by 
            Engineer Rosario Ang, under the Energy 
            Engineering Program of the University of the Philippines Diliman
            
            The author of the dashboard would like to:
            - Acknowledge the Applied Geodesy and Space Technology Research Laboratory (AGST Lab)
            of the UP Training Center for Applied Geodesy & Photogrammetry (TCAGP), College of 
            Engineering, UP Diliman. 
            - Express that this data is only used for research, and to showcase skill in wind
            resource assessment, and the use of PostgreSQL, Python, and related tools. 
           
           
            
            ![Data Declaration](assets/data_declaration.png#image)
            """
        ),
        vm.Card(
            id="Overview, Navigation",
            text=
            """ 
            # Welcome!
            
            This project aims to create a dashboard to aid in determining
            the theoretical wind energy potential of a site and to showcase
            proficiency in certain technical skills in creating a dashboard
            
            This dashboard can theoretically be applied to any wind site. 
            
            Skills and Tools
            - Python
            - SQL
            - Git and Github
            - PostgreSQL
            - Structuring a programming project (file structure, virtual environments, etc)
            - Jupyter Notebooks
            - Magic Commands
            - Pandas
            - Psycopg
            - Dashboarding
            - Vizro (Dashboarding Tool)
            """
        ),
        vm.Card(
            id="Author Information",
            text=
            """ 
            Author Name: Jose Mari Angelo Abeleda Jr \n
            Contact Number: 0968 681 1458 \n
            Email: gioabeleda@gmail.com \n
            Github: https://github.com/gbabeleda \n   
            """
        )
    ]
)

In [190]:
# Methodology page: a placeholder figure card stacked above the written methodology.
methodology_page = vm.Page(
    title="Methodology",
    layout=vm.Layout(
        # Single column: component 0 on the first row, component 1 on the second.
        grid=[
            [0],
            [1]
        ],
        row_min_height="500px"
    ),
    components=[
        # The figure card currently holds placeholder text only.
        vm.Card(
            id="Methodology Figure",
            text=
            """ 
            Placeholder
            """
        ),
        vm.Card(
            id="Methodology Text",
            text=
            """ 
            # Methodology
            
            - A postgresql server, database, schema and table were setup
            - Pandas was used to remove rows with null values and to create a csv from an excel file containing the data
            - The csv was loaded into the postgres using !psql magic commands
            - Queries were done using '%SQL' magic commands
            - Vizro was used to build the dashboard and to generate visualizations
            """
        ) 
    ]
)

In [191]:
# Data availability page: bar chart of days with data per month, next to an explanatory card.
data_availability_page = vm.Page(
    title="Data Availability",
    layout=vm.Layout(
        # One row: the chart (0) on the left, the text card (1) on the right.
        grid=[
            [0,1]
        ],
        row_min_height="500px",
    ),
    components=[
        vm.Graph(
            id="Data Availability Graph",
            # Horizontal bars: months on the y axis, day counts on the x axis.
            figure=px.bar(
                data_frame=data_availability,
                y="year_month",
                x="days_count",
                color="days_count",
                labels={"year_month" : "Month", "days_count" : "Days Counted"},
                color_continuous_scale="sunset",
                text="days_count"
            )
        ),
        vm.Card(
            id="Data Availability Text",
            text=
            """ 
            # Data Availability
            
            We define a unique day having data available if even a single non-null
            wind record is present for that day
            
            This means that there are days in this data set that may not contain
            complete wind records per day. 

            As we can see, data for both January and March 2010 are incomplete,
            with there being no data at all for the month of February. Thus, we cannot
            do analysis using these months as they are not representative of the population.
            """
        ) 
    ]
)

In [192]:
# Diurnal variation page: mean hourly wind speed at daily, monthly and yearly granularity.
diurnal_page = vm.Page(
    title="Diurnal Variation",
    layout=vm.Layout(
        # The daily chart (0) spans the first row; the monthly (1) and yearly (2)
        # charts share the second row.
        grid=[
            [0,0],
            [1,2]
        ],
        row_min_height="500px",
    ),
    components=[
        vm.Graph(
            id="Daily Diurnal",
            figure=px.scatter(
                data_frame=diurnal_daily,
                x="hour",
                y="avg_wind_speed",
                color="avg_wind_speed",
                labels={"avg_wind_speed" : "Mean Hourly Wind Speed", "hour" : "Hour"},
                title="Daily",
                
            )
        ),
        vm.Graph(
            id="Monthly Diurnal",
            figure=px.scatter(
                data_frame=diurnal_monthly,
                x="hour",
                y="avg_wind_speed",
                color="avg_wind_speed",
                # The "day" label is declared although this figure does not plot a day column.
                labels={"avg_wind_speed" : "Mean Hourly Wind Speed", "day" : "Days", "hour" : "Hour"},
                title="Monthly"
                
            )
        ),
        vm.Graph(
            id="Yearly Diurnal",
            figure=px.scatter(
                data_frame=diurnal_yearly,
                x="hour",
                y="avg_wind_speed",
                color="avg_wind_speed",
                labels={"avg_wind_speed" : "Mean Hourly Wind Speed", "hour" : "Hour"},
                title="Yearly"
                
            )
        )       
    ],
    # Filters narrow progressively: year applies to all three charts, month to the
    # monthly and daily charts, day to the daily chart only.
    controls=[
        vm.Filter(
            column="year",
            targets=["Yearly Diurnal","Monthly Diurnal","Daily Diurnal"],
            selector=vm.RadioItems()
        ),
        vm.Filter(
            column="month",
            targets=["Monthly Diurnal","Daily Diurnal"],
            selector=vm.Slider(
                step=1
            )
        ),
        vm.Filter(
            column="day",
            targets=["Daily Diurnal"],
            selector=vm.RadioItems()
        )
    ]
)

In [193]:
# Wind rose page built from the hourly-aggregated wind rose frames.
wind_rose_hourly_page = vm.Page(
    title="Wind Rose Hour",
    layout=vm.Layout(
        # The daily (0) and monthly (1) roses share the first row; the yearly
        # rose (2) spans the second row.
        grid=[
            [0,1],
            [2,2]
        ],
        row_min_height="500px"   
    ),
    components=[
        # All three roses plot percent_frequency per cardinal direction, grouped by
        # speed_bin, with the directions forced into compass order.
        # NOTE(review): color_continuous_scale only takes effect for a numeric color
        # column; presumably speed_bin is a categorical label — confirm.
        vm.Graph(
            id="Wind Rose Day",
            figure=px.bar_polar(
                data_frame=rose_hour_daily,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Daily",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="blugrn"
                
            )
        ),
        vm.Graph(
            id="Wind Rose Month",
            figure=px.bar_polar(
                data_frame=rose_hour_monthly,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Monthly",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="emrld"
            )
        ),
        vm.Graph(
            id="Wind Rose Year",
            figure=px.bar_polar(
                data_frame=rose_hour_yearly,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Yearly",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="deep"
            )
        )
        
    ],
    # Filters narrow progressively: year applies to all three roses, month to the
    # monthly and daily roses, day to the daily rose only.
    controls=[
        vm.Filter(
            column="year",
            targets=[
                "Wind Rose Year",
                "Wind Rose Month",
                "Wind Rose Day"
                ],
            selector=vm.RadioItems()
        ),
        vm.Filter(
            column="month",
            targets=[
                "Wind Rose Month",
                "Wind Rose Day"
                ],
            selector=vm.Slider(
                step=1
            )
        ),
        vm.Filter(
            column="day",
            targets=[
                "Wind Rose Day"
                ],
            selector=vm.RadioItems()
        )
    ]
)

In [194]:
# Wind rose page built from the raw wind rose frames; same layout and controls
# as the "Wind Rose Hour" page, with "Raw"-suffixed component ids.
wind_rose_raw_page = vm.Page(
    title="Wind Rose Raw",
    layout=vm.Layout(
        # The daily (0) and monthly (1) roses share the first row; the yearly
        # rose (2) spans the second row.
        grid=[
            [0,1],
            [2,2]
        ],
        row_min_height="500px"   
    ),
    components=[
        # All three roses plot percent_frequency per cardinal direction, grouped by
        # speed_bin, with the directions forced into compass order.
        # NOTE(review): color_continuous_scale only takes effect for a numeric color
        # column; presumably speed_bin is a categorical label — confirm.
        vm.Graph(
            id="Wind Rose Day Raw",
            figure=px.bar_polar(
                data_frame=rose_raw_daily_df,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Daily",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="blugrn"            
            )
        ),
        vm.Graph(
            id="Wind Rose Month Raw",
            figure=px.bar_polar(
                data_frame=rose_raw_monthly_df,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Monthly",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="emrld"
            )
        ),
        vm.Graph(
            id="Wind Rose Year Raw",
            figure=px.bar_polar(
                data_frame=rose_raw_yearly_df,
                r="percent_frequency",
                theta="cardinal_direction",
                color="speed_bin",
                barmode="group",
                title="Yearly",
                labels={"speed_bin" : "Wind Speed Bin"},
                category_orders={"cardinal_direction" : ['N', 'NE', 'E', 'SE', 'S', 'SW', 'W', 'NW']},
                color_continuous_scale="deep"
            )
        )
    ],
    # Filters narrow progressively: year applies to all three roses, month to the
    # monthly and daily roses, day to the daily rose only.
    controls=[
        vm.Filter(
            column="year",
            targets=[
                "Wind Rose Year Raw",
                "Wind Rose Month Raw",
                "Wind Rose Day Raw"
                ],
            selector=vm.RadioItems()
        ),
        vm.Filter(
            column="month",
            targets=[
                "Wind Rose Month Raw",
                "Wind Rose Day Raw"
                ],
            selector=vm.Slider(
                step=1
            )
        ),
        vm.Filter(
            column="day",
            targets=[
                "Wind Rose Day Raw"
                ],
            selector=vm.RadioItems()
        )
    ]
)

In [195]:
# Frequency distribution page: percent frequency per wind speed bin, monthly and yearly.
freq_dist_page = vm.Page(
    title="Frequency Distribution",
    layout=vm.Layout(
        # Single column: the monthly chart (0) above the yearly chart (1).
        grid=[
            [0],
            [1]
        ],
        row_min_height="500px"   
    ),
    components=[
        vm.Graph(
            id="Monthly Distribution",
            figure=px.bar(
                data_frame=distribution_monthly_df,
                x="speed_bin",
                y="percent_frequency",
                color="percent_frequency",
                labels={"speed_bin" : "Wind Speed Bins", "percent_frequency" : "Frequency (%)"},
                hover_name="year_month",
                title="Monthly",
                color_continuous_scale="viridis",
                text="percent_frequency"
            )
        ),
        vm.Graph(
            id="Yearly Distribution",
            figure=px.bar(
                data_frame=distribution_yearly_df,
                x="speed_bin",
                y="percent_frequency",
                color="percent_frequency",
                labels={"speed_bin" : "Wind Speed Bins", "percent_frequency" : "Frequency (%)"},
                hover_name="year",
                title="Yearly",
                color_continuous_scale="viridis",
                text="percent_frequency"
            )
        )
    ],
    # The year filter applies to both charts; the month filter to the monthly chart only.
    controls=[
        vm.Filter(
            column="year",
            targets=["Yearly Distribution","Monthly Distribution"],
            selector=vm.RadioItems()
        ),
        vm.Filter(
            column="month",
            targets=["Monthly Distribution"],
            selector=vm.Slider(
                step=1
            )
        )
    ]
)

In [196]:
# Statistics page: monthly max / min / average wind speed plus the hours at
# which the maximum and minimum speeds occurred.
stats_page = vm.Page(
    title="Statistics",
    layout=vm.Layout(
        # Row 1: monthly max (0) and min (1). Row 2: monthly average (2) at full
        # width. Row 3: occurrences of the maximum (3) and minimum (4).
        grid=[
            [0,1],
            [2,2],
            [3,4]
        ],
        row_min_height="500px" 
    ),
    components=[
        vm.Graph(
            id="Maximum Speed Monthly",
            figure=px.bar(
                data_frame=wind_stats,
                x="monthly_max_speed",
                y="year_month",
                color="monthly_max_speed",
                labels={"year_month" : "Month", "monthly_max_speed" : "Max Speed"},
                title="Maximum Wind Speeds Per Month",
                text="monthly_max_speed",
                color_continuous_scale="plotly3"
            )
        ),
        vm.Graph(
            id="Minimum Speed Monthly",
            figure=px.scatter(
                data_frame=wind_stats,
                x="monthly_min_speed",
                y="year_month",
                color="monthly_min_speed",
                labels={"year_month" : "Month", "monthly_min_speed" : "Min Speed"},
                title="Minimum Wind Speeds Per Month",
                color_continuous_scale="plasma"
            )
        ),
        vm.Graph(
            id="Average Speed Monthly",
            figure=px.bar(
                data_frame=wind_stats,
                y="monthly_avg_speed",
                x="year_month",
                color="monthly_avg_speed",
                labels={"year_month" : "Month", "monthly_avg_speed" : "Avg Speed"},
                title="Average Wind Speeds Per Month",
                color_continuous_scale="dense",
                text="monthly_avg_speed",
            )
        ),
        vm.Graph(
            id="Stats Hourly Max",
            # Months on the x axis, so this chart only gets the year filter below.
            figure=px.scatter(
                data_frame=stats_max,
                x="year_month",
                y="hour",
                color="day",
                size="wind_speed",
                hover_data=["year_month","wind_speed"],
                labels={"hour": "Hour", "day": "Day", "wind_speed": "Wind Speed", "year_month" : "Month"},
                title="Occurrences of Maximum Wind Speed",
                color_continuous_scale="twilight"
            )
        ),
        vm.Graph(
            id="Stats Hourly Min",
            figure=px.scatter(
                data_frame=stats_min,
                x="day",
                y="hour",
                color="hour",
                hover_data=["year_month","wind_speed"],
                labels={"hour": "Hour", "day": "Day", "wind_speed": "Wind Speed"},
                title="Occurrences of Minimum Wind Speed",
                color_continuous_scale="plasma"
            )
        )      
    ],
    # The year filter applies to both occurrence charts; the month filter to the
    # minimum-occurrence chart only. The three monthly charts are not filtered.
    controls=[
        vm.Filter(
            column="year",
            targets=["Stats Hourly Min","Stats Hourly Max"],
            selector=vm.RadioItems()
        ),
        
        vm.Filter(
            column="month",
            targets=["Stats Hourly Min"],
            selector=vm.Slider(
                step=1
            )
            
        )
    ]
)

In [197]:
# Weibull page: wind shear per month, turbine details and power curve, the
# density f(v) per month, and the daily / yearly energy production bars.
weibull_page = vm.Page(
    title="Weibull Distribution and Periodic Energy Production",
    layout=vm.Layout(
        # Row 1: wind shear chart (0). Row 2: turbine card (1) and power curve (2).
        # Rows 3-5: density chart (3), daily production (4), yearly production (5).
        grid=[
            [0,0],
            [1,2],
            [3,3],
            [4,4],
            [5,5]
        ],
        row_min_height="500px"
    ),
    components=[
        vm.Graph(
            id="Wind Shear",
            # Grouped bars comparing the two speed columns for each month.
            figure=px.bar(
                data_frame=wind_shear_df,
                x="year_month",
                y=["avg_wind_speed","wind_shear"],
                barmode="group",
                labels={
                    "value" : "Wind Speed (m/s)",
                    "avg_wind_speed" : "Wind Speed @ Anemometer",
                    "wind_shear" : "Wind Speed @ Rotor",
                    "year_month" : "Month"
                },
                title="Wind Shear Per Month",
            )
        ),
        vm.Card(
            id="Wind Turbine Details",
            text=
            """ 
            # Wind Turbine Details
            Manufacturer: Vestas \n
            Model: V20/100 \n
            Rated Power: 100 kW \n
            Rotor Diameter: 20 m \n
            Cut-in wind speed: 5 m/s \n
            Rated wind speed: 17.5 m/s \n
            Cut-off wind speed: 25 m/s \n
            Hub Height: 24 m \n
            """
        ),
        vm.Graph(
            id="Power Curve",
            # The power lookup in the SQL depends on the speed only, so every month
            # carries the same curve; the rows of a single month (10) are plotted.
            # Assumes month 10 is present in yey_df — TODO confirm.
            figure=px.scatter(
                data_frame=yey_df[yey_df["month"] == 10],
                x="wind_turbine_speeds",
                y="power_curve",
                labels={
                    "power_curve" : "Power (kW)",
                    "wind_turbine_speeds" : "Wind speed (m/s)"
                },
                title="Vestas 20/100 Power Curve"
            )
        ),
        vm.Graph(
            id="Weibull Function",
            figure=px.scatter(
                data_frame=yey_df,
                x="wind_turbine_speeds",
                y="f_v",
                labels={
                    "f_v" : "f(v)",
                    "wind_turbine_speeds" : "Wind speed (m/s)"
                },
                title="Weibull Function per Month"
            )
        ),
        vm.Graph(
            id="Periodic Energy Production Daily",
            figure=px.bar(
                data_frame=sum_yey_df,
                x="year_month",
                y="daily_yey_for_month",
                labels={
                    "year_month" : "Month",
                    "daily_yey_for_month" : "Daily Energy Production (kWh)"
                },
                color="daily_yey_for_month",
                text="daily_yey_for_month"
            )
        ),
        vm.Graph(
            id="Periodic Energy Production Yearly",
            figure=px.bar(
                data_frame=sum_yey_df,
                x="year_month",
                y="yearly_yey_for_month",
                labels={
                    "year_month" : "Month",
                    "yearly_yey_for_month" : "Yearly Energy Production (kWh)"
                },
                color="yearly_yey_for_month",
                text="yearly_yey_for_month"
            )
        ),
    ],
    # The year filter applies to every chart except the power curve; the month
    # filter applies to the wind shear and density charts only.
    controls=[
        vm.Filter(
            column="year",
            targets=["Wind Shear","Weibull Function","Periodic Energy Production Daily", "Periodic Energy Production Yearly"],
            selector=vm.RadioItems()
        ),
        
        vm.Filter(
            column="month",
            targets=["Wind Shear","Weibull Function"],
            selector=vm.Slider(
                step=1
            )   
        )
    ]
)

In [198]:
# estimate_k_page = vm.Page(
#     title="Estimating K",
#     # layout=vm.Layout(
#     #     grid=[
#     #         [0,1],
#     #         [2,2],
#     #         [3,3]
#     #     ],
#     #     row_min_height="700px"
#     # ),
#     components=[
#         vm.Graph(
#             id="Weibull Estimate K",
#             figure=px.scatter(
#                 data_frame
#             )
#         )
#     ]
# )

In [199]:
# Assemble the dashboard from the pages defined above, in this list order.
# The "Estimating K" page is still commented out and therefore excluded.
dashboard = vm.Dashboard(
    pages=[
        landing_page,
        methodology_page,
        data_availability_page,
        diurnal_page,
        wind_rose_hourly_page,
        wind_rose_raw_page,
        freq_dist_page,
        stats_page,
        weibull_page,
        # estimate_k_page    
    ]
)

In [200]:
# Build the Vizro app from the dashboard model and run it on port 8051.
Vizro().build(dashboard=dashboard).run(port="8051")