cybertec-postgresql
diff --git a/‎05_create_table.sql‎
Lines changed: 7 additions & 5 deletions b/‎05_create_table.sql‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎06_create_table_options.sql‎
Lines changed: 10 additions & 16 deletions b/‎06_create_table_options.sql‎
Lines changed: 10 additions & 16 deletions
diff --git a/‎07_alter_table.sql‎
Lines changed: 32 additions & 3 deletions b/‎07_alter_table.sql‎
Lines changed: 32 additions & 3 deletions
diff --git a/‎09_data_type_showcase.sql‎
Lines changed: 83 additions & 18 deletions b/‎09_data_type_showcase.sql‎
Lines changed: 83 additions & 18 deletions
diff --git a/‎10_views.sql‎
Lines changed: 15 additions & 6 deletions b/‎10_views.sql‎
Lines changed: 15 additions & 6 deletions
diff --git a/‎15_materialized_views.sql‎
Lines changed: 5 additions & 3 deletions b/‎15_materialized_views.sql‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎20_stored_procedures.sql‎
Lines changed: 52 additions & 25 deletions b/‎20_stored_procedures.sql‎
Lines changed: 52 additions & 25 deletions
diff --git a/‎25_table_using_sproc.sql‎
Lines changed: 5 additions & 5 deletions b/‎25_table_using_sproc.sql‎
Lines changed: 5 additions & 5 deletions
@@ -9,31 +9,33 @@ SET ROLE TO demorole;
 
 CREATE TABLE banking_demo.branch(
     branch_id       int NOT NULL PRIMARY KEY,   -- using just "id" for name here is not recommended, the more explicit the better for important stuff
-    balance         int NOT NULL DEFAULT 0
+    balance         numeric NOT NULL DEFAULT 0
 );
 
 CREATE TABLE banking_demo.teller(
     teller_id       int NOT NULL PRIMARY KEY,
     branch_id       int NOT NULL,
-    balance         int NOT NULL DEFAULT 0
+    balance         numeric NOT NULL DEFAULT 0
 );
 
 CREATE TABLE banking_demo.account(
     account_id      int NOT NULL PRIMARY KEY,
     branch_id       int NOT NULL,
     teller_id       int NOT NULL,
-    balance         int NOT NULL DEFAULT 0
+    balance         numeric NOT NULL DEFAULT 0
 );
 
 CREATE TABLE banking_demo.transaction_history(
     teller_id       int NOT NULL,
     branch_id       int NOT NULL,
     account_id      int NOT NULL,
-    delta           int NOT NULL,
+    delta           numeric NOT NULL,
     created_on      timestamp with time zone NOT NULL DEFAULT now()
 );
 
-
+-- Generally it's also a good practice to at least minimally comment the tables and columns for complex applications
+COMMENT ON TABLE banking_demo.transaction_history IS 'A simple banking table';
+COMMENT ON COLUMN banking_demo.transaction_history.delta IS 'Change in account balance for one transaction';
 
 -- generate 1 branch, 10 tellers for branch, 10K accounts for each teller with random balances
 
 
@@ -1,25 +1,19 @@
 /*
 Other ways of creating tables are:
-    1) using LIKE to use an existing table as a templates and selecting (or leaving out) some constraints/checks/indexes
+    1) using LIKE to use an existing table as a template and selecting (or leaving out) some constraints/checks/indexes
     2) create table as select ...
-*/
-
--- create a copy of 
-CREATE TABLE temp  (LIKE t_demo EXCLUDING INDEXES);
-
--- could also do:
--- create table t_demo_log as select * from t_demo where false;
 
-
-/*
 Other types of tables are:
-    1) temporary tables
-    2) "unlogged" tables
+    1) temporary tables - auto-removed when the session ends and visible only in the session that created them
+    2) "unlogged" tables - such tables are not WAL-logged thus a lot faster to work with. Downside is that they're emptied after a crash.
 */
 
--- temporary tables are not persistent and visible only in that session that created them
-CREATE TEMP TABLE t (LIKE t_demo);
+-- create a temporary copy of banking_demo.teller
+-- NB! Note that you cannot specify a schema for temp tables
+CREATE TEMP TABLE teller_temp(LIKE banking_demo.teller EXCLUDING INDEXES);
 
--- unlogged tables are not WAL-logged (emptied after a crash) thus a lot faster to work with
-CREATE UNLOGGED TABLE t_data_staging (LIKE t_demo);
+-- could also "auto create" a table from select (no indexes, FKs, checks, etc are transferred)
+CREATE TEMP TABLE teller_temp_2 AS SELECT * FROM banking_demo.teller WHERE false;
 
+-- unlogged tables are a good option for staging tables that get a lot of updates and can be re-initialized quickly from input data
+CREATE UNLOGGED TABLE banking_demo.staging_data AS SELECT * FROM banking_demo.account;
@@ -1,6 +1,35 @@
 /*
-Another significant performance tweaking option for tables is the FILLFACTOR parameter.
+ALTER TABLE is most commonly used to:
+    1) add/drop/rename columns
+    2) add/drop constraints (FKs, checks, not null)
+    3) set column defaults
+    4) change column data types
+    5) add/drop triggers
+    6) declare inheritance (used typically for partitioning which will be built-in from Postgres 10)
+    7) changing physical storage parameters
+    8) a lot more, see docus...
+*/
+
+-- adding a new column
+-- note that here we also demonstrate that changing the structure can be performed transactionally!! (unlike in Oracle for example)
+BEGIN;
+ALTER TABLE account_unlogged ADD COLUMN extra_info TEXT;
+ALTER TABLE account_unlogged ALTER COLUMN extra_info SET DEFAULT 'hello';
+COMMIT;
+-- also note that for big tables on busy DBs the above 2-step form is much preferred over below form as it won't re-write the whole table:
+-- ALTER TABLE account_unlogged ADD COLUMN extra_info TEXT DEFAULT 'hello';
+
+-- adding a simple, explicitly named check constraint; the syntax is ADD CONSTRAINT <name> CHECK (<expression>)
+ALTER TABLE account_unlogged ADD CONSTRAINT my_check CHECK (account_id > 0);
+
+-- change column data type. NB! mostly it means a full table re-write so be wary.
+ALTER TABLE account_unlogged ALTER COLUMN extra_info TYPE varchar(500);
+
+
+/*
+A significant performance tweaking option for tables is the FILLFACTOR parameter.
 It tells Postgres to fill up tables only to specified percentage, so that future row updates would
-have a chance to be performed "in line". Some "terms and conditions" apply but for certain usecases huge boosts are possible.
+have a chance to be performed "in line" (called HOT-updates). Some "terms and conditions" apply but for certain use cases (a lot of
+updates on un-indexed columns) huge boosts are possible. Fillfactor can also be specified similarly when creating the table.
 */
-ALTER TABLE 
+ALTER TABLE account_unlogged SET (FILLFACTOR=80);
@@ -1,28 +1,93 @@
+-- Showcase table with one column per commonly used data type family
+CREATE TABLE main_datatypes (
+    /* serials aka sequences */
+    id          bigserial PRIMARY KEY,   -- serial/bigserial corresponds to int4/int8 and will just auto-attach a DEFAULT sequence
+    
+    /* numbers */
+    smallish_integers      int,            -- int4 or integer,
+    large_integers     int8,           -- for big-data i.e. > 2 billion rows
+    floating_point     double precision,    -- for non-exact calculations. synonymous with "float8"
+    exact_calculations  numeric,    -- for scientific/financial calculations. synonymous with "decimal"

+    /* character data */
+    text_data1  text,           -- for text data it's generally recommended to just use the "text" type
+    text_data2  varchar(200),   -- when limiting input needs to be enforced use varchar(X)
+    text_fixed  char(3),        -- for fixed size data like currency codes
+    json_data   jsonb,          -- efficiently stored and indexable JSON texts, meant for NoSQL use cases

+    /* temporals */
+    event_ts    timestamptz,    -- timestamptz = "timestamp with time zone" and should always be used instead of simple "timestamp"
+    event_date  date,           -- '2017-08-15'
+    event_time  time,           -- '17:35'
+    event_duration interval,    -- '1day', '2years 1month', '3h 0m 10s'
+    
+    is_active   boolean,        -- boolean input can be specified as true/false (case insensitive), t/f, on/off
+    
+    /* typical auditing fields to track changes of important data*/
+    created_by text NOT NULL DEFAULT current_user,
+    created_on timestamptz NOT NULL DEFAULT now(),
+    last_modified_by text,          -- would need a trigger to ensure it's always updated when changing data
+    last_modified_on timestamptz    -- would need a trigger to ensure it's always updated when changing data
+);
 
 
 
-CREATE TABLE t_demo (
-	id serial PRIMARY KEY,
-	data jsonb,
-	department text NOT NULL,
-	created_by text NOT NULL DEFAULT current_user,
-	created_on timestamptz NOT NULL DEFAULT now(),
-	last_modified_on timestamptz
-);
+/* Numerals */
+
+INSERT INTO main_datatypes(large_integers)
+  SELECT 1e11;   --100 billion
+
+INSERT INTO main_datatypes(floating_point)
+  SELECT 3.14;
+
+/* character data */
+
+INSERT INTO main_datatypes(text_data1)  -- take only first 10 chars of 300
+  SELECT repeat('a', 300)::char(10);
+
+--INSERT INTO main_datatypes(text_data2)  -- this will fail as text_data2 allows max 200 chars
+--  SELECT repeat('a', 300);
+
+INSERT INTO main_datatypes (json_data)
+  VALUES ('{"user_id": 1, "order_items": [{"item_id":3, "code": "EAS123"}], "created_on": "2017-08-15 11:45:28.852685+03"}');
+
+
+
+/* 
+Temporals - Postgres has excellent support for working with dates and times. Here are some of the most used functions.
+ */
+
+-- current timestamptz as of beginning of transaction, thus we'll see the same values
+BEGIN;
+SELECT now();
+SELECT now();
+END;
+
+-- same
+SELECT 'now'::timestamptz;
+
+-- current timestamptz in real time, different values
+BEGIN;
+SELECT clock_timestamp();
+SELECT clock_timestamp();
+END;
 
-COMMENT ON TABLE t_demo IS 'a simple table';
-COMMENT ON COLUMN t_demo.data IS 'JSONB is designed for NoSQL';
+-- current date
+SELECT 'today'::date;
+SELECT current_date;
+SELECT now()::date;
 
-INSERT INTO t_demo (data, department)
-  VALUES ('{"user_id": 1, "order_items": [{"item_id":3, "code": "EAS123"}]}', 'sales');
+-- using interval to get the exact date of 90 days ago ("date - interval" yields a timestamp, so cast the result back to date)
+SELECT (current_date - '90d'::interval)::date;
 
--- index top level keys for a simple NoSQL use case.
-CREATE INDEX CONCURRENTLY ON t_demo USING gin (data);
+-- current UNIX epoch seconds
+SELECT extract(epoch FROM now());
 
--- index everything
-CREATE INDEX ON t_demo USING gin (data jsonb_path_ops);
+-- generate all dates for the last week
+SELECT generate_series(date_trunc('week', now() - '1 week'::interval),
+                       date_trunc('week', now()) - '1day'::interval, '1day');
 
--- Prepare for frequent changes, increase FILLFACTOR
-ALTER TABLE t_demo SET (fillfactor=80);
+-- same as above but with ORDINALITY
+SELECT
+  * 
+FROM generate_series(date_trunc('week', now() - '1 week'::interval), date_trunc('week', now()) - '1day'::interval, '1day')
+    WITH ORDINALITY t(date, day_of_week);
@@ -1,9 +1,18 @@
-CREATE VIEW v_simple AS
-SELECT * FROM t_demo;
+/*
+Views allow application layering and can be also used for securing data. They behave the same as tables for GRANT privileges and
+don't normally incur any performance hit when selecting.
+*/
 
--- NB! simple views (selecting from one table basically) allow also inserting so "check option" might make sense,
+-- a minimal view: exposes just the id and the balance of every account
+CREATE VIEW v_account_balance AS
+    SELECT
+        a.account_id,
+        a.balance
+    FROM account AS a;
+
+-- every role may read from the view
+GRANT SELECT ON v_account_balance TO PUBLIC;
+
+
+-- NB! simple views (selecting from one table basically) allow also inserting, so "check option" might make sense,
 -- it avoids inserting data that a user would not be able to see due to WHERE condition
-CREATE VIEW v_data_for_sales_dept AS
-SELECT * FROM t_demo
-WHERE department = 'sales'
+CREATE VIEW v_tellers_for_branch_one AS
+SELECT * FROM teller
+WHERE branch_id = 1
 WITH CHECK OPTION;
@@ -1,12 +1,14 @@
 -- Materialized view is a "point in time" copy of the select statement and needs explicit refreshing
 -- In other aspects the mat.view acts like a normal table - one can create indexes on it 
 
-CREATE MATERIALIZED VIEW mv_last_month_data AS
-SELECT * FROM t_demo
+CREATE MATERIALIZED VIEW mv_last_months_transactions AS
+SELECT * FROM transaction_history
 WHERE created_on > current_date - '1month'::interval;
 
+-- refresh the data (blocks selects)
+REFRESH MATERIALIZED VIEW mv_last_months_transactions;
+
 -- refresh concurrently needs at least one unique index to "merge" changes effectively
-CREATE UNIQUE INDEX ON mv_last_month_data (id);
-
+-- NOTE(review): transaction_history as created in 05_create_table.sql has no "id" column - confirm which column(s) are unique and index those
+CREATE UNIQUE INDEX ON mv_last_months_transactions (id);
 -- "concurrent" refresh allows other sessions to read the view during the update but is slower
-REFRESH MATERIALIZED VIEW CONCURRENTLY mv_last_month_data;
+REFRESH MATERIALIZED VIEW CONCURRENTLY mv_last_months_transactions;
@@ -1,36 +1,63 @@
--- in PL/pgSQL there's no distinction between a function (restricted to returning a single scalar value in Oracle PL/SQL) and a procedure - 
--- all stored procedural code are 'functions' that can return anything (scalars, multiple values, rows of multiple values or table types) or nothing.
+/*
+PL/pgSQL is a specialized programming language that allows to query data and provides common features like variables, comparisons, 
+branching (if/else), error handling, calling other functions.
 
-CREATE OR REPLACE FUNCTION f1_returns_text() RETURNS text as
+Stored procedures in PL/pgSQL are beneficial as they provide layering for the "data logic" (one can do modifications 
+without applications knowing about it) and usually also improve performance for multi-step data processing scenarios.
+
+In PL/pgSQL there's no distinction between a function (restricted to returning a single scalar value in Oracle PL/SQL) and a procedure, 
+all stored procedural code are 'functions' that can return anything (scalars, multiple values, rows of multiple values or table types) 
+or nothing.
+
+NB! In addition to PL/pgSQL there are dozens of other PL-languages available for installation for more complex data processing needs - 
+plpython, plperl, pljava to name a few.
+*/
+
+-- a simple sproc returning a scalar
+CREATE OR REPLACE FUNCTION say_hello() RETURNS text as
 $SQL$
 BEGIN
-	RETURN 'demo';
+	RETURN 'Hello! Can you hear me?';
 END;
 $SQL$
 LANGUAGE plpgsql;
 
 
--- stored procedures support error handling and subtransactions via BEGIN/EXCEPTION/END block
+-- note that the above function could (and should) be written in pure SQL, as this has some performance benefits
+CREATE OR REPLACE FUNCTION say_hello_2() RETURNS text as
+$SQL$
+    SELECT 'Hello! Can you hear me?'::text;
+$SQL$
+LANGUAGE sql;
+
 
-CREATE OR REPLACE FUNCTION merge_db(key INT, data TEXT) RETURNS VOID AS
-$$
+-- a stored procedure to perform a "bank transfer" for our banking schema
+--   account_from        - account to debit; must exist and hold at least amount_to_transfer
+--   account_to          - account to credit; must exist
+--   amount_to_transfer  - amount to move; must be positive
+-- raises an exception when any of the above checks fails
+CREATE OR REPLACE FUNCTION perform_transaction(
+    account_from        account.account_id%TYPE,
+    account_to          account.account_id%TYPE,
+    amount_to_transfer  account.balance%TYPE)
+RETURNS VOID AS
+$SQL$
+DECLARE
+  account_balance account.balance%TYPE;
 BEGIN
-    LOOP
-        -- first try to update the key
-        UPDATE db SET b = data WHERE a = key;
-        IF found THEN
-            RETURN;
-        END IF;
-        -- not there, so try to insert the key
-        -- if someone else inserts the same key concurrently,
-        -- we could get a unique-key failure
-        BEGIN
-            INSERT INTO db(a,b) VALUES (key, data);
-            RETURN;
-        EXCEPTION WHEN unique_violation THEN
-            -- Do nothing, and loop to try the UPDATE again.
-        END;
-    END LOOP;
+
+  -- a negative (or NULL) amount would slip past the funds check below and move money the wrong way
+  IF amount_to_transfer IS NULL OR amount_to_transfer <= 0 THEN
+    RAISE EXCEPTION 'Transfer amount must be positive';
+  END IF;
+
+  -- lock both rows up front, always in account_id order: the balance read below then cannot be
+  -- changed by a concurrent transfer, and a fixed lock order reduces the risk of deadlocks
+  PERFORM 1 FROM account WHERE account_id IN (account_from, account_to) ORDER BY account_id FOR UPDATE;
+
+  SELECT balance INTO account_balance FROM account WHERE account_id = account_from;
+
+  IF NOT FOUND THEN
+    RAISE EXCEPTION 'Account % not found', account_from;
+  END IF;
+
+  IF account_balance < amount_to_transfer THEN
+    RAISE EXCEPTION 'Not enough funds';
+  END IF;
+
+  UPDATE account SET balance = balance + amount_to_transfer WHERE account_id = account_to;
+
+  -- without this check a transfer to a non-existing account would debit the source and credit nobody
+  IF NOT FOUND THEN
+    RAISE EXCEPTION 'Account % not found', account_to;
+  END IF;
+
+  UPDATE account SET balance = balance - amount_to_transfer WHERE account_id = account_from;
+
 END;
-$$
-LANGUAGE plpgsql;
+$SQL$
+LANGUAGE plpgsql;
+
+
+-- TODO error handling and subtransactions via BEGIN/EXCEPTION/END block
@@ -1,8 +1,8 @@
--- setting a function as a default value
--- dropping the function with "cascade" will also remove the "default" declaration. code updates with "replace" are non-blocking
-CREATE TABLE t_func_as_def_param(
+-- setting a function as default value
+-- dropping the function with "cascade" will also remove the "default" declaration
+CREATE TABLE func_as_def_param(
 	id serial PRIMARY KEY,
-	data text NOT NULL DEFAULT f1_returns_text()
+	data text NOT NULL DEFAULT say_hello()
 );
 
-INSERT INTO t_func_as_def_param (data) VALUES (default);
+INSERT INTO func_as_def_param (data) VALUES (default);