In [None]:
-- To identify bearish candlestick vs bullish candlestick, see:
-- https://en.wikipedia.org/wiki/Candlestick_pattern#Formation_of_candlestick

-- Task 1 
-- Find Hammer pattern

-- Hammer candidates: returns the current-day row when
--   * the row dated exactly one day earlier is bearish (close below open),
--   * the current day is bullish (close above open), and
--   * the current day's close equals its high.
-- NOTE(review): the join matches on calendar day + 1, so pairs separated by a
-- gap in the data (e.g. a weekend) are never compared; confirm that is intended.
SELECT curr.*
FROM daily_ohlc AS curr
INNER JOIN daily_ohlc AS prev
    ON prev.day + INTERVAL '1 day' = curr.day
WHERE
    prev.open > prev.close          -- previous day is bearish
    AND curr.open < curr.close      -- current day is bullish
    AND curr.close = curr.high;     -- close is the high of the day

-- Find Inverted Hammer pattern

-- Inverted hammer candidates: returns the current-day row when
--   * the row dated exactly one day earlier is bearish (close below open),
--   * the current day is bullish (close above open), and
--   * the current day's open equals its low.
SELECT curr.*
FROM daily_ohlc AS curr
INNER JOIN daily_ohlc AS prev
    ON prev.day + INTERVAL '1 day' = curr.day
WHERE
    prev.open > prev.close          -- previous day is bearish
    AND curr.open < curr.close      -- current day is bullish
    AND curr.open = curr.low;       -- open is the low of the day

In [None]:
-- Task 2 
-- Find Engulfing Bullish pattern
-- INSERT INTO engulfing_bullish_patterns (day, open, high, low, close, volume)
-- Bullish engulfing: a bullish day whose body (open..close) strictly contains
-- the body of the bearish day dated one day earlier.
-- Returns the engulfing (second) day's row.
SELECT curr.*
FROM daily_ohlc AS curr
INNER JOIN daily_ohlc AS prev
    ON prev.day + INTERVAL '1 day' = curr.day
WHERE
    prev.open > prev.close          -- previous day is bearish
    AND curr.open < curr.close      -- current day is bullish
    AND prev.close > curr.open      -- current open is below the previous close
    AND prev.open < curr.close;     -- current close is above the previous open


-- Find Piercing Line pattern
-- INSERT INTO piercing_line_patterns (day, open, high, low, close, volume)
-- Piercing line: a bullish day that opens below the previous (bearish) day's
-- low and closes above the midpoint of that day's body.
-- Returns the second day's row.
SELECT curr.*
FROM daily_ohlc AS curr
INNER JOIN daily_ohlc AS prev
    ON prev.day + INTERVAL '1 day' = curr.day
WHERE
    prev.open > prev.close                          -- previous day is bearish
    AND curr.open < curr.close                      -- current day is bullish
    AND prev.low > curr.open                        -- opens under the previous low
    AND (prev.open + prev.close) / 2 < curr.close;  -- closes above the previous body's midpoint


-- Find Morning Star pattern
-- INSERT INTO morning_star_patterns (day, open, high, low, close, volume)
-- Morning star over three consecutive calendar days; returns the third day's row.
SELECT third_day.*
FROM daily_ohlc AS first_day
INNER JOIN daily_ohlc AS middle_day
    ON first_day.day + INTERVAL '1 day' = middle_day.day
INNER JOIN daily_ohlc AS third_day
    ON middle_day.day + INTERVAL '1 day' = third_day.day
WHERE
    first_day.open > first_day.close            -- first day is bearish
    -- Gabriel's interpretation (active): the middle day closes between the
    -- first day's low and the first day's close, i.e. at or below its body.
    AND middle_day.close >= first_day.low
    AND middle_day.close <= first_day.close
    -- Stefan's interpretation (alternative, not active): the middle day's body
    -- lies entirely below the first day's body:
    --   AND middle_day.open < first_day.close
    --   AND middle_day.close < first_day.close
    AND third_day.open < third_day.close        -- third day is bullish
    -- third day closes above the midpoint of the first day's body
    AND (first_day.open + first_day.close) / 2 < third_day.close;



In [None]:
-- Task 2 Calculate average number of continuously bullish days following each pattern
-- (e.g., hammer, inverted hammer, morning star) + check how reliable each pattern is
-- at predicting the future stock prices

-- To simplify processing, first save the pattern rows collected
-- in the previous tasks into their own tables.
-- E.g., prepend the previous sql statements with:
-- INSERT INTO hammer_patterns (day, open, high, low, close, volume)
-- INSERT INTO inverted_hammer_patterns (day, open, high, low, close, volume)
-- INSERT INTO morning_star_patterns (day, open, high, low, close, volume)

-- For Stefan's interpretation of "number of continuously bullish days following each pattern",
-- see `number_of_consecutive_bullish_days.sql`

-- Average length of the bullish run that immediately follows each pattern,
-- plus a simple reliability score per pattern.
--
-- bullish_days for a given day = number of consecutive rows directly after
-- that day (in day order) with close > open, stopping at the first row that
-- is not bullish. A day followed by a non-bullish row, or the last day in
-- the table, gets 0 and is still counted in the averages.
--
-- Output columns:
--   pattern                 pattern label
--   avg_bullish_streak      mean bullish_days over the pattern's occurrences
--   total_occurrences       number of pattern rows that matched a daily_ohlc day
--   reliability_percentage  share of occurrences followed by >= 2 bullish days
--
-- NOTE(review): "consecutive" means adjacent rows of daily_ohlc, so calendar
-- gaps in the data do not break a streak. This also assumes one row per day
-- in daily_ohlc; confirm both against the data.
WITH numbered_days AS (
    -- Give each day a dense position so streak lengths are plain subtraction.
    SELECT
        day,
        close > open AS is_bullish,
        ROW_NUMBER() OVER (ORDER BY day) AS day_no,
        COUNT(*) OVER () AS day_count
    FROM daily_ohlc
),

bullish_streaks AS (
    SELECT
        day,
        -- Position of the first later row that is not bullish (or one past
        -- the end of the table when every later row is bullish), minus the
        -- current position, minus one = length of the bullish run after it.
        COALESCE(
            MIN(CASE WHEN is_bullish IS NOT TRUE THEN day_no END) OVER (
                ORDER BY day_no
                ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
            ),
            day_count + 1
        ) - day_no - 1 AS bullish_days
    FROM numbered_days
)

SELECT
    patterns.pattern,
    AVG(bullish_streaks.bullish_days) AS avg_bullish_streak,
    COUNT(*) AS total_occurrences,
    SUM(CASE WHEN bullish_streaks.bullish_days >= 2 THEN 1 ELSE 0 END) * 100.0
        / COUNT(*) AS reliability_percentage
FROM (
    SELECT 'Engulfing Bullish' AS pattern, day FROM engulfing_bullish_patterns
    UNION ALL
    SELECT 'Piercing Line', day FROM piercing_line_patterns
    UNION ALL
    SELECT 'Morning Star', day FROM morning_star_patterns
) AS patterns
INNER JOIN bullish_streaks
    ON bullish_streaks.day = patterns.day
GROUP BY patterns.pattern;


In [None]:
-- Task 3

-- Find Engulfing bearish pattern (simply the opposite of Engulfing bullish pattern)
-- INSERT INTO engulfing_bearish_patterns (day, open, high, low, close, volume)
-- Bearish engulfing: a bearish day whose body strictly contains the body of
-- the bullish day dated one day earlier. Returns the second day's row.
SELECT curr.*
FROM daily_ohlc AS curr
INNER JOIN daily_ohlc AS prev
    ON prev.day + INTERVAL '1 day' = curr.day
WHERE
    prev.open < prev.close          -- first day is bullish
    AND curr.open > curr.close      -- second day is bearish
    AND prev.close < curr.open      -- second day opens above the first day's close
    AND prev.open > curr.close;     -- second day closes below the first day's open


-- Find Evening star pattern (simply the opposite of star pattern)
-- INSERT INTO evening_star_patterns (day, open, high, low, close, volume)
-- Evening star over three consecutive calendar days; returns the third day's
-- row. This is the mirror image of the morning star query:
--   * first day is bullish,
--   * second day closes between the first day's close and its high
--     (at or above the first day's body),
--   * third day is bearish and closes below the midpoint of the first
--     day's body.
SELECT d2.*
FROM daily_ohlc AS d0
INNER JOIN daily_ohlc AS d1
    ON d1.day = d0.day + INTERVAL '1 day'
INNER JOIN daily_ohlc AS d2
    ON d2.day = d1.day + INTERVAL '1 day'
WHERE
    d0.close > d0.open  -- First day is bullish
    -- Mirror of the morning star's "BETWEEN d0.low AND d0.close": the lower
    -- bound is the first day's close, not its low, so a second day closing
    -- inside or below the first day's body no longer qualifies.
    AND d1.close BETWEEN d0.close AND d0.high  -- Second day is a small candle above first day
    AND d2.close < d2.open  -- Third day is bearish
    AND d2.close < (d0.open + d0.close) / 2; -- Third day's close is below first day's midpoint


-- Find Three white soldier patterns
-- INSERT INTO three_white_soldiers_patterns (day, open, high, low, close, volume)
-- Three white soldiers: three bullish days in a row (by calendar day), each
-- opening inside the previous day's body and closing above the previous close.
-- Returns the third day's row.
SELECT third_day.*
FROM daily_ohlc AS first_day
INNER JOIN daily_ohlc AS second_day
    ON first_day.day + INTERVAL '1 day' = second_day.day
INNER JOIN daily_ohlc AS third_day
    ON second_day.day + INTERVAL '1 day' = third_day.day
WHERE
    -- all three candles are bullish
    first_day.open < first_day.close
    AND second_day.open < second_day.close
    AND third_day.open < third_day.close
    -- second day opens within the first day's body and closes higher
    AND second_day.open >= first_day.open
    AND second_day.open <= first_day.close
    AND first_day.close < second_day.close
    -- third day opens within the second day's body and closes higher
    AND third_day.open >= second_day.open
    AND third_day.open <= second_day.close
    AND second_day.close < third_day.close;


### Optimisation for performance:

#### Query based improvements

1) Materialized View Definitions
A continuous aggregate in TimescaleDB is a materialized view that is refreshed automatically and stores aggregated results efficiently over time.
It speeds up queries that use GROUP BY (e.g., OHLC calculations).


2) Leverage TimescaleDB-specific functions (hyperfunctions) in queries, e.g.:
last(close, day)
first(open, day)

-- Continuous aggregate that rolls daily_ohlc up into one OHLC row per 1-day bucket.
--   open   = value of `open` at the earliest `day` in the bucket  (first)
--   close  = value of `close` at the latest `day` in the bucket   (last)
--   high / low = bucket maximum / minimum, volume = bucket total
CREATE MATERIALIZED VIEW daily_ohlc_cagg
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 day', day) AS day,
    first(open, day) AS open,
    MAX(high) AS high,
    MIN(low) AS low,
    last(close, day) AS close,
    SUM(volume) AS volume
FROM daily_ohlc
-- Group by the bucket expression itself: a bare `GROUP BY day` is ambiguous
-- and PostgreSQL resolves it to the input column `day`, not the output alias,
-- so the view would not be grouped by the time bucket.
GROUP BY time_bucket('1 day', day);

3) SQL scripts for creating indexes. In a composite index, place discrete columns first, then continuous columns.

-- Newest-first lookups by day on the aggregate and on each saved pattern table.
CREATE INDEX ON daily_ohlc_cagg
    USING btree (day DESC);
CREATE INDEX ON engulfing_bearish_patterns
    USING btree (day DESC);
CREATE INDEX ON evening_star_patterns
    USING btree (day DESC);
CREATE INDEX ON three_white_soldiers_patterns
    USING btree (day DESC);

-- Composite variants that also carry the close/open prices used by the
-- bullish/bearish comparisons.
-- NOTE(review): both share the leading `day` column with the single-column
-- index on daily_ohlc_cagg; confirm all three are actually needed.
CREATE INDEX ON daily_ohlc_cagg
    USING btree (day DESC, close, open);
CREATE INDEX ON daily_ohlc_cagg
    USING btree (day DESC, open, close);

#### Database based improvements

4) Run timescaledb-tune to adjust the memory settings (it tunes memory rather than disk) in postgresql.conf

#### Memory based improvements

5) schema improvements to reduce memory usage

Avoid the numeric data type for prices; store them as integers scaled to at most 2 decimal places (e.g., cents). Stocks priced above $1 or penny stocks are typically quoted to 2 decimal places (https://www.investopedia.com/terms/t/tick.asp). A smallint takes 2 bytes and has a range of -32,768 to +32,767, which covers prices only up to 327.67 when storing cents, so use integer (4 bytes) if prices can exceed that.
The storage requirement for a numeric value in PostgreSQL is two bytes for each group of four decimal digits, plus three to eight bytes of overhead.
For 24.98437:

Total digits: 7

Groups of four digits: 3 (0024, 9843 and 7000; groups are aligned to the decimal point)

Storage: (3 * 2) + 3 to 8 bytes overhead

Total: 9 to 14 bytes

6) Use set_chunk_time_interval to size chunks so that they make up no more than 25% of main memory (across all hypertables); 25% of RAM is also the recommended size of shared_buffers.
shared_buffers = 25% of RAM.


#### More tuning
7) Background workers
Background workers perform background processing for operations specific to TimescaleDB (both live queries and background jobs, all kinds of User-Defined Actions/Policies).

The background worker settings need to be tuned to get the most out of TimescaleDB—issues often arise when worker settings are not properly set. Some of the issues we often see caused by a misconfiguration of background workers are:
User-Defined Actions are not working properly.
Continuous aggregates are not working properly.
Compression policies are not working properly.
The retention policies are not working properly.
Database size rapidly increases, due to failures in compression and the data retention policies.

- You should configure the timescaledb.max_background_workers setting to be equal to the sum of your total number of databases + the total number of concurrent background workers you want running at any given point in time.
- By default, the max_parallel_workers setting corresponds to the number of CPUs available.
- max_worker_processes should be AT LEAST 3 (required for checkpointer, WAL writer, and vacuum processes) plus the sum of the background workers and parallel workers:
max_worker_processes = 3 + timescaledb.max_background_workers + max_parallel_workers.


https://www.timescale.com/blog/timescale-parameters-you-should-know-about-and-tune-to-maximize-your-performance