# Stock Chart Pattern Recognition Using Deep Learning (CRISP-DM)

This notebook follows the CRISP-DM (Cross Industry Standard Process for Data Mining) methodology to prepare stock market data for Head and Shoulders (H&S) pattern recognition using Deep Learning.

## 1️⃣ Business Understanding & Data Understanding

Objective: Use Deep Learning to identify and predict outcomes of Head and Shoulders (H&S) and Inverse Head and Shoulders (IH&S) patterns, leveraging comprehensive Technical and Fundamental features.



1.1 Import Libraries and Setup

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from cassandra.cluster import Cluster
from datetime import datetime
import time
import importlib

# Technical Analysis Libraries
from ta.trend import EMAIndicator
from sklearn.preprocessing import MinMaxScaler


## 1.2 Data Understanding
The data comes from the SETTRADE API and is stored in a Cassandra database.

In [2]:
# ==========================================
# 1️⃣ เชื่อมต่อ Cassandra และเตรียม Keyspace/Table
# ==========================================
try:
    cluster = Cluster(['127.0.0.1'], port=9042)
    session = cluster.connect()
    session.execute("""
        CREATE KEYSPACE IF NOT EXISTS stock_data
        WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
    """)
    session.set_keyspace('stock_data')
    
    # ตรวจสอบและสร้าง Table หากยังไม่มี
    session.execute("""
        CREATE TABLE IF NOT EXISTS candlestick_data (
            symbol text,
            time timestamp,
            open float,
            high float,
            low float,
            close float,
            volume bigint,
            value float,
            PRIMARY KEY (symbol, time)
        ) WITH CLUSTERING ORDER BY (time ASC);
    """)
    print("✅ Keyspace และ Table พร้อมใช้งาน!")

except Exception as e:
    print(f"❌ Error during Cassandra connection/setup: {e}")
    print("โปรดตรวจสอบว่า Cassandra Server (127.0.0.1:9042) ได้รันอยู่หรือไม่")

✅ Keyspace และ Table พร้อมใช้งาน!


## 1.3 Data Extraction (OHLCV)


❌ เกิดข้อผิดพลาดที่ไม่คาดคิด: name 'get_candlestick_data' is not defined


## 2️⃣ Data Preparation

ขั้นตอนนี้จะรวม Market Cap, Technical Grouping และ Fundamental Data เข้าด้วยกัน

2.1 Feature Engineering: Market Cap & Technical Grouping

In [4]:
def create_technical_features_and_grouping(df: pd.DataFrame) -> pd.DataFrame:
    """คำนวณ Market Cap (Proxy), EMA, RSI และกำหนดกลุ่มสัญญาณทางเทคนิค"""
    
    # --- 1. Add Market Cap column ---
    # Market Cap Proxy = Close Price * Volume (มูลค่าการซื้อขาย)
    df['MarketCap_Proxy'] = df['close'] * df['volume']
    
    # --- 2. คำนวณ Indicators ---
    df['EMA5'] = EMAIndicator(close=df['close'], window=5, fillna=False).ema_indicator()
    df['EMA15'] = EMAIndicator(close=df['close'], window=15, fillna=False).ema_indicator()
    df['EMA35'] = EMAIndicator(close=df['close'], window=35, fillna=False).ema_indicator()
    df['EMA89'] = EMAIndicator(close=df['close'], window=89, fillna=False).ema_indicator()
    df['EMA200'] = EMAIndicator(close=df['close'], window=200, fillna=False).ema_indicator()
    df['RSI'] = RelativeStrengthIndex(close=df['close'], window=14, fillna=False).rsi()
    
    # --- 3. กำหนดกลุ่มสัญญาณ (Categorization) ---
    conditions = [
        # a: Strong Momentum / Overbought
        (df['close'] >= df['EMA5']) & (df['RSI'] >= 70),
        
        # b: Clear Uptrend
        (df['close'] >= df['EMA35']) & (df['EMA35'] >= df['EMA89']),
        
        # c: Sideways above EMA89 (Short-term EMAs close together)
        (df['close'] >= df['EMA89']) & 
        (np.abs(df['EMA5'] - df['EMA35']) / df['close'] < 0.01), # ใช้ EMA5, EMA35
        
        # d: Downtrend
        (df['close'] < df['EMA89']) & (df['close'] < df['EMA200']) & (df['EMA89'] < df['EMA200']),
        
        # e: Crash (Strong descending order and oversold)
        (df['close'] < df['EMA5']) & (df['EMA5'] < df['EMA15']) & (df['EMA15'] < df['EMA35']) & 
        (df['EMA35'] < df['EMA89']) & (df['EMA89'] < df['EMA200']) & (df['RSI'] <= 30)
    ]
    
    choices = ['a_Overbought', 'b_ClearUptrend', 'c_SidewaysAbove89', 'd_Downtrend', 'e_Crash']
    
    df['Technical_Group'] = np.select(conditions, choices, default='f_Neutral')
    
    # --- 4. Drop NAN Data ---
    # Drop rows ที่เป็น NaN (เกิดจากการคำนวณ EMA200)
    df_cleaned = df.dropna().copy()
    
    print(f"✅ NaN Data Dropped: {len(df) - len(df_cleaned)} rows removed (Initial trading period / Indicator lookback)")
    
    return df_cleaned

## 2.2 Feature Engineering: Fundamental Data (Mock)

เนื่องจากข้อมูล Fundamental (EPS, PE, PBV, Yield) ไม่ได้รวมอยู่ใน Table candlestick_data เราจะสร้าง Mock-up Data โดยอิงจาก Interpretation ของคุณ

In [5]:
def add_fundamental_data(df: pd.DataFrame) -> pd.DataFrame:
    """จำลองการเพิ่มข้อมูล Fundamental (PE, PBV, Yield) ตามเงื่อนไขที่กำหนด"""
    
    # 1. EPS (Negative, indicating a loss) - ใช้ค่าสุ่มเล็กน้อย
    df['EPS'] = np.random.uniform(-0.5, 0.5, size=len(df))
    
    # 2. PE (Zero due to company losses)
    # ถ้า EPS <= 0 ให้ PE = 0.0, ถ้ามีกำไร (EPS > 0) ให้คำนวณ PE จริง
    df['PE'] = df.apply(
        lambda row: 0.0 if row['EPS'] <= 0 else (row['close'] / row['EPS']), 
        axis=1
    )
    
    # 3. PBV (Relatively high: 1.5 - 5.0)
    df['PBV'] = np.random.uniform(1.5, 5.0, size=len(df))
    
    # 4. PercentYield (Dividend per share / stock price)
    df['PercentYield'] = np.random.uniform(0.0, 0.05, size=len(df))
    
    # Clean up PE where it might be Inf
    df['PE'].replace([np.inf, -np.inf], 0.0, inplace=True)
    
    print("✅ Fundamental Data Added (Mocked based on Interpretation)")
    return df

if not df_raw.empty:
    df_temp_features = create_technical_features_and_grouping(df_raw)
    df_final_features = add_fundamental_data(df_temp_features)
    
    df_model_ready = df_final_features.copy()
    
    print("\n--- Summary of Data Preparation (Ready for DL) ---")
    print(df_model_ready[['close', 'EMA89', 'RSI', 'MarketCap_Proxy', 'Technical_Group', 'PE']].tail(5).to_markdown(index=True))


NameError: name 'df_raw' is not defined

## 3️⃣ Modeling & 4️⃣ Evaluation (Data Labeling Stage)

ในส่วนนี้ เราจะแสดงการใช้ Logic Head and Shoulders เพื่อเป็น Data Labeling (Target) สำหรับการฝึกโมเดล Deep Learning ซึ่งเป็นส่วนสำคัญของการนำเสนอ

## 3.1 Head and Shoulders Detection and Visualization

In [None]:
# ==========================================================
# ฟังก์ชันวาด H&S Pattern ด้วย Plotly
# ==========================================================
def plot_classic_pattern(df: pd.DataFrame, patterns: list, symbol: str):
    
    # 1. สร้าง Candlestick Figure พื้นฐาน
    fig = go.Figure()
    fig.add_trace(go.Candlestick(
        x=df.index, open=df['open'], high=df['high'], low=df['low'], close=df['close'], name='Price'
    ))
    
    # ปรับ Layout
    fig.update_layout(
        title=f'📈 {symbol} - Head & Shoulders Detection (Data Labeling)',
        xaxis_rangeslider_visible=False,
        height=700,
        template='plotly_white'
    )

    # 2. วาด Pattern และ Neckline
    for i, pattern in enumerate(patterns):
        l_idx, h_idx, r_idx = pattern['left_idx'], pattern['head_idx'], pattern['right_idx']
        
        l_time, h_time, r_time = df.index[l_idx], df.index[h_idx], df.index[r_idx]
        
        line_color = '#EF553B' if pattern['type'] == 'H&S' else '#00CC96'
        
        # วาดเส้น Pattern (ไหล่ซ้าย-หัว-ไหล่ขวา)
        fig.add_trace(go.Scatter(
            x=[l_time, h_time, r_time],
            y=[df['close'].iloc[l_idx], df['close'].iloc[h_idx], df['close'].iloc[r_idx]],
            mode='lines+markers',
            line=dict(color=line_color, width=3),
            marker=dict(size=8, symbol='circle'),
            name=f"{pattern['type']} {i+1}",
            showlegend=True
        ))
        
        # 3. Annotations
        fig.add_annotation(
            x=h_time, y=df['high'].iloc[h_idx] * 1.01,
            text=f"{pattern['type']} Detected",
            showarrow=True,
            arrowhead=2,
            font=dict(color=line_color, size=10, weight='bold'),
            yshift=10 if pattern['type'] == 'H&S' else -10
        )
        
    fig.show()

# --- Main Detection Logic ---
if 'df_model_ready' in locals() and not df_model_ready.empty:
    
    print("\n--- 📉 Applying Classic Detector for DL Label Generation ---")
    
    # 🚨 ใช้ฟังก์ชันตรวจจับ
    classic_patterns = detect_head_shoulders(df_model_ready, distance=10, tolerance=0.04)
    
    print(f"✅ พบรูปแบบ H&S/IH&S ทั้งหมด: {len(classic_patterns)} จุด")
    
    if classic_patterns:
        plot_classic_pattern(df_model_ready, classic_patterns, symbol=STOCK_SYMBOL)
    else:
        print("💡 ไม่พบรูปแบบ Head & Shoulders หรือ Inverse Head & Shoulders ในข้อมูลที่กำหนด")


## 5️⃣ Deployment (Final Feature Preparation for DL)

ขั้นตอนนี้เป็นการแปลงข้อมูลที่เตรียมไว้ (Price, Volume, Technical, Fundamental) ให้อยู่ในรูปแบบ 3D Array (Time Series Sequence) ที่พร้อมป้อนเข้าโมเดล Deep Learning (CNN-LSTM หรือ Transformer)

## 5.1 Data Scaling and Sequence Creation

In [None]:
# ------------------------------------------------------------------
# 🤖 Final Step: Preparing 3D Array for Deep Learning Model
# ------------------------------------------------------------------
if 'df_model_ready' in locals() and not df_model_ready.empty:
    
    print("\n--- 🧠 Deployment Setup: Data Scaling and Sequence Creation ---")
    
    # 1. เลือก Features ทั้งหมดที่จะใช้ป้อนเข้าโมเดล DL
    features = [
        'open', 'high', 'low', 'close', 'volume', 'MarketCap_Proxy', 
        'EMA5', 'EMA15', 'EMA35', 'EMA89', 'EMA200', 'RSI',
        'EPS', 'PE', 'PBV', 'PercentYield' 
    ]
    
    df_dl = df_model_ready[features].copy()
    
    # 2. Normalization (MinMaxScaler)
    print(f"📐 Scaling {len(features)} features...")
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled_values = scaler.fit_transform(df_dl)
    df_scaled = pd.DataFrame(df_scaled_values, columns=features, index=df_dl.index)
    
    # 3. Creating Sequences (Sliding Window)
    # โมเดลจะมองข้อมูลย้อนหลัง 30 วัน (SEQUENCE_LENGTH) เพื่อทำนายวันถัดไป
    def create_sequences(data, sequence_length):
        xs = []
        for i in range(len(data) - sequence_length):
            x = data.iloc[i:(i + sequence_length)]
            xs.append(x.values)
        return np.array(xs)

    X_sequences = create_sequences(df_scaled, SEQUENCE_LENGTH)
    
    # 4. Creating Labels (Target Y) - Mock for Demo
    # การทำนายง่ายๆ: ราคาปิดใน 5 วันข้างหน้าสูงกว่าวันนี้หรือไม่ (Binary Classification)
    FUTURE_PREDICT_DAYS = 5
    y_raw = (df_dl['close'].shift(-FUTURE_PREDICT_DAYS) > df_dl['close']).astype(int)
    
    # ปรับ Label ให้เข้ากับ Sequence length
    y_labels = y_raw.iloc[SEQUENCE_LENGTH:].values
    y_labels = y_labels[:-FUTURE_PREDICT_DAYS].copy() 
    
    # ตัด X_sequences ให้มีจำนวน Sample เท่ากับ Y_labels
    X_sequences = X_sequences[:-FUTURE_PREDICT_DAYS]

    # 5. สรุปผลลัพธ์
    print("\n--- DL Model Input Dimensions ---")
    print(f"Sequence Length (Time Steps): {SEQUENCE_LENGTH} วัน")
    print(f"Number of Features: {len(features)} ตัว")
    print(f"Input Data Shape (Samples, Time Steps, Features): {X_sequences.shape}")
    print(f"Label Data Shape (Samples): {y_labels.shape}")     
    print("\n💡 ข้อมูลถูกแปลงเป็น 3D Array เรียบร้อยแล้ว พร้อมสำหรับการฝึกโมเดล Deep Learning.")
