In [29]:
import pandas as pd
import numpy as np
import os
from cassandra.cluster import Cluster
from datetime import datetime
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
import matplotlib.pyplot as plt
from cassandra.cluster import Cluster
from cassandra.auth import PlainTextAuthProvider

In [30]:
# เชื่อมต่อกับ Cassandra Docker
cluster = Cluster(['127.0.0.1'], port=9042)
session = cluster.connect()

In [31]:
keyspace_name = "stock_data"
session.set_keyspace(keyspace_name)
table_name = "stock_price"
rows = session.execute(f"SELECT * FROM {table_name}")
df = pd.DataFrame(rows)
df.shape

(1110, 7)

In [32]:
df.head()

Unnamed: 0,symbol,date,close,high,low,open,volume
0,PPPM,2025-05-16,0.41,0.43,0.38,0.41,780900
1,TPCH,2025-05-16,2.78,2.8,2.72,2.78,258400
2,KPNREIT,2025-05-16,,,,,0
3,POLY,2025-05-16,6.7,6.7,6.7,6.7,1100
4,QHBREIT,2025-05-16,3.62,3.62,3.58,3.62,240700


In [33]:
rows_symbol = session.execute(f"SELECT * FROM stock_data.stock_price WHERE symbol = 'AOT'")
df_symbol = pd.DataFrame(rows_symbol)
df_symbol.head(1)

Unnamed: 0,symbol,date,close,high,low,open,volume
0,AOT,1970-01-01,39.0,39.25,38.25,39.0,27097430


In [34]:
row = df_symbol.iloc[[1]]
non_null = row[row.notnull()]
print(non_null)

  symbol        date  close  high   low   open    volume
1    AOT  2025-05-16  37.25  37.5  36.0  37.25  31487900


In [35]:
# ดึงข้อมูลเฉพาะคอลัมน์ที่ต้องการจาก Cassandra
rows_nonnull = session.execute(
    """
    SELECT symbol, date, open, high, low, close, volume
    FROM stock_data.stock_price
    """
)

# แปลงเป็น DataFrame
df_nonnull = pd.DataFrame(rows_nonnull.all())  # ใช้ .all() เพื่อดึงผลลัพธ์ทั้งหมด

# แสดงผลลัพธ์
df_nonnull.head()


Unnamed: 0,symbol,date,open,high,low,close,volume
0,PPPM,2025-05-16,0.41,0.43,0.38,0.41,780900
1,TPCH,2025-05-16,2.78,2.8,2.72,2.78,258400
2,KPNREIT,2025-05-16,,,,,0
3,POLY,2025-05-16,6.7,6.7,6.7,6.7,1100
4,QHBREIT,2025-05-16,3.62,3.62,3.58,3.62,240700


In [36]:
df_nonnull.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1110 entries, 0 to 1109
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   symbol  1110 non-null   object 
 1   date    1110 non-null   object 
 2   open    1041 non-null   float64
 3   high    1041 non-null   float64
 4   low     1041 non-null   float64
 5   close   1041 non-null   float64
 6   volume  1110 non-null   int64  
dtypes: float64(4), int64(1), object(2)
memory usage: 60.8+ KB


In [37]:
df.isnull().sum()

symbol     0
date       0
close     69
high      69
low       69
open      69
volume     0
dtype: int64

In [38]:
# Step 1: Load stock data from Cassandra
keyspace_name = "stock_data"
session.set_keyspace(keyspace_name)
table_name = "stock_price"
rows = session.execute(f"SELECT * FROM {table_name}")
df = pd.DataFrame(rows)

# Ensure correct data type
df['date'] = pd.to_datetime(df['date'].astype(str))
df = df.sort_values(by='date')

In [39]:
# Step 2: Create rolling segments
window_size = 30
segments = []
labels = []  # 0 = no pattern, 1 = head and shoulders (HS)

def detect_head_and_shoulders(prices, distance=3, tolerance=0.05):
    from scipy.signal import find_peaks
    peaks, _ = find_peaks(prices, distance=distance)
    if len(peaks) < 3:
        return False
    for i in range(len(peaks) - 2):
        ls, head, rs = peaks[i], peaks[i+1], peaks[i+2]
        ls_val, head_val, rs_val = prices[ls], prices[head], prices[rs]
        if head_val > ls_val and head_val > rs_val:
            if abs(ls_val - rs_val) / max(ls_val, rs_val) < tolerance:
                return True
    return False

for i in range(len(df) - window_size):
    seg = df[['open', 'high', 'low', 'close']].iloc[i:i+window_size].values
    segments.append(seg)
    close_prices = seg[:, 3]
    label = 1 if detect_head_and_shoulders(close_prices) else 0
    labels.append(label)

X = np.array(segments)
y = np.array(labels)

In [40]:
# Step 3: Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [41]:
# Step 4: Build CNN model
model = Sequential([
    Conv1D(32, kernel_size=3, activation='relu', input_shape=(window_size, 4)),
    MaxPooling1D(pool_size=2),
    Conv1D(64, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



In [42]:
# Step 5: Train model
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1)

Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8890 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9030 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9025 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9027 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9127 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.9082 - loss: nan - val_accuracy: 0.9540 - val_loss: nan
Epoch 7/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

In [43]:
train_acc = history.history['accuracy'][-1]
val_acc = history.history['val_accuracy'][-1]
print(f"Train Accuracy: {train_acc:.4f}")
print(f"Validation Accuracy: {val_acc:.4f}")


Train Accuracy: 0.9035
Validation Accuracy: 0.9540


In [44]:
import plotly.graph_objects as go
from scipy.signal import find_peaks

def detect_hs_points(prices, distance=5, tolerance=0.05):
    """Return index positions of (LS, Head, RS) if pattern is found, else None."""
    peaks, _ = find_peaks(prices, distance=distance)
    results = []
    for i in range(len(peaks) - 2):
        ls, head, rs = peaks[i], peaks[i+1], peaks[i+2]
        ls_val, head_val, rs_val = prices[ls], prices[head], prices[rs]
        if head_val > ls_val and head_val > rs_val:
            if abs(ls_val - rs_val) / max(ls_val, rs_val) < tolerance:
                results.append((ls, head, rs))
    return results

def plot_hs_patterns(df, symbol, window=100):
    df = df[df['symbol'] == symbol].sort_values('date').reset_index(drop=True)
    prices = df['close'].values
    dates = df['date']

    patterns = detect_hs_points(prices, distance=5)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=prices, mode='lines', name='Close Price'))

    for ls, head, rs in patterns:
        fig.add_trace(go.Scatter(x=[dates[ls]], y=[prices[ls]], mode='markers+text',
                                 marker=dict(color='yellow', size=10), text=["LS"], name='Left Shoulder'))
        fig.add_trace(go.Scatter(x=[dates[head]], y=[prices[head]], mode='markers+text',
                                 marker=dict(color='red', size=10), text=["Head"], name='Head'))
        fig.add_trace(go.Scatter(x=[dates[rs]], y=[prices[rs]], mode='markers+text',
                                 marker=dict(color='green', size=10), text=["RS"], name='Right Shoulder'))

    fig.update_layout(title=f"H&S Pattern Detection for {symbol}",
                      xaxis_title="Date", yaxis_title="Close Price",
                      template='plotly_dark')
    fig.show()


In [None]:
plot_hs_patterns(df, symbol='PPPM')

