# 1. Prepare the data

## Import Libraries and Load the Dataset

In [None]:
# Import necessary libraries and modules

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Read the stock-trend CSV (path relative to the working directory) into a DataFrame
csv_path = 'stock_trend.csv'
stock_df = pd.read_csv(csv_path)

## Exploratory Data Analysis (EDA)

In [None]:
# Display the first 10 rows of the freshly loaded dataset
stock_df.head(10)

In [None]:
# Display the last 10 rows of the dataset
stock_df.tail(10)

In [None]:
# Show the dataset's dimensions as a (rows, columns) tuple
stock_df.shape

In [None]:
# List the dtype pandas inferred for each column
stock_df.dtypes

In [None]:
# Cast Quote to text, then left-pad with zeros up to a width of 4
# (values already 4 or more characters long are left unchanged by zfill)
quote_text = stock_df["Quote"].astype(str)
stock_df["Quote"] = quote_text.str.zfill(4)

In [None]:
# Re-check the column dtypes after converting Quote to a string
stock_df.dtypes

In [None]:
# Display the first 10 rows again to see the zero-padded Quote values
stock_df.head(10)

In [None]:
# Count the missing (null) values in each column
stock_df.isnull().sum()

In [None]:
# Summary statistics for the dataset.
# NOTE: with default arguments, describe() on a mixed-type DataFrame reports
# only the numeric columns; non-numeric columns are left out of this table.
stock_df.describe()

In [None]:
# Summary statistics restricted to the two price columns, 'Before' and 'After'
stock_df[['Before', 'After']].describe()

In [None]:
# Raw price movement: the 'After' price minus the 'Before' price, row by row
stock_df['Price_Movement'] = stock_df['After'].sub(stock_df['Before'])


def _movement_direction(delta):
    """Map a single price change to 'up', 'down' or 'no_change'."""
    if delta > 0:
        return 'up'
    if delta < 0:
        return 'down'
    # Zero lands here, as does any value that compares false both ways (e.g. NaN)
    return 'no_change'


# Direction of every row's movement (kept separate; not stored in stock_df)
direction = stock_df['Price_Movement'].map(_movement_direction)

# Tally how many rows fall into each direction
direction.value_counts()


In [None]:
# Parse the Time column and reduce every timestamp to its calendar date.
# Both are kept as standalone Series rather than added to stock_df.
temp_time = pd.to_datetime(stock_df["Time"])
temp_date = temp_time.dt.date

# Mean price movement across all rows that share a date
daily_avg_movement = stock_df["Price_Movement"].groupby(temp_date).mean()

# Line chart of the daily means on a 12x5 figure
fig, ax = plt.subplots(figsize=(12, 5))
daily_avg_movement.plot(ax=ax)

ax.set_title("Overall Market Movement Over Time (Daily Average Price Change)")
ax.set_xlabel("Date")
ax.set_ylabel("Average Price Change (RM)")
ax.grid(True, linestyle="--", alpha=0.4)
plt.show()



In [None]:
# Magnitude of each price movement, ignoring its sign
temp_abs_change = stock_df["Price_Movement"].abs()

# Calendar date of every row, parsed from the Time column
temp_time = pd.to_datetime(stock_df["Time"])
temp_date = temp_time.dt.date

# Mean absolute price change per date
daily_vol = temp_abs_change.groupby(temp_date).mean()

# Purple line chart of the daily means on a 12x4 figure
fig, ax = plt.subplots(figsize=(12, 4))
daily_vol.plot(ax=ax, color="purple")

ax.set_title("Market Volatility Over Time (Mean Absolute Daily Price Change)")
ax.set_xlabel("Date")
ax.set_ylabel("Mean |Change| (RM)")
ax.grid(True, linestyle="--", alpha=0.5)
plt.show()


## Data Preprocessing

### Compute Relative Price Change (%)

In [None]:
# Express each price movement as a percentage of the 'Before' price
fraction_change = stock_df["Price_Movement"].div(stock_df["Before"])
stock_df["Relative_Price_Change"] = fraction_change.mul(100)

# Display the first 10 rows, now including Relative_Price_Change
stock_df.head(10)

### Label Function

In [None]:
# Label Function
def label_trend(percentage_change, threshold=10):
    """Classify a relative price change (in percent) as a trend label.

    Parameters
    ----------
    percentage_change : float
        Relative price change, expressed in percent.
    threshold : float, default 10
        Half-width of the "flat" band, in percent. A change must be strictly
        greater than ``threshold`` to count as an uptrend and strictly less
        than ``-threshold`` to count as a downtrend.

    Returns
    -------
    str
        ``'uptrend'``, ``'downtrend'`` or ``'flat'``.

    Notes
    -----
    Values exactly on the boundary (``±threshold``) are labelled ``'flat'``.
    NaN compares false against both bounds, so it is labelled ``'flat'`` too.
    """
    if percentage_change > threshold:
        return 'uptrend'
    if percentage_change < -threshold:
        return 'downtrend'
    # Inside the band, on its boundary, or not comparable (NaN)
    return 'flat'

### Assign Label

In [None]:
# Label every row by passing its Relative_Price_Change through label_trend
relative_change = stock_df['Relative_Price_Change']
stock_df['Trend_Label'] = relative_change.map(label_trend)

# Display the first 10 rows, now including Trend_Label
stock_df.head(10)

### Check the class distribution for Trend_Label

In [None]:
# Count how many rows carry each Trend_Label value (class distribution)
stock_df['Trend_Label'].value_counts()

In [None]:
# Reduce the dataset to the Title and Trend_Label columns.
# This rebinds stock_df, so every other column is gone from here on.
kept_columns = ['Title', 'Trend_Label']
stock_df = stock_df.loc[:, kept_columns]

### Split the dataset into train (70%), validation (20%), and test (10%) sets

In [None]:
# Two-stage split with a fixed seed so the partitions are reproducible:
#   1) hold out 30% of the rows, leaving 70% for training;
#   2) send one third of that hold-out to test (10% overall) and the
#      remaining two thirds to validation (20% overall).
split_seed = 42
train_df, temp_df = train_test_split(stock_df, test_size=0.3, random_state=split_seed)
val_df, test_df = train_test_split(temp_df, test_size=1/3, random_state=split_seed)

# Report the number of rows in each partition
for caption, subset in (
    ("Train size:", train_df),
    ("Validation size:", val_df),
    ("Test size:", test_df),
):
    print(caption, len(subset))

In [None]:
# Display the first 10 rows of the training partition
train_df.head(10)

In [None]:
# Display the first 10 rows of the validation partition
val_df.head(10)

In [None]:
# Display the first 10 rows of the test partition
test_df.head(10)

In [None]:
# Display 10 randomly chosen rows of the reduced dataset.
# No random_state is passed, so the selection differs from run to run.
stock_df.sample(10)

In [None]:
# Summary statistics for every column; include='all' covers non-numeric
# columns as well (stock_df holds only Title and Trend_Label at this point).
stock_df.describe(include='all')