In [None]:
from datetime import datetime
from math import sqrt
import numpy as np
import pandas as pd
from pandas import read_csv, DataFrame, Series, concat
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from matplotlib.ticker import MaxNLocator
from matplotlib.patches import Patch
import matplotlib.patheffects as patheffects
import matplotlib.font_manager as fm
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.stattools import jarque_bera
from scipy.stats import t, ttest_rel
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error)
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from typing import Optional
import optuna
import joblib

# --- Library settings ---
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
sns.set_context('poster')
sns.set_style('white')
sns.set_color_codes()
# Scatter keyword arguments; not referenced in the cells visible in this part of the notebook.
plot_kwds = {'alpha' : 0.5, 's' : 80, 'linewidths':0}

import warnings
# Suppress every warning for the rest of the session (this also hides deprecation warnings).
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
plt.rcParams['figure.dpi'] = 400
# NOTE(review): sns.set() is called here without style/context arguments, so it presumably
# re-applies seaborn's default theme on top of the set_context/set_style calls above and only
# keeps the two face colours given in `rc` — confirm this is the intended final look.
sns.set(rc = {'axes.facecolor': '#FBFBFB', 'figure.facecolor': '#FBFBFB'})
class clr:
    """ANSI escape sequences for highlighting console output.

    ``color`` switches to SGR code 93, ``start`` is the same code followed by
    SGR code 1, and ``end`` emits SGR code 0 (reset).
    """
    color = '\033[93m'
    start = color + '\033[1m'
    end = '\033[0m'

# ETL - Data

In [None]:
# Load the BBCA price file and align it to a business-day calendar so that every
# weekday between the first and last analysis date has exactly one row.
# NOTE(review): `path` is an absolute, machine-specific location — consider a
# configurable relative data directory so the notebook runs elsewhere.
path = 'D:/Lainnya/Portofolio/Deret Waktu EDA/'
full_index = pd.date_range('2020-01-02', '2025-10-27', freq='B')

raw_prices = pd.read_csv(path + 'BBCA_Saham.csv')
raw_prices['Tanggal'] = pd.to_datetime(raw_prices['Tanggal']).dt.normalize()

# Reindexing inserts NaN rows for business days missing from the file; the
# forward fill then carries the last known price into those rows.
df = raw_prices.set_index('Tanggal').reindex(full_index)
df['Saham_BBCA'] = pd.to_numeric(df['Saham_BBCA'], errors='coerce').ffill()
df.index.name = 'Tanggal'

first_day, last_day = df.index.min(), df.index.max()
n_missing = df['Saham_BBCA'].isna().sum()
print(f"Periode analisis: {first_day} hingga {last_day}")
print(f"Amatan Hilang: {n_missing}")

In [None]:
def make_dummy(df, start, end=None, impulse=False, linear_trend=False):
    """Build an event indicator aligned to ``df.index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is compared against ``start`` and ``end``.
    start : str or datetime-like
        First date of the event (inclusive); parsed with ``pd.to_datetime``.
    end : str or datetime-like, optional
        Last date of the event (inclusive). ``None`` means the event stays
        active until the end of the index.
    impulse : bool, default False
        If True, flag only the rows whose index equals ``start``. ``end`` is
        ignored and this mode takes precedence over ``linear_trend``.
    linear_trend : bool, default False
        If True, return a counter 1, 2, 3, ... over the rows inside the
        window and 0 elsewhere, instead of a 0/1 step.

    Returns
    -------
    numpy.ndarray
        Integer 0/1 array for impulse and step dummies, float array for the
        trend variant; one element per row of ``df``.
    """
    start = pd.to_datetime(start)
    if end is not None:
        end = pd.to_datetime(end)

    if impulse:
        return (df.index == start).astype(int)

    # Shared window mask. An open-ended window (end=None) is now honoured by the
    # trend variant as well; previously it compared the index against None and raised.
    in_window = df.index >= start
    if end is not None:
        in_window = in_window & (df.index <= end)

    if linear_trend:
        trend = np.zeros(len(df))
        positions = np.flatnonzero(in_window)
        trend[positions] = np.arange(1, len(positions) + 1)
        return trend

    return in_window.astype(int)

# Event calendar: (column name, first day, last day, encode as running trend?).
# The rows stay in this order because it fixes the order of the new columns in `df`.
event_windows = [
    ('dummy_covid_lockdown', '2020-03-15', '2020-06-30', False),
    ('dummy_covid_delta',    '2021-06-15', '2021-08-31', False),
    ('dummy_covid_omicron',  '2022-01-15', '2022-02-28', False),
    ('dummy_bi_rate_cut',    '2020-02-01', '2020-11-30', False),
    ('dummy_bi_rate_hike',   '2022-08-01', '2023-12-31', False),
    ('trend_recovery',       '2021-09-01', '2022-12-31', True),
    ('dummy_election_2024',  '2023-11-01', '2024-02-29', False),
    ('dummy_banking_crisis', '2023-03-10', '2023-04-30', False),
    ('dummy_normalization',  '2023-01-01', '2025-10-27', False),
    ('dummy_covid_panic',    '2020-02-20', '2020-03-14', False),
    ('dummy_rally_2024',     '2024-01-01', '2025-10-27', False),
    ('dummy_menkeu_purbaya', '2025-09-08', '2025-10-27', False),
]

for column, first_day, last_day, as_trend in event_windows:
    df[column] = make_dummy(df, first_day, last_day, linear_trend=as_trend)

In [None]:
# Display the prepared frame (price column plus the event columns added above).
df

# EDA

## Plot Time Series-Trend

### Plot BBCA

In [None]:
# Full-sample price line with the highest and lowest observation highlighted.
# NOTE(review): sns.set() runs after the font assignment and may reset
# 'font.family' to seaborn's default — confirm the intended font.
plt.rcParams['font.family'] = 'DejaVu Sans'
sns.set(style='whitegrid', context='talk', palette='colorblind')

bbca_price = df['Saham_BBCA']
max_date, max_price = bbca_price.idxmax(), bbca_price.max()
min_date, min_price = bbca_price.idxmin(), bbca_price.min()

fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=df, x=df.index, y='Saham_BBCA', ax=ax, linewidth=2.5, color='tab:blue')

# One marker per extreme; the legend entry carries the price itself.
extremes = [(max_date, max_price, 'crimson', 'Max'),
            (min_date, min_price, 'darkgreen', 'Min')]
for when, value, colour, tag in extremes:
    ax.scatter(when, value, color=colour, s=80, zorder=5, label=f'{tag}: {value:.2f} per Lembar')

ax.legend(loc='upper left', fontsize=12, frameon=True)
ax.set_title('Pola Pergerakan Saham BBCA 2020 - 2025', fontsize=22, fontweight='bold', pad=20)
ax.set_xlabel('Tanggal', fontsize=16, labelpad=10)
ax.set_ylabel('Lembar Saham', fontsize=16, labelpad=10)

# X axis: one major tick every 5 months, labelled like "Jan 2020".
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
fig.autofmt_xdate()

# Horizontal grid only.
ax.yaxis.grid(True, which='major', linestyle='--', linewidth=0.6, alpha=0.7)
ax.xaxis.grid(False)
sns.despine()
plt.tight_layout(pad=2)
plt.show()

### Plot Tahunan

In [None]:
# Shared look for the yearly panels.
sns.set_style('whitegrid', {
    'axes.edgecolor': '0.8',
    'axes.linewidth': 0.5,
    'grid.color': '0.85',
    'grid.linestyle': '--',
    'axes.titlepad': 15})
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'axes.titleweight': 'bold',
    'axes.labelweight': 'bold',
    'figure.titlesize': 18,
    'figure.titleweight': 'bold'})

# One row per event column: (shading colour, legend/label text).
# `event_colors` and `event_labels` are derived from this single table so the
# two lookups can never get out of sync; key order is preserved.
event_styles = {
    'dummy_covid_panic':    ('#FF6347', 'COVID Panic (Feb-Mar 2020)'),
    'dummy_covid_lockdown': ('#4682B4', 'PSBB Lockdown (Mar-Jun 2020)'),
    'dummy_bi_rate_cut':    ('#32CD32', 'BI Rate Cut (Feb-Nov 2020)'),
    'dummy_covid_delta':    ('#CD5C5C', 'COVID Delta Wave (Jun-Aug 2021)'),
    'trend_recovery':       ('#9ACD32', 'Recovery Trend (Sep 2021-Dec 2022)'),
    'dummy_covid_omicron':  ('#BA55D3', 'COVID Omicron (Jan-Feb 2022)'),
    'dummy_bi_rate_hike':   ('#FF8C00', 'BI Rate Hike (Aug 2022-Dec 2023)'),
    'dummy_banking_crisis': ('#DC143C', 'Global Banking Crisis (Mar-Apr 2023)'),
    'dummy_normalization':  ('#20B2AA', 'Economic Normalization (2023-2025)'),
    'dummy_election_2024':  ('#9370DB', 'Election Period (Nov 2023-Feb 2024)'),
    'dummy_rally_2024':     ('#FFD700', 'Market Rally (2024-2025)'),
    'dummy_menkeu_purbaya': ('#FF4500', 'Menkeu Purbaya (Sep-Oct 2025)'),
}
event_colors = {name: colour for name, (colour, _) in event_styles.items()}
event_labels = {name: label for name, (_, label) in event_styles.items()}

In [None]:
# One panel per calendar year (2 x 3 grid): price line, shaded event windows
# with non-overlapping labels, and markers for the yearly high and low.
years = range(2020, 2026)
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(24, 14), gridspec_kw={'hspace': 0.35, 'wspace': 0.15})
axes = axes.flatten()
price_color = '#1a3e72'

for i, (ax, year) in enumerate(zip(axes, years)):
    yearly_data = df[df.index.year == year]
    if yearly_data.empty:
        fig.delaxes(ax)
        continue

    yearly_price = yearly_data['Saham_BBCA']
    ax.plot(yearly_data.index, yearly_price, color=price_color, linewidth=2.8, zorder=1, label='BBCA Price')
    y_min = yearly_price.min()
    y_range = yearly_price.max() - y_min
    used_label_positions = []

    for dummy, color in event_colors.items():
        if dummy not in df.columns:
            continue
        # `> 0` instead of `== 1`: trend-coded columns (trend_recovery counts
        # 1, 2, 3, ...) are active on every non-zero day, not only on the day
        # whose value happens to be 1.
        event_dates = yearly_data[yearly_data[dummy] > 0].index
        if len(event_dates) == 0:
            continue

        start_date, end_date = event_dates.min(), event_dates.max()
        ax.axvspan(start_date, end_date, color=color, alpha=0.25, zorder=2)

        # Spans of five days or less are shaded but not labelled.
        if (end_date - start_date).days <= 5:
            continue

        # Try six candidate heights from 85% down to 35% of the price range and
        # take the first one at least 15% of the range away from earlier labels.
        for attempt in np.linspace(0.85, 0.35, 6):
            y_pos = y_min + y_range * attempt
            if any(abs(y_pos - taken) < y_range * 0.15 for taken in used_label_positions):
                continue
            used_label_positions.append(y_pos)
            mid_date = start_date + (end_date - start_date) / 2
            ax.text(mid_date, y_pos, event_labels[dummy], fontsize=8.5, color='black', ha='center',
                    bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor=color, linewidth=1,
                              alpha=0.85), zorder=4)
            break

    # Yearly high / low markers with annotated prices.
    max_date = yearly_price.idxmax()
    min_date = yearly_price.idxmin()
    max_val = yearly_price.max()
    min_val = yearly_price.min()

    ax.scatter([max_date], [max_val], color='#006400', s=80, zorder=5, edgecolor='white', linewidth=1.2, marker='^')
    ax.scatter([min_date], [min_val], color='#8B0000', s=80, zorder=5, edgecolor='white', linewidth=1.2, marker='v')
    ax.annotate(f'High: Rp{max_val:,.0f}', xy=(max_date, max_val), xytext=(5, 8), textcoords='offset points',
                fontsize=9, color='#006400', ha='left', weight='bold',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor='#006400', alpha=0.8))
    ax.annotate(f'Low: Rp{min_val:,.0f}', xy=(min_date, min_val), xytext=(5, -18), textcoords='offset points',
                fontsize=9, color='#8B0000', ha='left', weight='bold',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', edgecolor='#8B0000', alpha=0.8))

    # Monthly ticks labelled with the abbreviated month name.
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.tick_params(axis='x', which='major', rotation=45, labelsize=9.5, pad=3)
    ax.tick_params(axis='y', labelsize=9.5)
    ax.grid(axis='y', which='major', linestyle=':', linewidth=0.7, alpha=0.6)
    ax.grid(axis='x', which='major', linestyle=':', linewidth=0.3, alpha=0.3)

    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)
    ax.set_title(f'BBCA Stock Price - {year}', fontsize=13, pad=12, weight='bold')

    # Only the left-most panel of each row carries the y-axis label.
    if i % 3 == 0:
        ax.set_ylabel('Price (IDR)', fontsize=11, labelpad=8, weight='bold')
    else:
        ax.set_ylabel('')
    ax.set_xlabel('')

# Remove any grid cell that has no year assigned to it.
for unused_ax in axes[len(years):]:
    fig.delaxes(unused_ax)

plt.suptitle('Pola Pergerakan Saham BBCA dengan Fenomena Ekonomi di Indonesia (2020-2025)', fontsize=17, y=0.995, weight='bold')
plt.tight_layout(pad=2.5, h_pad=2.5, w_pad=2.0)
plt.show()

### Plot M-to-M

In [None]:
# Month-of-year profile: average the daily price inside every (year, month)
# first, then average those monthly means across years so that each year
# contributes equally to a calendar month.
monthly_by_year = df.groupby([df.index.year, df.index.month])['Saham_BBCA'].mean()
monthly_mean_of_means = monthly_by_year.groupby(level=1).mean()
month_order = list(range(1, 13))
month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
monthly_values = [monthly_mean_of_means.get(m, np.nan) for m in month_order]
monthly_mean_series = pd.Series(monthly_values, index=month_names)

plt.figure(figsize=(14,7))
sns.set_style('whitegrid')
palette = sns.color_palette("viridis", len(monthly_mean_series))
bars = sns.barplot(x=monthly_mean_series.index, y=monthly_mean_series.values, palette=palette, edgecolor='black')
plt.plot(monthly_mean_series.index, monthly_mean_series.values, color='darkblue', marker='o', linewidth=2,
         label='Tren (Rata-Rata Bulanan)')

# Vertical gap between a bar top and its value label: 1% of the data range
# (falls back to 1% of 1 when all months share the same mean).
value_span = monthly_mean_series.max() - monthly_mean_series.min()
label_offset = 0.01 * (value_span if value_span != 0 else 1)

for bar, value in zip(bars.patches, monthly_mean_series.values):
    height = np.nan_to_num(value, nan=0.0)
    label = f'{value:,.0f}' if not np.isnan(value) else 'n/a'
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        height + label_offset,
        label, ha='center', va='bottom', fontsize=10, fontweight='bold', color='black',
        path_effects=[patheffects.withStroke(linewidth=1.2, foreground='white')])

# Title previously ended with an unmatched ')'.
plt.title('Rata Rata Tren Harga per Lembar Saham BCA per Bulan', fontsize=16, fontweight='bold')
plt.ylabel('Rata Rata', fontsize=13)
plt.xlabel('Bulan', fontsize=13)

# Zoom the y axis onto the data range so month-to-month differences are visible.
ymin = np.nanmin(monthly_mean_series.values)
ymax = np.nanmax(monthly_mean_series.values)
if np.isfinite(ymin) and np.isfinite(ymax):
    plt.ylim(ymin * 0.98, ymax * 1.02)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.legend()
plt.tight_layout()
plt.show()


### Plot D-to-D

In [None]:
# Average price per weekday (Mon-Fri).
# NOTE(review): this filter drops July-December 2025, while the figure note
# below mentions Nov-Des 2025 — confirm which period is meant to be excluded.
# `.copy()` makes the filtered frame independent of `df`, so adding a column
# below is a plain assignment rather than a write to a slice.
df_filtered = df[~((df.index.year == 2025) & (df.index.month >= 7))].copy()
df_filtered['weekday'] = df_filtered.index.weekday
df_filtered = df_filtered[df_filtered['weekday'] <= 4]
weekday_avg = df_filtered.groupby('weekday')['Saham_BBCA'].mean().reset_index()

# Replace weekday numbers with ordered labels so the plot keeps Mon..Fri order.
weekday_mapping = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri'}
weekday_avg['weekday'] = weekday_avg['weekday'].map(weekday_mapping)
weekday_avg['weekday'] = pd.Categorical(
    weekday_avg['weekday'],
    categories=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
    ordered=True)
weekday_avg = weekday_avg.sort_values('weekday')

plt.figure(figsize=(10, 6))
palette = sns.color_palette("cubehelix", len(weekday_avg))
bars = sns.barplot(x='weekday', y='Saham_BBCA', data=weekday_avg, palette=palette,
                  edgecolor='black', order=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'])
line = plt.plot(weekday_avg['weekday'], weekday_avg['Saham_BBCA'],
               color='darkred', marker='o', linewidth=2, label='Trend')

# Value label just above every bar.
for i, (day, saham_bbca) in enumerate(zip(weekday_avg['weekday'], weekday_avg['Saham_BBCA'])):
    plt.text(
        i,
        saham_bbca + 0.1,
        f'{saham_bbca:.2f}',
        ha='center',
        va='bottom',
        fontsize=10,
        fontweight='bold',
        color='black',
        path_effects=[patheffects.withStroke(linewidth=2, foreground='white')])

plt.title('Rata Rata Harga Saham BBCA per Hari Kerja dalam Seminggu',
          fontsize=14, fontweight='bold', pad=20)
plt.ylabel('Rata Lembar Saham', fontsize=12)
plt.xlabel('Weekday', fontsize=12)

# Tight y range around the five averages.
min_price = weekday_avg['Saham_BBCA'].min()
max_price = weekday_avg['Saham_BBCA'].max()
plt.ylim(min_price - 0.5, max_price + 0.5)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.figtext(0.5, 0.01,
           'Note: Data Mingguan untuk Hari Sabtu dan Minggu tidak termasuk karena pasar tutup dan juga bulan Nov-Des 2025 tidak lengkap.',
           ha='center', fontsize=10, style='italic')
plt.tight_layout()
plt.show()

### Plot Tanggal-to-Tanggal

In [None]:
# Average price and number of observations per calendar day of the month (1-31).
df_analysis = df.copy()
df_analysis['day'] = df_analysis.index.day
day_counts = df_analysis.groupby('day').size().reset_index(name='count')
daily_avg = df_analysis.groupby('day')['Saham_BBCA'].mean().reset_index()
daily_stats = daily_avg.merge(day_counts, on='day')

data_min = daily_stats['Saham_BBCA'].min()
data_max = daily_stats['Saham_BBCA'].max()
price_span = data_max - data_min

plt.figure(figsize=(18, 7))
palette = sns.color_palette("coolwarm", len(daily_stats))
# argsort of argsort gives each bar's rank, so the colour follows the bar's
# position in the low-to-high ordering of the averages.
sorted_idx = daily_stats['Saham_BBCA'].argsort()
colors_mapped = [palette[i] for i in sorted_idx.argsort()]
bars = plt.bar(daily_stats['day'], daily_stats['Saham_BBCA'], color=colors_mapped, edgecolor='black', linewidth=0.8, alpha=0.85)
line = plt.plot(daily_stats['day'], daily_stats['Saham_BBCA'], color='#1a3e72', marker='o', linewidth=2.5, markersize=7,
                label='Tren Amatan', zorder=3)

# Iterate over the columns directly: DataFrame.iterrows() upcasts the integer
# `count` to float in each row, which rendered the labels as "n=69.0".
for day, avg_price, n_obs in zip(daily_stats['day'], daily_stats['Saham_BBCA'], daily_stats['count']):
    plt.text(day, avg_price + price_span * 0.015,
        f"{avg_price:,.0f}", ha='center', va='bottom', fontsize=8.5, fontweight='bold', color='black',
        path_effects=[patheffects.withStroke(linewidth=2.5, foreground='white')])
    plt.text(day, data_min - price_span * 0.08,
        f"n={n_obs}", ha='center', va='top', fontsize=7, color='#555555', alpha=0.7)

plt.title('Rata Rata Harga Saham BCA dari Tanggal Kalender \nJanuary 2, 2020 – October 27, 2025 (Hari Kerja)',
          fontsize=15, fontweight='bold', pad=20)
plt.ylabel('Harga Rata Rata Lembar Saham', fontsize=12, fontweight='bold')
plt.xlabel('Hari dalam Bulan', fontsize=12, fontweight='bold')
plt.xticks(ticks=daily_stats['day'], labels=daily_stats['day'], fontsize=10)

# Leave room below the bars for the n= labels and a little headroom above.
y_margin = price_span * 0.15
plt.ylim(data_min - y_margin, data_max + y_margin * 0.8)
plt.grid(axis='y', linestyle='--', alpha=0.4, linewidth=0.8)
plt.grid(axis='x', linestyle=':', alpha=0.2)
plt.legend(loc='upper left', framealpha=0.95, fontsize=10, edgecolor='gray')
plt.figtext(0.5, 0.02,
            'Note: Tiap tanggal diberikan bobot tertimbang (2020-2025). '
            'Jumlah observasi (n) ditampilkan di bawah tiap bar.',
            ha='center', fontsize=9, style='italic', color='#555555')
plt.tight_layout()
plt.subplots_adjust(bottom=0.12)
plt.show()

In [None]:
# Text summary of the day-of-month averages held in `daily_stats`.
avg_by_day = daily_stats['Saham_BBCA']
top_day = daily_stats.loc[avg_by_day.idxmax(), 'day']
low_day = daily_stats.loc[avg_by_day.idxmin(), 'day']
rule = "=" * 60

print("\n" + rule)
print("Analisis Statistik Harga per Tanggal:")
print(rule)
print(f"Tanggal dengan harga rata-rata tertinggi: {top_day} (Rp {avg_by_day.max():,.0f})")
print(f"Tanggal dengan harga rata-rata terendah: {low_day} (Rp {avg_by_day.min():,.0f})")
print(f"Range harga: Rp {avg_by_day.max() - avg_by_day.min():,.0f}")
print(f"Standar deviasi: Rp {avg_by_day.std():,.2f}")

## Uji T Dampak Kejadian

### Def Function Uji T

In [None]:
def analisis_jendela_event_weekdays_only(df, dummy_col, window_days=18, price_col='Saham_BBCA', title_event='Event'):
    """Plot prices around the first day of an event and run a paired t-test.

    The event date is the earliest index value where ``df[dummy_col] == 1``.
    Up to ``window_days`` weekday rows before and after that date are plotted
    against a relative-day axis (0 = event date). The prices before and after
    the event are then compared with ``scipy.stats.ttest_rel``; the i-th row of
    the trimmed "before" sample is paired with the i-th row of the trimmed
    "after" sample.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime index, the price column and the dummy column.
    dummy_col : str
        Column whose first value equal to 1 marks the event start.
    window_days : int, default 18
        Number of weekday rows kept on each side of the event date.
    price_col : str, default 'Saham_BBCA'
        Column holding the price series.
    title_event : str, default 'Event'
        Text used in the figure title and in the printed conclusion.

    Returns
    -------
    None
        The figure is shown and the test report is printed. The function
        returns early (after printing a message) when no event is found, when
        the event date is missing after the weekday filter, or when fewer than
        two before/after pairs are available for the test.
    """
    event_start = df[df[dummy_col] == 1].index.min()
    if pd.isna(event_start):
        print(f"Tidak ditemukan event pada kolom '{dummy_col}'.")
        return

    # Fetch twice the window in calendar days on each side, then keep weekdays only.
    start_window = event_start - pd.Timedelta(days=window_days*2)
    end_window = event_start + pd.Timedelta(days=window_days*2)
    temp_df = df.loc[start_window:end_window, [price_col]].reset_index()
    temp_df.columns = ['Tanggal', price_col]
    # Reset the index after filtering so that row labels equal row positions;
    # otherwise dropped weekend rows leave gaps and the position arithmetic
    # below would select the wrong number of rows.
    temp_df = temp_df[temp_df['Tanggal'].dt.weekday < 5].reset_index(drop=True)
    event_pos = temp_df.index[temp_df['Tanggal'] == event_start]
    if len(event_pos) == 0:
        print("Tanggal event tidak ditemukan dalam data setelah filter hari kerja.")
        return
    event_pos = event_pos[0]

    start_pos = max(0, event_pos - window_days)
    end_pos = min(len(temp_df) - 1, event_pos + window_days)
    event_window_df = temp_df.iloc[start_pos:end_pos + 1].copy()
    event_window_df['relative_day'] = np.arange(start_pos - event_pos, end_pos - event_pos + 1)

    first_rel = event_window_df['relative_day'].min()
    last_rel = event_window_df['relative_day'].max()
    price_span = event_window_df[price_col].max() - event_window_df[price_col].min()

    plt.figure(figsize=(14, 7))
    plt.plot(event_window_df['relative_day'], event_window_df[price_col], marker='o', linestyle='-', color='#8B0000',
             linewidth=2.5, markersize=6)
    plt.axvline(0, color='#1a3e72', linestyle='--', linewidth=2.5, label='Event Date', alpha=0.8)
    plt.axvspan(first_rel, 0, alpha=0.15, color='gray', label='Before Event')
    plt.axvspan(0, last_rel, alpha=0.15, color='orange', label='After Event')
    plt.title(f'{title_event}\n(±{window_days} Jendela Amatan Hari Kerja)',
              fontsize=15, fontweight='bold', pad=20)
    plt.xlabel('Trading Days Relative to Event (Day 0 = Event Date)', fontsize=12, fontweight='bold')
    plt.ylabel('BBCA Stock Price (IDR)', fontsize=12, fontweight='bold')
    plt.grid(True, linestyle='--', alpha=0.4, linewidth=0.8)

    # Label every `step`-th point plus the event day and both window edges.
    step = max(1, window_days // 4)
    for rel_day, price in zip(event_window_df['relative_day'], event_window_df[price_col]):
        is_labelled = (rel_day % step == 0) or rel_day in (0, first_rel, last_rel)
        if is_labelled and not pd.isna(price):
            plt.text(rel_day, price + price_span * 0.02,
                     f"Rp{price:,.0f}", ha='center', fontsize=8.5, fontweight='bold',
                     path_effects=[patheffects.withStroke(linewidth=2, foreground="white")])

    plt.legend(loc='best', framealpha=0.95, fontsize=10)
    plt.tight_layout()
    plt.show()

    # Statistical analysis: paired t-test on equally long before/after samples.
    before_prices = event_window_df[event_window_df['relative_day'] < 0][price_col].dropna()
    after_prices = event_window_df[event_window_df['relative_day'] > 0][price_col].dropna()
    mean_before = before_prices.mean()
    mean_after = after_prices.mean()
    min_len = min(len(before_prices), len(after_prices))
    if min_len < 2:
        # With fewer than two pairs the test statistic is undefined (NaN) and
        # would otherwise be reported as "fail to reject H0".
        print("Jumlah observasi sebelum/setelah event tidak cukup untuk uji t berpasangan.")
        return
    # Keep the observations closest to the event on both sides.
    before_trim = before_prices.iloc[-min_len:]
    after_trim = after_prices.iloc[:min_len]

    print("\n" + "="*70)
    print("ANALISIS STATISTIK: PAIRED T-TEST")
    print("="*70)
    print("\nHipotesis:")
    print("H0: Rata-rata harga sebelum event = rata-rata harga setelah event")
    print("H1: Rata-rata harga sebelum event ≠ rata-rata harga setelah event (two-tailed)")

    t_stat, p_val = ttest_rel(before_trim, after_trim)

    print("\n" + "-"*70)
    print("HASIL UJI STATISTIK:")
    print("-"*70)
    print(f"Event tanggal            : {event_start.strftime('%d %B %Y')}")
    print(f"Window analisis          : ±{window_days} hari kerja")
    print(f"Jumlah observasi sebelum : {len(before_prices)}")
    print(f"Jumlah observasi setelah : {len(after_prices)}")
    print(f"\nRata-rata harga sebelum event: Rp{mean_before:,.2f}")
    print(f"Rata-rata harga setelah event: Rp{mean_after:,.2f}")
    print(f"Perubahan rata-rata          : Rp{mean_after - mean_before:,.2f} ({((mean_after - mean_before)/mean_before)*100:.2f}%)")
    print(f"\nStatistik t              : {t_stat:.4f}")
    print(f"P-value (two-tailed)     : {p_val:.4f}")
    alpha = 0.05
    print("\n" + "-"*70)
    print(f"KESIMPULAN (α = {alpha}):")
    print("-"*70)
    if p_val < alpha:
        print("✓ TOLAK H0")
        print("  Terdapat perbedaan yang nyata secara statistik antara rata-rata")
        print(f"  harga saham BBCA sebelum dan setelah {title_event.lower()}.")
        if mean_after > mean_before:
            print("  → Harga saham cenderung NAIK setelah event.")
        else:
            print("  → Harga saham cenderung TURUN setelah event.")
    else:
        print("✗ GAGAL TOLAK H0")
        print("  Tidak ditemukan perbedaan yang signifikan secara statistik antara")
        print(f"  rata-rata harga saham BBCA sebelum dan setelah {title_event.lower()}.")
    print("="*70)

In [None]:
# Event-window analysis: start of the COVID-19 PSBB/lockdown dummy, ±30 weekday rows.
analisis_jendela_event_weekdays_only(df, 'dummy_covid_lockdown', window_days=30, 
                                     title_event='Pengaruh PSBB/Lockdown COVID-19 pada Harga Saham BBCA')

In [None]:
# Event-window analysis: start of the finance-minister change dummy (dummy_menkeu_purbaya), ±20 weekday rows.
analisis_jendela_event_weekdays_only(df, 'dummy_menkeu_purbaya', window_days=20, 
                                     title_event='Pengaruh Pergantian Menteri Keuangan pada Harga Saham BBCA')

In [None]:
# Event-window analysis: start of the 2024 election-period dummy, ±50 weekday rows.
analisis_jendela_event_weekdays_only(df, 'dummy_election_2024', window_days=50, 
                                     title_event='Pengaruh Pemilu Serentak pada Harga Saham BBCA')

## Interaksi Bulan dan Hari

In [None]:
# Mean price for every (weekday, month) combination, laid out Monday-Friday by
# January-December and rounded to two decimals.
ordered_weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
ordered_months = ['January', 'February', 'March', 'April', 'May', 'June',
                  'July', 'August', 'September', 'October', 'November', 'December']

# The helper columns are written onto `df` itself.
df['weekday'] = df.index.day_name()
df['month'] = df.index.month_name()

pivot_table = (
    df.pivot_table(values='Saham_BBCA', index='weekday', columns='month', aggfunc='mean')
      .reindex(index=ordered_weekdays, columns=ordered_months)
      .round(2)
)
pivot_table

In [None]:
# Heatmap of the weekday x month average prices computed in `pivot_table`.
plt.rcParams['font.family'] = 'DejaVu Sans'
fig, ax = plt.subplots(figsize=(20, 10))

heatmap_options = dict(
    cmap='crest',
    annot=True,
    fmt='.2f',
    linewidths=0.7,
    linecolor='white',
    cbar_kws={'label': 'Lembar Saham Rata-Rata'})
sns.heatmap(pivot_table, ax=ax, **heatmap_options)

ax.set_title('Heatmap Harga Rata-Rata berdasarkan Hari dan Bulan', fontsize=18, fontweight='bold', pad=20)
ax.set_xlabel('Bulan', fontsize=14, labelpad=10)
ax.set_ylabel('Hari dalam Minggu', fontsize=14, labelpad=10)

# Slant the month labels; keep the weekday labels horizontal.
month_ticks = ax.get_xticklabels()
weekday_ticks = ax.get_yticklabels()
ax.set_xticklabels(month_ticks, rotation=45, ha='right', fontsize=11)
ax.set_yticklabels(weekday_ticks, rotation=0, fontsize=11)

plt.tight_layout()
plt.show()

## Analisis Gap Ekstrem

In [None]:
# Flag daily moves larger than `threshold` percent in either direction and list
# them in chronological order.
df = df.sort_index()
df['pct_change'] = df['Saham_BBCA'].pct_change() * 100
threshold = 5
df['extreme_up'] = df['pct_change'] > threshold
df['extreme_down'] = df['pct_change'] < -threshold
# "Event active" here refers to the dummy_menkeu_purbaya window only.
df['event_active'] = df['dummy_menkeu_purbaya'] == 1

extreme_columns = ['Saham_BBCA', 'pct_change', 'extreme_up', 'extreme_down', 'event_active']
extreme_days = df.loc[df['extreme_up'] | df['extreme_down'], extreme_columns]

# The earlier sort by pct_change was overwritten on the very next line and never
# displayed; only the chronological ordering is kept.
extreme_days_sorted = extreme_days.sort_index().round({'Saham_BBCA': 2, 'pct_change': 2})
extreme_days_sorted

In [None]:
# Price line with markers on extreme up/down days and on event-active days.
plt.figure(figsize=(15, 6))
sns.lineplot(data=df, x=df.index, y='Saham_BBCA', color='gray', label='Lembar Saham')

# (boolean flag column, scatter styling) — drawn in this order.
marker_layers = [
    ('extreme_up',   dict(color='green', s=100, label='Lonjakan >5%')),
    ('extreme_down', dict(color='red', s=100, label='Penurunan >5%')),
    ('event_active', dict(color='blue', marker='X', s=100, label='Event Aktif')),
]
for flag_col, layer_style in marker_layers:
    flagged = df[df[flag_col]]
    sns.scatterplot(data=flagged, x=flagged.index, y='Saham_BBCA', **layer_style)

plt.title('Selisih: Lonjakan / Penurunan Tajam (>5%)', fontsize=18, fontweight='bold', pad=20)
plt.xlabel('Tanggal', fontsize=13)
plt.ylabel('Saham Lembar', fontsize=13)
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Three panels (2020-2021, 2022-2023, 2024-2025) showing the price with extreme
# up/down days and event-active days, plus one shared legend for the figure.
df = df.sort_index()
if not isinstance(df.index, pd.DatetimeIndex):
    df.index = pd.to_datetime(df.index)
# Integer year bounds; the summary cell that follows reads this variable too.
year_ranges = [(2020, 2021), (2022, 2023), (2024, 2025)]

fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(2, 2, hspace=0.25, wspace=0.25, height_ratios=[1, 1], width_ratios=[1, 1])
# Two panels on the top row, one wide panel across the bottom row.
axes = [fig.add_subplot(gs[0, 0]), fig.add_subplot(gs[0, 1]), fig.add_subplot(gs[1, :])]
all_handles = []
all_labels = []

# (flag column, legend text, scatter styling) for the marker layers.
marker_specs = [
    ('extreme_up', 'Lonjakan >5%',
     dict(color='#00AA00', s=110, marker='^', zorder=4, edgecolor='white', linewidth=1.2)),
    ('extreme_down', 'Penurunan >5%',
     dict(color='#CC0000', s=110, marker='v', zorder=4, edgecolor='white', linewidth=1.2)),
    ('event_active', 'Event Aktif',
     dict(color='#0066CC', marker='X', s=140, zorder=5, edgecolor='white', linewidth=1.0, alpha=0.85)),
]

for ax, (start_year, end_year) in zip(axes, year_ranges):
    in_period = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[in_period]
    if df_sub.empty:
        ax.set_visible(False)
        continue

    all_handles += ax.plot(df_sub.index, df_sub['Saham_BBCA'], color='gray', linewidth=1.5, alpha=0.8, label='Harga')
    all_labels += ['Harga']

    # A layer is drawn only when the period contains at least one flagged day.
    for flag_col, legend_text, marker_style in marker_specs:
        flagged = df_sub.loc[df_sub[flag_col], 'Saham_BBCA']
        if flagged.empty:
            continue
        all_handles.append(ax.scatter(flagged.index, flagged, label=legend_text, **marker_style))
        all_labels.append(legend_text)

    ax.set_title(f'Periode {start_year} – {end_year}', fontsize=14, fontweight='bold', pad=12)
    ax.set_xlabel('Tanggal', fontsize=11, fontweight='bold')
    ax.set_ylabel('Harga Saham BBCA (IDR)', fontsize=11, fontweight='bold')
    ax.tick_params(axis='x', rotation=45, labelsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.grid(True, linestyle='--', alpha=0.35, linewidth=0.8)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'Rp{x:,.0f}'))

    # Small count summary in the top-left corner of the panel.
    n_up = int(df_sub['extreme_up'].sum())
    n_down = int(df_sub['extreme_down'].sum())
    n_event = int(df_sub['event_active'].sum())
    stats_text = f"Lonjakan: {n_up} | Penurunan: {n_down}"
    if n_event > 0:
        stats_text += f" | Event Days: {n_event}"
    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9, verticalalignment='top',
            bbox=dict(boxstyle='round,pad=0.3', facecolor='wheat', alpha=0.6))

# Legend: keep the first handle seen for every label, in first-seen order.
unique = {}
for handle, text in zip(all_handles, all_labels):
    unique.setdefault(text, handle)
legend_labels = list(unique)
legend_handles = list(unique.values())
fig.legend(legend_handles, legend_labels, loc='lower center', ncol=4, fontsize=11, frameon=True, framealpha=0.95,
           facecolor='white', edgecolor='0.7', title='Legenda', title_fontsize=12, bbox_to_anchor=(0.5, -0.02), borderpad=0.6)
plt.suptitle('Selisih: Pergerakan Lonjakan Harga Saham BBCA per Periode\n(Threshold: >5%)',
             fontsize=17, fontweight='bold', y=0.98)
plt.tight_layout(rect=[0, 0.03, 1, 0.96])
plt.show()

In [None]:
# Summary of extreme-move counts per period (expects integer year bounds in `year_ranges`).
banner = "=" * 80
print("\n" + banner)
print("RINGKASAN PERGERAKAN LONJAKAN PER PERIODE")
print(banner)

for start_year, end_year in year_ranges:
    in_period = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[in_period]
    total_days = len(df_sub)
    has_rows = total_days > 0
    n_up = int(df_sub['extreme_up'].sum()) if has_rows else 0
    n_down = int(df_sub['extreme_down'].sum()) if has_rows else 0

    print(f"\nPeriode {start_year}-{end_year}:")
    print(f"  Total hari trading   : {total_days}")
    # Each count is followed by its share of the period's rows when the period is non-empty.
    for caption, count in [("Lonjakan >5%", n_up),
                           ("Penurunan >5%", n_down),
                           ("Total hari ekstrem", n_up + n_down)]:
        report_line = f"  {caption:<21}: {count}"
        if has_rows:
            report_line += f" ({count / total_days * 100:.2f}%)"
        print(report_line)

## Jendela Standar Deviasi Bergerak

In [None]:
# 22-row rolling mean and standard deviation of the price, plus a ±1 std band.
df = df.sort_index()
if not isinstance(df.index, pd.DatetimeIndex):
    df.index = pd.to_datetime(df.index)

# min_periods=1 lets the window start producing values from the first row.
price_window = df['Saham_BBCA'].rolling(window=22, min_periods=1)
df['rolling_mean_22'] = price_window.mean()
df['rolling_std_22'] = price_window.std()
df['upper_band'] = df['rolling_mean_22'] + df['rolling_std_22']
df['lower_band'] = df['rolling_mean_22'] - df['rolling_std_22']

In [None]:
# Actual price, its 22-row rolling mean, and the ±1 rolling-std band.
fig, ax = plt.subplots(figsize=(16, 7))
ax.plot(df.index, df['Saham_BBCA'], label='Harga Aktual', color='black', linewidth=1)
ax.plot(df.index, df['rolling_mean_22'], label='Rolling Mean (22 Hari)', color='orange', linewidth=2)
ax.fill_between(df.index, df['lower_band'], df['upper_band'], color='orange', alpha=0.2, label='±1 Std Dev')

ax.set_title('Pergerakan 22 Hari: Tren & Volatilitas Harga', fontsize=18, fontweight='bold')
ax.set_xlabel('Tanggal', fontsize=13)
ax.set_ylabel('Saham per Lembar', fontsize=13)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Per-period view of price, 22-row rolling mean and ±1 rolling-std band.
# The rolling columns are recomputed here (same definitions as the rolling-window
# cell above) so this cell can be re-run on its own.
rolling_22 = df['Saham_BBCA'].rolling(window=22, min_periods=1)
df['rolling_mean_22'] = rolling_22.mean()
df['rolling_std_22'] = rolling_22.std()
df['upper_band'] = df['rolling_mean_22'] + df['rolling_std_22']
df['lower_band'] = df['rolling_mean_22'] - df['rolling_std_22']

# NOTE: `year_ranges` holds year *strings* here because the periods are selected
# with label-based slicing (df.loc['2020':'2021']). Other cells rebind the same
# name to integer pairs for year-mask filtering, so cell order matters.
year_ranges = [('2020', '2021'), ('2022', '2023'), ('2024', '2025')]

# Layout: two panels on the top row, one wide panel spanning the bottom row.
fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(2, 2, hspace=0.35, wspace=0.25)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])
axes = [ax1, ax2, ax3]

for i, (start, end) in enumerate(year_ranges):
    ax = axes[i]
    df_sub = df.loc[start:end]

    # Hide the panel instead of drawing empty axes when a period has no rows.
    if df_sub.empty:
        ax.set_visible(False)
        continue

    ax.plot(df_sub.index, df_sub['Saham_BBCA'],
            label='Harga Aktual', color='#1a3e72', linewidth=1.5, zorder=3)
    ax.plot(df_sub.index, df_sub['rolling_mean_22'],
            label='Rolling Mean (22 Hari)', color='#FF6B35', linewidth=2.5, zorder=2)
    ax.fill_between(df_sub.index, df_sub['lower_band'], df_sub['upper_band'],
                    color='#FF6B35', alpha=0.2, label='±1 Std Dev', zorder=1)
    ax.set_title(f'Tren & Volatilitas: {start}–{end}', fontsize=14, fontweight='bold', pad=15)
    ax.set_xlabel('Tanggal', fontsize=11, fontweight='bold')
    # Fixed: the label previously ended with a stray ')' ("Saham per Lembar)").
    ax.set_ylabel('Saham per Lembar', fontsize=11, fontweight='bold')
    ax.grid(True, linestyle='--', alpha=0.4, linewidth=0.8)
    ax.tick_params(axis='x', rotation=45, labelsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'Rp{x:,.0f}'))

    # Small statistics box: price range and mean rolling std within the period.
    avg_std = df_sub['rolling_std_22'].mean()
    max_price = df_sub['Saham_BBCA'].max()
    min_price = df_sub['Saham_BBCA'].min()
    price_range = max_price - min_price

    stats_text = (f"Range: Rp{price_range:,.0f}\n"
                  f"Avg Volatility: Rp{avg_std:,.0f}")

    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8, edgecolor='gray'), family='monospace')

# Every drawn panel carries the same three labelled artists, so take the legend
# entries from the first panel that was actually drawn (axes[0] may be hidden).
handles, labels = next(
    (panel.get_legend_handles_labels() for panel in axes if panel.get_visible()),
    ([], []),
)

if handles:
    fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=len(labels), frameon=True,
               fontsize=11, framealpha=0.95, edgecolor='gray', fancybox=True, shadow=True)
fig.suptitle('Pergerakan 22 Hari: Analisis Tren & Volatilitas Harga Saham BBCA per Periode',
             fontsize=17, fontweight='bold', y=0.995)
plt.tight_layout()
plt.show()

In [None]:
# Per-period volatility summary based on the 22-row rolling standard deviation.
# Assumes `year_ranges` holds year strings (as set in the per-period plot cell),
# since the periods are selected with label-based slicing on the index.
divider = "=" * 80
print("\n" + divider)
print("RINGKASAN VOLATILITAS PER PERIODE (Pergerakan 22 Hari)")
print(divider)

for start, end in year_ranges:
    df_sub = df.loc[start:end]
    if df_sub.empty:
        continue

    prices = df_sub['Saham_BBCA']
    avg_price = prices.mean()
    max_price = prices.max()
    min_price = prices.min()
    price_range = max_price - min_price
    avg_std = df_sub['rolling_std_22'].mean()

    # Relative volatility: mean rolling std as a percentage of the mean price
    # (reported as 0 when the mean price is not positive).
    if avg_price > 0:
        cv = avg_std / avg_price * 100
    else:
        cv = 0

    print(f"\nPeriode {start}-{end}:")
    print(f"  Harga rata-rata      : Rp{avg_price:,.2f}")
    print(f"  Harga tertinggi      : Rp{max_price:,.2f}")
    print(f"  Harga terendah       : Rp{min_price:,.2f}")
    print(f"  Range harga          : Rp{price_range:,.2f}")
    print(f"  Avg Std Dev (22 hari): Rp{avg_std:,.2f}")
    print(f"  Coefficient of Var   : {cv:.2f}% (volatilitas relatif)")

# Reading guide for the figures above.
print("\n" + divider)
print("INTERPRETASI:")
print(divider)
for note in (
    "• Semakin lebar area ±1 Std Dev → Semakin tinggi volatilitas harga",
    "• Harga aktual sering keluar dari band → Pergerakan tidak terprediksi",
    "• Coefficient of Variation tinggi → Volatilitas relatif terhadap harga tinggi",
):
    print(note)

## Cek Anomali Pencilan

In [None]:
# Global Z-score of the price level: distance from the full-sample mean in
# units of the full-sample standard deviation.
prices = df['Saham_BBCA']
df['z_score'] = (prices - prices.mean()) / prices.std()

# Flag rows whose absolute Z-score exceeds the cut-off.
threshold = 2
df['anomaly_zscore'] = df['z_score'].abs() > threshold

# Table of flagged dates with their price and Z-score (displayed as cell output).
anomalies_zscore = df.loc[df['anomaly_zscore'], ['Saham_BBCA', 'z_score']]
anomalies_zscore

In [None]:
# Compare which points the global Z-score flags as outliers at several cut-offs,
# one panel per threshold on shared axes.
thresholds = [2.0, 2.2, 2.5, 3.0]
fig, axes = plt.subplots(2, 2, figsize=(18, 12), sharex=True, sharey=True)
axes = axes.flatten()

for i, thresh in enumerate(thresholds):
    # Persist the flag as a column named after the threshold (e.g. 'anomaly_zscore_2.0').
    col_name = f'anomaly_zscore_{thresh}'
    df[col_name] = df['z_score'].abs() > thresh
    flagged = df[df[col_name]]  # computed once, used for both data and x positions

    ax = axes[i]
    sns.lineplot(data=df, x=df.index, y='Saham_BBCA', ax=ax, color='black', label='Harga', linewidth=1.2)
    sns.scatterplot(
        data=flagged,
        x=flagged.index, y='Saham_BBCA',
        ax=ax, color='red', s=80, label='Anomali', zorder=5)

    ax.set_title(f'Z-score Anomaly Detection (Threshold = {thresh})', fontsize=14, fontweight='bold', pad=14)
    ax.set_xlabel('Tanggal', fontsize=12, labelpad=8)
    # Fixed: the label said "USD", but the series is in Rupiah (other figures
    # in this notebook format the same axis with an 'Rp' prefix).
    ax.set_ylabel('Harga (Rp)', fontsize=12, labelpad=8)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.tick_params(axis='x', rotation=45)
    ax.xaxis.set_major_locator(MaxNLocator(6))
    # Right-align rotated tick labels so they end at their tick.
    for label in ax.get_xticklabels():
        label.set_horizontalalignment('right')

# One shared legend below the grid, taken from the first panel.
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(
    handles, labels,
    loc='lower center',
    bbox_to_anchor=(0.5, -0.05),
    ncol=len(labels),
    frameon=True,
    fontsize=13,
    framealpha=0.9)

plt.subplots_adjust(hspace=0.3, wspace=0.15, bottom=0.18)
plt.suptitle('Deteksi Pencilan dengan Z-score untuk Berbagai Ambang Batas', fontsize=18, fontweight='bold', y=0.95)
plt.tight_layout(rect=[0, 0.05, 1, 0.95])
plt.show()

In [None]:
# Per-period view of the Z-score outliers (|Z| > 2) on the price series.
# The Z-score is recomputed here (same definition as the anomaly cell above)
# so this cell can be re-run on its own.
df['z_score'] = (df['Saham_BBCA'] - df['Saham_BBCA'].mean()) / df['Saham_BBCA'].std()
threshold = 2
df['anomaly_zscore_2'] = df['z_score'].abs() > threshold

# NOTE: `year_ranges` is rebound to *integer* (start_year, end_year) pairs here;
# the periods are selected below with an inclusive year mask on the index.
year_ranges = [(2020, 2021), (2022, 2023), (2024, 2025)]

# NOTE: these rcParams are set globally and stay in effect for later cells.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'axes.titlesize': 15,
    'axes.titleweight': 'bold',
    'axes.labelsize': 13,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 12,
})

# Layout: two panels on the top row, one wide panel spanning the bottom row.
fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(2, 2, hspace=0.35, wspace=0.25)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :])
axes = [ax1, ax2, ax3]

for i, (start_year, end_year) in enumerate(year_ranges):
    ax = axes[i]

    mask = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[mask]

    # Hide the panel instead of drawing empty axes when a period has no rows.
    if df_sub.empty:
        ax.set_visible(False)
        continue

    anomalies = df_sub[df_sub['anomaly_zscore_2']]
    n_anomalies = df_sub['anomaly_zscore_2'].sum()

    sns.lineplot(data=df_sub, x=df_sub.index, y='Saham_BBCA',
                 ax=ax, color='#1a3e72', label='Harga', linewidth=1.8, alpha=0.8)

    # Only draw the markers when the period actually contains flagged rows.
    if n_anomalies > 0:
        sns.scatterplot(
            data=anomalies, x=anomalies.index, y='Saham_BBCA',
            ax=ax, color='#DC143C', s=120, label='Anomali (|Z| > 2)', zorder=5, edgecolor='white', linewidth=1.5)

    ax.set_title(f'Deteksi Pencilan Z-score: {start_year}-{end_year}', fontsize=14, fontweight='bold', pad=14)
    ax.set_xlabel('Tanggal', labelpad=8, fontweight='bold')
    ax.set_ylabel('Harga Saham per Lembar', labelpad=8, fontweight='bold')
    ax.grid(True, linestyle='--', alpha=0.4, linewidth=0.8)
    ax.tick_params(axis='x', rotation=45)
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'Rp{x:,.0f}'))
    ax.xaxis.set_major_locator(MaxNLocator(nbins=8))

    # Right-align rotated tick labels so they end at their tick.
    for label in ax.get_xticklabels():
        label.set_horizontalalignment('right')

    # Small statistics box: outlier count/share and |Z| summary for the period.
    pct_anomalies = (n_anomalies / len(df_sub) * 100) if len(df_sub) > 0 else 0
    avg_zscore = df_sub['z_score'].abs().mean()
    max_zscore = df_sub['z_score'].abs().max()
    stats_text = (f"Pencilan: {n_anomalies} ({pct_anomalies:.1f}%)\n"
                  f"Avg |Z|: {avg_zscore:.2f}\n"
                  f"Max |Z|: {max_zscore:.2f}")

    ax.text(0.02, 0.98, stats_text, transform=ax.transAxes, fontsize=9, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='lightcoral', alpha=0.7, edgecolor='darkred'), family='monospace')

# Build the shared legend from ALL panels (first handle per label). Taking it
# from axes[0] alone drops the outlier entry whenever the first period has no
# flagged rows while later periods do.
legend_entries = {}
for panel in axes:
    for handle, text in zip(*panel.get_legend_handles_labels()):
        legend_entries.setdefault(text, handle)
labels = list(legend_entries.keys())
handles = list(legend_entries.values())

if handles:
    fig.legend(handles, labels, loc='lower center', bbox_to_anchor=(0.5, -0.02), ncol=len(labels), frameon=True,
               fontsize=12, framealpha=0.95, edgecolor='gray', fancybox=True, shadow=True)
fig.suptitle('Deteksi Pencilan Harga Saham BBCA dengan Z-score (Threshold = 2)',
             fontsize=17, fontweight='bold', y=0.995)
plt.tight_layout()
plt.show()

In [None]:
# Per-period summary of the Z-score outliers (flag column `anomaly_zscore_2`).
# Relies on `year_ranges` holding integer (start_year, end_year) pairs here.
print("\n" + "="*80)
print("RINGKASAN DETEKSI PENCILAN PER PERIODE (Z-score Threshold = 2)")
print("="*80)
for start_year, end_year in year_ranges:
    # Inclusive year mask on the DatetimeIndex.
    mask = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[mask]

    if df_sub.empty:
        continue

    n_anomalies = df_sub['anomaly_zscore_2'].sum()
    total_days = len(df_sub)
    pct_anomalies = (n_anomalies / total_days * 100) if total_days > 0 else 0
    avg_zscore = df_sub['z_score'].abs().mean()
    max_zscore = df_sub['z_score'].abs().max()
    min_zscore = df_sub['z_score'].min()
    max_zscore_full = df_sub['z_score'].max()

    print(f"\nPeriode {start_year}-{end_year}:")
    print(f"  Total hari trading    : {total_days}")
    print(f"  Jumlah Pencilan        : {n_anomalies} ({pct_anomalies:.2f}%)")
    print(f"  Avg |Z-score|         : {avg_zscore:.3f}")
    print(f"  Max |Z-score|         : {max_zscore:.3f}")
    print(f"  Z-score range         : [{min_zscore:.3f}, {max_zscore_full:.3f}]")

    # Most extreme outliers of the period. Outliers are defined by |Z|, so they
    # are ranked by |Z| as well: ranking by the signed value would list the
    # *least* extreme dates whenever the period's outliers are below the mean.
    # keep='all' retains ties at the cut-off, so more than 3 rows may be printed.
    if n_anomalies > 0:
        anomalies = df_sub[df_sub['anomaly_zscore_2']]
        top_index = anomalies['z_score'].abs().nlargest(3, keep='all').index
        top_anomalies = anomalies.loc[top_index]
        print(f"  Top 3 anomali:")
        for idx, row in top_anomalies.iterrows():
            print(f"    • {idx.strftime('%d %b %Y')}: Rp{row['Saham_BBCA']:,.0f} (Z={row['z_score']:.2f})")

# Reading guide for the Z-score figures above.
print("\n" + "="*80)
print("INTERPRETASI Z-SCORE:")
print("="*80)
print("• |Z| < 2   : Normal (95% data)")
print("• |Z| ≥ 2   : Pencilan (outlier, perlu investigasi)")
print("• |Z| > 3   : Pencilan ekstrem (sangat jarang terjadi)")
print("• Z positif : Harga di atas rata-rata")
print("• Z negatif : Harga di bawah rata-rata")

## Finalisasi df

In [None]:
# Build the final modelling frame: price plus 0/1 dummy indicators.
df = df.sort_index()

# Cut-offs: daily percentage move (in %) and absolute Z-score.
threshold_gap = 5
threshold_anom = 2

# Daily percentage change and flags for moves beyond ±threshold_gap percent.
df['pct_change'] = df['Saham_BBCA'].pct_change() * 100
df['extreme_up'] = df['pct_change'] > threshold_gap
df['extreme_down'] = df['pct_change'] < -threshold_gap

# dummy_gap = 1 on any extreme move (up or down); dummy_anomaly = 1 when the
# existing `z_score` column exceeds the cut-off in absolute value.
df['dummy_gap'] = (df['extreme_up'] | df['extreme_down']).astype(int)
df['dummy_anomaly'] = df['z_score'].abs().gt(threshold_anom).astype(int)

# Keep the price and every column whose name starts with 'dummy_'.
dummy_cols = [col for col in df.columns if col.startswith('dummy_')]
cols_to_keep = ['Saham_BBCA', *dummy_cols]
df_final = df[cols_to_keep].copy()
df_final

In [None]:
#df_final.to_csv('final_data_with_dummy_anomaly.csv', index=True)