In [None]:
from datetime import datetime
from math import sqrt
import numpy as np
import pandas as pd
from pandas import read_csv, DataFrame, Series, concat
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
from matplotlib.ticker import MaxNLocator
from matplotlib.patches import Patch
import matplotlib.patheffects as patheffects
import matplotlib.font_manager as fm
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf, pacf, adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.stattools import jarque_bera
from scipy.stats import t, ttest_rel
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error)
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from typing import Optional
import keras_tuner as kt
import optuna
import joblib

# --- Library settings ---
# IPython magic (not plain Python): requests inline rendering of matplotlib figures.
%matplotlib inline
# NOTE(review): this context/style pair looks superseded by the later
# sns.set_style('whitegrid') and sns.set(...) calls in this cell — confirm which
# look is actually intended.
sns.set_context('poster')
sns.set_style('white')
sns.set_color_codes()
# Keyword arguments for scatter plots; not referenced in the visible part of the notebook.
plot_kwds = {'alpha' : 0.5, 's' : 80, 'linewidths':0}

import warnings
# Suppresses every warning from here on, which also hides deprecation notices
# raised by the libraries used below.
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
# Render figures at 400 dpi.
plt.rcParams['figure.dpi'] = 400
# Same off-white colour for the axes and figure backgrounds.
sns.set(rc = {'axes.facecolor': '#FBFBFB', 'figure.facecolor': '#FBFBFB'})
class clr:
    """ANSI escape sequences used to decorate text printed to the console."""
    # Colour-only prefix.
    color = '\033[93m'
    # Same colour followed by the '\033[1m' attribute.
    start = color + '\033[1m'
    # Reset sequence that ends any decoration started above.
    end = '\033[0m'

# ETL Data

In [None]:
# Load the raw price file and normalise the timestamps to midnight.
df = pd.read_csv('B.CPO Data.csv')
df['Date'] = pd.to_datetime(df['Date']).dt.normalize()
# Business-day calendar covering the whole study period.
full_index = pd.date_range('2014-01-24', '2025-06-24', freq='B')

# Align the data to the calendar; days missing from the file become NaN rows.
df = df.set_index('Date')
df = df.reindex(full_index)
# Non-numeric price entries are coerced to NaN, then every gap is filled with
# the last known price. Series.ffill() replaces fillna(method='ffill'), which
# newer pandas releases deprecate.
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
df['Price'] = df['Price'].ffill()

def make_dummy(df, start, end=None, impulse=False, linear_trend=False):
    """Build an event regressor aligned with ``df.index``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index holds the observation dates.
    start : str or datetime-like
        First date of the event.
    end : str or datetime-like, optional
        Last date of the event (inclusive). ``None`` leaves the window open-ended.
    impulse : bool, default False
        Return a one-date pulse instead of a step. The pulse sits on ``start``
        when that date is in the index; otherwise it moves to the earliest index
        date inside ``[start, end]`` (for example, a weekend event on a
        business-day index lands on the next trading day).
    linear_trend : bool, default False
        Return a ramp 1, 2, 3, ... over the window instead of a step. Ignored
        when ``impulse`` is True.

    Returns
    -------
    numpy.ndarray
        Integer 0/1 array for step and impulse dummies, float array for the
        linear trend; always ``len(df)`` long.
    """
    start = pd.to_datetime(start)
    if end is not None:
        end = pd.to_datetime(end)

    index = df.index
    # Rows inside the event window; with no end date the window stays open.
    in_window = index >= start
    if end is not None:
        in_window = in_window & (index <= end)

    if impulse:
        exact = index == start
        if exact.any():
            return exact.astype(int)
        # The start date is not an observation date: snap to the first
        # observation inside the window rather than returning all zeros.
        candidates = index[in_window]
        if len(candidates) == 0:
            return np.zeros(len(index), dtype=int)
        return (index == candidates.min()).astype(int)

    if linear_trend:
        trend = np.zeros(len(df))
        idx = np.where(in_window)[0]
        if len(idx) > 0:
            trend[idx] = np.arange(1, len(idx) + 1)
        return trend

    return in_window.astype(int)

# Event regressors: (column, start, end, extra make_dummy options), created in
# this order so the column layout of df is unchanged.
# NOTE(review): 2019-09-14 and 2020-03-08 look like weekend dates while the index
# is a business-day calendar — verify the impulse columns are not all zero.
# NOTE(review): dummy_sanction and dummy_fed share the same window, so the two
# columns are identical.
_event_specs = [
    ('dummy_glut', '2014-04-01', '2015-12-31', {}),
    ('dummy_opec_nocut', '2016-01-01', '2016-01-31', {}),
    ('dummy_brexit', '2016-06-24', '2016-09-16', {}),
    ('dummy_tradewar', '2018-07-01', '2020-01-31', {}),
    ('dummy_saudi_attack', '2019-09-14', '2019-09-20', {'impulse': True}),
    ('dummy_covid_lockdown', '2020-02-01', '2020-04-30', {}),
    ('dummy_oilwar', '2020-03-08', '2020-04-30', {'impulse': True}),
    ('dummy_suez', '2021-03-23', '2021-03-29', {'impulse': True}),
    ('trend_rebound', '2021-04-01', '2022-03-31', {'linear_trend': True}),
    ('dummy_ukraine', '2022-02-24', '2022-12-31', {}),
    ('dummy_sanction', '2022-03-01', '2023-06-30', {}),
    ('dummy_fed', '2022-03-01', '2023-06-30', {}),
    ('dummy_china_reopen', '2024-01-01', '2024-06-30', {}),
    ('dummy_iran_war', '2025-06-01', '2025-06-30', {}),
    ('dummy_opec_plus', '2016-12-01', '2017-12-31', {}),
    ('dummy_energy_transition', '2020-02-01', '2020-11-15', {}),
]
for _column, _start, _end, _options in _event_specs:
    df[_column] = make_dummy(df, _start, _end, **_options)

# Later cells address the index by this name after reset_index().
df.index.name = 'date'
df

# EDA

## Plot Deret Waktu

### Plot Deret Waktu-Penuh

In [None]:
# Figure-wide look.
# NOTE(review): sns.set() may rewrite font settings, so the font line is kept
# ahead of it exactly as before — confirm the resulting font is the intended one.
plt.rcParams['font.family'] = 'DejaVu Sans'
sns.set(style='whitegrid', context='talk', palette='colorblind')

fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=df, x=df.index, y='Price', ax=ax, linewidth=2.5, color='tab:blue')

# All-time high and low, drawn as markers and listed in the legend.
max_date, max_price = df['Price'].idxmax(), df['Price'].max()
min_date, min_price = df['Price'].idxmin(), df['Price'].min()
ax.scatter(max_date, max_price, color='crimson', s=80, zorder=5, label=f'Max: {max_price:.2f} USD')
ax.scatter(min_date, min_price, color='darkgreen', s=80, zorder=5, label=f'Min: {min_price:.2f} USD')
ax.legend(loc='upper left', fontsize=12, frameon=True)

# Title and axis labels.
ax.set_xlabel('Date', fontsize=16, labelpad=10)
ax.set_ylabel('Price (USD)', fontsize=16, labelpad=10)
ax.set_title('Brent Crude Oil Price Over Time', fontsize=22, fontweight='bold', pad=20)

# X axis: one major tick every six months, labelled as abbreviated month + year.
formatter = mdates.DateFormatter('%b %Y')
locator = mdates.MonthLocator(interval=6)
ax.xaxis.set_major_formatter(formatter)
ax.xaxis.set_major_locator(locator)
fig.autofmt_xdate()

# Horizontal grid lines only.
ax.xaxis.grid(False)
ax.yaxis.grid(True, which='major', linestyle='--', linewidth=0.6, alpha=0.7)
sns.despine()
plt.tight_layout(pad=2)
plt.show()

### Plot Deret Waktu-Tahunan

In [None]:
# Seaborn style for the yearly grid. sns.set_style() only applies rc keys that
# belong to seaborn's style definition; 'axes.linewidth' and 'axes.titlepad' are
# not among them and were silently dropped, so they are set through rcParams.
sns.set_style("whitegrid", {
    'axes.edgecolor': '0.8',
    'grid.color': '0.85',
    'grid.linestyle': '--',
})
# Applied after set_style so that the font family set here is the one that sticks.
plt.rcParams.update({
    'axes.linewidth': 0.5,
    'axes.titlepad': 15,
    'font.family': 'DejaVu Sans',
    'axes.titleweight': 'bold',
    'axes.labelweight': 'bold',
    'figure.titlesize': 18,
    'figure.titleweight': 'bold',
})
# One row per event column: (column, shading colour, legend/label text).
# Both lookup dictionaries are derived from this table, so their keys and their
# order always agree.
_event_table = [
    ('dummy_glut', '#FFA07A', 'Global Oil Glut (2014-15)'),                # Light Salmon - Supply glut
    ('dummy_opec_nocut', '#20B2AA', 'OPEC No Cut (2016)'),                 # Light Sea Green - OPEC decision
    ('dummy_brexit', '#9370DB', 'Brexit (2016)'),                          # Medium Purple - Political
    ('dummy_tradewar', '#3CB371', 'US-China Trade War (2018-20)'),         # Medium Sea Green - Trade
    ('dummy_saudi_attack', '#FF6347', 'Saudi Attack (2019)'),              # Tomato - Supply shock
    ('dummy_covid_lockdown', '#4682B4', 'COVID Lockdowns (2020)'),         # Steel Blue - Pandemic
    ('dummy_oilwar', '#BA55D3', 'Oil Price War (2020)'),                   # Medium Orchid - Price war
    ('dummy_suez', '#FFD700', 'Suez Blockage (2021)'),                     # Gold - Supply disruption
    ('trend_rebound', '#32CD32', 'Recovery Trend (2021-22)'),              # Lime Green - Recovery
    ('dummy_ukraine', '#CD5C5C', 'Ukraine War (2022)'),                    # Indian Red - Geopolitical
    ('dummy_sanction', '#87CEFA', 'Russia Sanctions (2022-23)'),           # Light Sky Blue - Sanctions
    ('dummy_fed', '#FF8C00', 'Fed Rate Hikes (2022-23)'),                  # Dark Orange - Monetary policy
    ('dummy_china_reopen', '#9ACD32', 'China Reopening (2024)'),           # Yellow Green - Demand
    ('dummy_iran_war', '#FF4500', 'Iran Conflict (2025)'),                 # Orange Red - Geopolitical
    ('dummy_opec_plus', '#48D1CC', 'OPEC+ Agreement (2017)'),              # Medium Turquoise - OPEC+
    ('dummy_energy_transition', '#DA70D6', 'Energy Transition (2020)'),    # Orchid - Structural
]

event_colors = {column: colour for column, colour, _ in _event_table}

event_labels = {column: text for column, _, text in _event_table}

# 4 x 3 grid: one panel per calendar year with event windows shaded on top.
fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(25, 22),
                         gridspec_kw={'hspace': 0.4, 'wspace': 0.1})
axes = axes.flatten()
price_color = '#1a3e72'
plot_years = range(2014, 2026)

# zip() stops at the shorter sequence, so years beyond the grid are skipped.
for i, (ax, year) in enumerate(zip(axes, plot_years)):
    yearly_data = df[df.index.year == year]
    if yearly_data.empty:
        fig.delaxes(ax)
        continue
    ax.plot(yearly_data.index, yearly_data['Price'],
            color=price_color, linewidth=2.8, zorder=1)
    used_label_positions = []
    y_range = ax.get_ylim()[1] - ax.get_ylim()[0]

    for dummy, color in event_colors.items():
        if dummy not in df.columns:
            continue
        # Any positive value counts as "event active". Testing == 1 would shade
        # only the first day of ramp-style columns such as trend_rebound.
        event_mask = (df[dummy] > 0) & (df.index.year == year)
        event_dates = df[event_mask].index
        if len(event_dates) == 0:
            continue

        start_date = event_dates.min()
        end_date = event_dates.max()
        ax.axvspan(start_date, end_date, color=color,
                   alpha=0.25, zorder=2)

        # Only windows longer than five days get a text label.
        if (end_date - start_date).days <= 5:
            continue

        # Try successively lower heights until the label clears the earlier
        # ones; if every height collides the label is skipped.
        for attempt in np.linspace(0.85, 0.35, 6):
            y_pos = ax.get_ylim()[0] + y_range * attempt
            collision = any(abs(y_pos - existing_pos) < y_range * 0.15
                            for existing_pos in used_label_positions)
            if collision:
                continue
            used_label_positions.append(y_pos)
            mid_date = start_date + (end_date - start_date)/2
            ax.text(mid_date, y_pos, event_labels[dummy],
                    fontsize=9, color='black', ha='center',
                    bbox=dict(boxstyle='round,pad=0.2',
                              facecolor='white',
                              edgecolor=color,
                              linewidth=0.8,
                              alpha=0.8),
                    zorder=4)
            break

    # Yearly high (H) and low (L) markers with value annotations.
    max_date = yearly_data['Price'].idxmax()
    min_date = yearly_data['Price'].idxmin()
    max_val = yearly_data['Price'].max()
    min_val = yearly_data['Price'].min()

    ax.scatter([max_date], [max_val], color='#006400', s=70,
               zorder=5, edgecolor='white', linewidth=0.8)
    ax.scatter([min_date], [min_val], color='#8B0000', s=70,
               zorder=5, edgecolor='white', linewidth=0.8)
    ax.annotate(f'H: {max_val:.1f}', xy=(max_date, max_val),
                xytext=(5, 5), textcoords='offset points',
                fontsize=10, color='#006400', ha='left',
                bbox=dict(boxstyle='round,pad=0.2',
                          facecolor='white',
                          edgecolor='#006400',
                          alpha=0.7))
    ax.annotate(f'L: {min_val:.1f}', xy=(min_date, min_val),
                xytext=(5, -15), textcoords='offset points',
                fontsize=10, color='#8B0000', ha='left',
                bbox=dict(boxstyle='round,pad=0.2',
                          facecolor='white',
                          edgecolor='#8B0000',
                          alpha=0.7))

    # X-axis format: one tick per month, abbreviated month names.
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
    ax.xaxis.set_minor_locator(mdates.MonthLocator())
    ax.tick_params(axis='x', which='major', rotation=45, labelsize=9, pad=2)
    ax.tick_params(axis='y', labelsize=9)
    ax.grid(axis='y', which='major', linestyle=':', linewidth=0.7, alpha=0.6)
    ax.grid(axis='x', which='major', linestyle=':', linewidth=0.3, alpha=0.3)
    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)
    ax.set_title(f'Brent Crude Oil - {year}', fontsize=13, pad=12, weight='bold')
    # Only the left-most column carries a y-axis label.
    if i % 3 == 0:
        ax.set_ylabel('Price (USD/bbl)', fontsize=11, labelpad=8)
    else:
        ax.set_ylabel('')
    ax.set_xlabel('')

# Remove grid cells that have no year assigned (none with 12 years on 12 panels).
for unused_ax in axes[len(plot_years):]:
    fig.delaxes(unused_ax)

# Create a well-organized legend
#legend_elements = []
#for dummy, label in event_labels.items():
    #if dummy in df.columns:
        #legend_elements.append(
            #Patch(facecolor=event_colors[dummy], 
                  #edgecolor=event_colors[dummy],
                  #alpha=0.35,
                  #label=label))

# Position legend below with better formatting
#fig.legend(handles=legend_elements, 
           #loc='lower center', 
           #bbox_to_anchor=(0.5, 0.02),
           #ncol=3, 
           #fontsize=9,
           #frameon=True,
           #framealpha=0.9,
           #facecolor='white',
           #edgecolor='0.8',
           #title='Key Events',
           #title_fontsize=10)

# Overall title and final spacing for the yearly grid figure.
fig.suptitle('Brent Crude Oil Price Dynamics with Major Market Events (2014-2025)',
             fontsize=18, y=1.02, weight='bold')
fig.tight_layout(pad=3.0, h_pad=2.5, w_pad=2.0)
# Applied after tight_layout so these top/bottom margins are the final ones.
fig.subplots_adjust(top=0.93, bottom=0.12)
plt.show()

### Plot Deret Waktu-Bulanan

In [None]:
# Exclude Jul-Dec 2025 and work on an explicit copy, so adding the helper column
# below is never an assignment into a slice of df.
df_filtered = df[~((df.index.year == 2025) & (df.index.month >= 7))].copy()
df_filtered['month'] = df_filtered.index.month
monthly_avg = df_filtered.groupby('month')['Price'].mean()
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
            'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# Translate month numbers to names one by one: a month without observations can
# then neither shift the labels nor raise a length-mismatch error.
monthly_avg.index = [month_names[m - 1] for m in monthly_avg.index]

plt.figure(figsize=(14,7))
palette = sns.color_palette("viridis", len(monthly_avg))
bars = sns.barplot(x=monthly_avg.index, y=monthly_avg.values, palette=palette, edgecolor='black')
plt.plot(monthly_avg.index, monthly_avg.values, color='darkblue', marker='o', linewidth=2, label='Trend')
# Print each average just above its bar.
for bar, value in zip(bars.patches, monthly_avg.values):
    height = bar.get_height()
    plt.text(
        bar.get_x() + bar.get_width() / 2,
        height + 0.8,
        f'{value:.2f}',
        ha='center',
        va='bottom',
        fontsize=10,
        fontweight='bold',
        color='black',
        path_effects=[patheffects.withStroke(linewidth=1.2, foreground='white')])

plt.title('Average Brent Crude Oil Price by Month (2014 - June 2025)', fontsize=18, fontweight='bold')
plt.ylabel('Average Price (USD)', fontsize=14)
plt.xlabel('Month', fontsize=14)
# NOTE(review): fixed y-range; averages outside 60-75 USD would be clipped.
plt.ylim(60, 75)
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.figtext(0.5, -0.08, 'Note: Data for July - December 2025 is excluded due to incompleteness.', 
            ha='center', fontsize=11, style='italic')
plt.legend()
plt.tight_layout()
plt.show()

### Plot Deret Waktu-Harian

In [None]:
# Exclude Jul-Dec 2025 and take an explicit copy so the helper column is not
# assigned into a slice of df.
df_filtered = df[~((df.index.year == 2025) & (df.index.month >= 7))].copy()
df_filtered['weekday'] = df_filtered.index.weekday
# Keep Monday (0) to Friday (4) only.
df_filtered = df_filtered[df_filtered['weekday'] <= 4]
weekday_avg = df_filtered.groupby('weekday')['Price'].mean().reset_index()

# Replace weekday numbers with ordered labels so sorting follows the week.
weekday_mapping = {0: 'Mon', 1: 'Tue', 2: 'Wed', 3: 'Thu', 4: 'Fri'}
weekday_avg['weekday'] = weekday_avg['weekday'].map(weekday_mapping)
weekday_avg['weekday'] = pd.Categorical(
    weekday_avg['weekday'],
    categories=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'],
    ordered=True
)
weekday_avg = weekday_avg.sort_values('weekday')

plt.figure(figsize=(10, 6))
palette = sns.color_palette("cubehelix", len(weekday_avg))
bars = sns.barplot(x='weekday', y='Price', data=weekday_avg, palette=palette, 
                  edgecolor='black', order=['Mon', 'Tue', 'Wed', 'Thu', 'Fri'])
line = plt.plot(weekday_avg['weekday'], weekday_avg['Price'], 
               color='darkred', marker='o', linewidth=2, label='Trend')
# Print each average just above its bar.
for i, (day, price) in enumerate(zip(weekday_avg['weekday'], weekday_avg['Price'])):
    plt.text(
        i,  
        price + 0.1,
        f'{price:.2f}', 
        ha='center',
        va='bottom',
        fontsize=10,
        fontweight='bold',
        color='black',
        path_effects=[patheffects.withStroke(linewidth=2, foreground='white')])

plt.title('Average Brent Crude Oil Price by Weekday (2014 – June 2025)', 
          fontsize=14, fontweight='bold', pad=20)
plt.ylabel('Average Price (USD)', fontsize=12)
plt.xlabel('Weekday', fontsize=12)
# Zoom the y axis to the data so the small weekday differences are visible.
min_price = weekday_avg['Price'].min()
max_price = weekday_avg['Price'].max()
plt.ylim(min_price - 0.5, max_price + 0.5)
plt.grid(axis='y', linestyle='--', alpha=0.5)
# Show the 'Trend' entry, matching the monthly and day-of-month charts.
plt.legend()
plt.figtext(0.5, 0.01, 
           'Note: Weekday data for July–December 2025 is excluded due to incompleteness.', 
           ha='center', fontsize=10, style='italic')
plt.tight_layout()
plt.show()

### Plot Deret Waktu-Tanggal

In [None]:
# Exclude Jul-Dec 2025 and take an explicit copy so the helper column is not
# assigned into a slice of df.
df_filtered = df[~((df.index.year == 2025) & (df.index.month >= 7))].copy()
df_filtered['day'] = df_filtered.index.day
daily_avg = df_filtered.groupby('day')['Price'].mean().reset_index()

plt.figure(figsize=(16, 6))
palette = sns.color_palette("coolwarm", len(daily_avg))
bars = sns.barplot(x='day', y='Price', data=daily_avg, palette=palette, 
                  edgecolor='black', order=range(1, 32))
# The bars follow order=range(1, 32), i.e. day d sits at slot d - 1. The line,
# labels and ticks use the same slots instead of the row number, so they stay
# aligned even if a calendar day has no observations.
bar_slots = daily_avg['day'].to_numpy() - 1
line = plt.plot(bar_slots, daily_avg['Price'], 
               color='darkblue', marker='o', linewidth=2, 
               markersize=6, label='Trend')
for slot, price in zip(bar_slots, daily_avg['Price']):
    plt.text(
        slot, 
        price + 0.5, 
        f'{price:.2f}',
        ha='center',
        va='bottom',
        fontsize=9,
        fontweight='bold',
        color='black',
        path_effects=[patheffects.withStroke(linewidth=2, foreground='white')])

plt.title('Average Brent Crude Oil Price by Calendar Day (1–31)\n2014 – June 2025',fontsize=16, fontweight='bold', pad=20)
plt.ylabel('Average Price (USD)', fontsize=12)
plt.xlabel('Day of Month', fontsize=12)
plt.xticks(ticks=bar_slots, labels=daily_avg['day'])
data_min = daily_avg['Price'].min()
data_max = daily_avg['Price'].max()
# NOTE(review): the y-range is clamped to 60-75 USD, so averages outside that
# band would be clipped — confirm this is intended.
plt.ylim(max(60, data_min - 1), min(75, data_max + 1))
plt.grid(axis='y', linestyle=':', alpha=0.5)
plt.legend(framealpha=0.9)
plt.figtext(0.5, 0.01, 
           'Note: Daily averages computed across all available months and years', 
           ha='center', fontsize=10, style='italic')
plt.tight_layout()
plt.show()

## Uji T Kejadian

### Model Dasar Simulasi

In [None]:
def analisis_jendela_event_weekdays_only(df, dummy_col, window_days=18, price_col='Price', title_event='Event'):
    """Plot prices around the first event day and run a paired t-test before vs. after.

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a sorted datetime index (any index name).
    dummy_col : str
        Column whose value 1 marks event days; the earliest such date is the
        event day (relative day 0).
    window_days : int, default 18
        Number of weekday observations shown on each side of the event day.
    price_col : str, default 'Price'
        Column holding the price.
    title_event : str, default 'Event'
        Event name inserted into the plot title.

    Returns
    -------
    None
        The function draws the chart and prints the test result. It returns
        early (after printing a message) when no event is found, when the event
        day is not a weekday observation, or when fewer than two complete
        before/after pairs are available.

    Notes
    -----
    Observations are paired by position (the i-th observation before the event
    with the i-th observation from the event day on). The printed means are
    computed on exactly the pairs that enter the t-test.
    """
    event_start = df[df[dummy_col] == 1].index.min()
    if pd.isna(event_start):
        print(f"Tidak ditemukan event pada kolom '{dummy_col}'.")
        return

    # Calendar-day slice twice as wide as the requested window, then reduced to
    # weekdays below.
    start_window = event_start - pd.Timedelta(days=window_days*2)
    end_window = event_start + pd.Timedelta(days=window_days*2)
    prices = df.loc[start_window:end_window, price_col]
    # Build the working frame explicitly so it does not depend on the index name.
    temp_df = pd.DataFrame({'date': prices.index, price_col: prices.to_numpy()})
    # Re-number rows after the weekday filter: all positions below are then true
    # row offsets rather than leftover labels with gaps.
    temp_df = temp_df[temp_df['date'].dt.weekday < 5].reset_index(drop=True)

    matches = temp_df.index[temp_df['date'] == event_start]
    if len(matches) == 0:
        print("Tanggal event tidak ditemukan dalam data setelah filter hari kerja.")
        return
    event_pos = matches[0]
    start_pos = max(0, event_pos - window_days)
    end_pos = min(len(temp_df) - 1, event_pos + window_days)
    event_window_df = temp_df.iloc[start_pos:end_pos + 1].copy()
    event_window_df['relative_day'] = np.arange(start_pos - event_pos, end_pos - event_pos + 1)

    plt.figure(figsize=(12,7))
    plt.plot(event_window_df['relative_day'], event_window_df[price_col], marker='o', linestyle='-', color='darkred')
    plt.axvline(0, color='blue', linestyle='--')
    plt.title(f'Pengaruh {title_event} pada Harga Brent Crude Oil (±{window_days} hari kerja)', fontsize=16, fontweight='bold')
    plt.grid(True, linestyle='--', alpha=0.6)
    # Label every `step`-th point (which includes day 0) plus the last point.
    step = max(1, window_days // 3)
    last_day = event_window_df['relative_day'].max()
    for rel_day, price in zip(event_window_df['relative_day'], event_window_df[price_col]):
        if (rel_day % step == 0 or rel_day == last_day) and not pd.isna(price):
            plt.text(rel_day, price + 0.5, f"{price:.2f}", ha='center', fontsize=9,
                     path_effects=[patheffects.withStroke(linewidth=1, foreground="white")])

    plt.tight_layout()
    plt.show()

    before_prices = event_window_df.loc[event_window_df['relative_day'] < 0, price_col]
    after_prices = event_window_df.loc[event_window_df['relative_day'] >= 0, price_col]
    min_len = min(len(before_prices), len(after_prices))
    # Equal-length samples: the last min_len observations before the event and
    # the first min_len from the event day on; pairs with a missing price are
    # dropped so NaN cannot propagate into the statistic.
    pairs = pd.DataFrame({
        'before': before_prices.iloc[len(before_prices) - min_len:].to_numpy(),
        'after': after_prices.iloc[:min_len].to_numpy(),
    }).dropna()
    if len(pairs) < 2:
        print("Peringatan: Data tidak cukup untuk melakukan uji t berpasangan.")
        return
    mean_before = pairs['before'].mean()
    mean_after = pairs['after'].mean()

    print("\nHipotesis uji t berpasangan:")
    print("H0: Rata-rata harga sebelum event = rata-rata harga setelah event")
    print("H1: Rata-rata harga sebelum event ≠ rata-rata harga setelah event (uji dua arah)")
    t_stat, p_val = ttest_rel(pairs['before'], pairs['after'])

    print("\nHasil uji statistik:")
    print(f"Rata-rata harga sebelum event: {mean_before:.2f} USD")
    print(f"Rata-rata harga setelah event: {mean_after:.2f} USD")
    print(f"Statistik t: {t_stat:.3f}")
    print(f"P-value: {p_val:.3f}")

    alpha = 0.05
    if p_val < alpha:
        print(f"\nKesimpulan: Tolak H0 pada α = {alpha}. Terdapat perbedaan signifikan rata-rata harga sebelum dan setelah event.")
    else:
        print(f"\nKesimpulan: Gagal tolak H0 pada α = {alpha}. Tidak ditemukan perbedaan signifikan rata-rata harga sebelum dan setelah event.")

# Pass only the event name: the function wraps it as
# 'Pengaruh <event> pada Harga Brent Crude Oil', so repeating that phrase in
# title_event duplicated it in the chart title.
analisis_jendela_event_weekdays_only(df, 'dummy_iran_war', window_days=18, title_event='Konflik Iran-Israel (2025)')

### Simulasi Penuh Kejadian

In [None]:
def analyze_event_impact(
    df: pd.DataFrame,
    dummy_col: str,
    price_col: str = 'Price',
    event_name: Optional[str] = None,
    window_days: int = 24,
    pre_event_days: Optional[int] = None,
    post_event_days: Optional[int] = None,
    weekday_only: bool = False,
    show_plot: bool = True,
    show_stats: bool = True,
    value_markers: bool = True,
    grid_style: str = '--',
    trend_line: bool = True,
    ci_bands: bool = False
) -> dict:
    """Compare prices before and after the first day of an event.

    The first date on which ``dummy_col`` equals 1 is the event day. Equal-length
    samples are taken from the observations immediately before it and from the
    event day onward, compared with a paired t-test, and optionally plotted and
    summarised in print.

    Parameters
    ----------
    df : pandas.DataFrame
        Data with a datetime index; it is copied and sorted, never modified.
    dummy_col, price_col : str
        Event indicator column and price column.
    event_name : str, optional
        Title text; derived from ``dummy_col`` when omitted.
    window_days : int, default 24
        Observations per side, unless overridden by ``pre_event_days`` /
        ``post_event_days``.
    weekday_only : bool, default False
        Restrict the candidate dates to Monday-Friday.
    show_plot, show_stats : bool
        Toggle the chart and the printed summary.
    value_markers, grid_style, trend_line, ci_bands
        Chart options: value labels, grid line style, point markers, and a
        rolling mean ± 1.96 rolling-std band (7-observation window).

    Returns
    -------
    dict
        Keys ``event_name``, ``event_start``, ``pre_event_mean``,
        ``post_event_mean``, ``t_stat``, ``p_value``, ``paired_days``,
        ``pre_window``, ``post_window``. An empty dict is returned when the event
        day is not among the candidate dates. ``t_stat`` / ``p_value`` are NaN
        when fewer than two complete pairs exist.

    Raises
    ------
    ValueError
        If a column is missing, no event is flagged, or the price column holds
        no numeric data.
    """
    # Validation
    if dummy_col not in df.columns:
        raise ValueError(f"Kolom '{dummy_col}' tidak ditemukan.")
    if price_col not in df.columns:
        raise ValueError(f"Kolom '{price_col}' tidak ditemukan.")
    if df[dummy_col].sum() == 0:
        raise ValueError(f"Tidak ditemukan event pada kolom '{dummy_col}'.")

    df = df.copy()
    df[price_col] = pd.to_numeric(df[price_col], errors='coerce')
    df = df.sort_index()
    if df[price_col].isna().all():
        raise ValueError(f"Tidak ada data numerik valid pada kolom '{price_col}'.")
    if event_name is None:
        event_name = dummy_col.replace('dummy_', '').replace('_', ' ').title()

    event_dates = df.index[df[dummy_col] == 1]
    event_start = event_dates.min()
    pre_days = pre_event_days if pre_event_days is not None else window_days
    post_days = post_event_days if post_event_days is not None else window_days

    if weekday_only:
        valid_dates = df.index[df.index.weekday < 5]
    else:
        valid_dates = df.index
    if event_start not in valid_dates:
        print("Tanggal awal event tidak ada pada rentang tanggal yang dipilih (weekday/hari penuh).")
        return {}
    event_idx = np.where(valid_dates == event_start)[0][0]

    pre_dates = valid_dates[max(0, event_idx - pre_days):event_idx]
    post_dates = valid_dates[event_idx:event_idx + post_days]

    # Trim both sides to the same length. The pre-window is cut from the front
    # with an explicit start offset: a "[-min_len:]" slice would keep the whole
    # window when min_len is 0 and leave the two sides with different lengths.
    min_len = min(len(pre_dates), len(post_dates))
    pre_dates = pre_dates[len(pre_dates) - min_len:]
    post_dates = post_dates[:min_len]

    pre_prices = df.loc[pre_dates, price_col]
    post_prices = df.loc[post_dates, price_col]

    mean_before = pre_prices.mean()
    mean_after = post_prices.mean()

    # Only complete pairs enter the test, so a missing price on either side
    # cannot turn the statistic into NaN.
    paired = pd.DataFrame({
        'pre': pre_prices.to_numpy(),
        'post': post_prices.to_numpy(),
    }).dropna()
    if len(paired) < 2:
        print("Peringatan: Data tidak cukup untuk melakukan uji t berpasangan.")
        t_stat, p_val = np.nan, np.nan
    else:
        t_stat, p_val = ttest_rel(paired['pre'].to_numpy(), paired['post'].to_numpy())

    # Relative day 0 is the event day; negative values are the pre-event side.
    all_dates = pre_dates.append(post_dates)
    rel_days = np.arange(-min_len, min_len)
    timeline_df = pd.DataFrame({
        'price': df.loc[all_dates, price_col].values,
        'rel_day': rel_days
    }, index=all_dates)

    if show_plot:
        plt.figure(figsize=(14, 7))
        plt.plot(timeline_df['rel_day'], timeline_df['price'],
                 color='darkred', marker='o' if trend_line else None,
                 label='Harga', linewidth=1.5)

        plt.axvline(0, color='blue', linestyle='--', linewidth=1.5, label='Hari Event')
        plt.axvspan(0, min_len - 1, color='skyblue', alpha=0.3, label='Periode Pasca Event')

        if ci_bands and len(timeline_df) >= 7:
            # Centred 7-observation rolling mean ± 1.96 rolling standard deviations.
            roll = timeline_df['price'].rolling(7, center=True, min_periods=1)
            lower = roll.mean() - 1.96 * roll.std()
            upper = roll.mean() + 1.96 * roll.std()
            plt.fill_between(timeline_df['rel_day'], lower, upper,
                             color='gray', alpha=0.2, label='Interval Kepercayaan 95%')

        if value_markers:
            # Label roughly four points per side plus the event day and last day.
            step = max(1, min_len // 4)
            for _, row in timeline_df.iterrows():
                if (row['rel_day'] % step == 0 or row['rel_day'] in [0, min_len - 1]) and not np.isnan(row['price']):
                    plt.text(row['rel_day'], row['price'] + 0.5, f"{row['price']:.2f}",
                             ha='center', fontsize=9,
                             path_effects=[patheffects.withStroke(linewidth=1.5, foreground='white')])

        plt.title(f"Dampak Event: {event_name} ({'Hanya Hari Kerja' if weekday_only else 'Semua Hari'})", fontsize=15)
        plt.xlabel("Hari Relatif terhadap Hari Event")
        plt.ylabel(f"Harga {price_col} (USD)")
        plt.grid(True, linestyle=grid_style, alpha=0.6)
        plt.legend()
        plt.tight_layout()
        plt.show()

    if show_stats:
        print(f"\nHipotesis uji t berpasangan:")
        print("H0: Rata-rata harga sebelum event sama dengan rata-rata harga setelah event.")
        print("H1: Rata-rata harga sebelum event berbeda dengan rata-rata harga setelah event.\n")

        print(f"Rata-rata harga sebelum event: {mean_before:.2f} USD")
        print(f"Rata-rata harga setelah event: {mean_after:.2f} USD")
        if not np.isnan(t_stat):
            print(f"Hasil uji t berpasangan: t = {t_stat:.3f}, p = {p_val:.3f}")
            alpha = 0.05
            if p_val < alpha:
                print(f"Kesimpulan: Tolak H0 pada taraf nyata α = {alpha}. Terdapat perbedaan signifikan rata-rata harga sebelum dan setelah event.")
            else:
                print(f"Kesimpulan: Gagal tolak H0 pada taraf nyata α = {alpha}. Tidak terdapat perbedaan signifikan rata-rata harga sebelum dan setelah event.")
        else:
            print("Uji t berpasangan tidak dapat dilakukan karena data tidak cukup.")

    return {
        'event_name': event_name,
        'event_start': event_start,
        'pre_event_mean': mean_before,
        'post_event_mean': mean_after,
        't_stat': t_stat,
        'p_value': p_val,
        'paired_days': min_len,
        'pre_window': min_len,
        'post_window': min_len}

In [None]:
# Demo run: Iran-Israel conflict, weekday observations only, 17 per side.
# price_col is left at its default ('Price').
analyze_event_impact(df, 'dummy_iran_war', event_name='Iran-Israel Conflict',
                     window_days=17, weekday_only=True)

In [None]:
# Russia-Ukraine war simulation: weekday observations only, 120 per side.
# The chart title now names the event that dummy_ukraine actually flags; the
# previous label ('Oil War Conflict') described a different event.
analyze_event_impact(
    df,
    dummy_col='dummy_ukraine',
    price_col='Price',
    event_name='Russia-Ukraine War',
    window_days=120,
    weekday_only=True)

In [None]:
# COVID lockdown simulation: weekday observations only, 90 per side.
# price_col is left at its default ('Price').
analyze_event_impact(df, 'dummy_covid_lockdown', event_name='Covid Lockdown',
                     window_days=90, weekday_only=True)

## Interaksi Bulan Hari

In [None]:
# Calendar labels used as pivot axes (both columns are added to df itself).
df['weekday'] = df.index.day_name()
df['month'] = df.index.month_name()

# Display order for rows and columns; a plain pivot would sort them alphabetically.
ordered_weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
ordered_months = ['January', 'February', 'March', 'April', 'May', 'June',
                  'July', 'August', 'September', 'October', 'November', 'December']

# Mean price per (weekday, month) cell, re-ordered and rounded to two decimals.
pivot_table = (
    df.pivot_table(values='Price', index='weekday', columns='month', aggfunc='mean')
      .reindex(index=ordered_weekdays, columns=ordered_months)
      .round(2)
)
pivot_table

In [None]:
plt.rcParams['font.family'] = 'DejaVu Sans'
fig, ax = plt.subplots(figsize=(14, 7))

# Annotated heatmap of the weekday x month mean prices.
_heatmap_kwargs = dict(
    cmap='crest',
    annot=True,
    fmt='.2f',
    linewidths=0.7,
    linecolor='white',
    cbar_kws={'label': 'Harga Rata-Rata (USD)'},
)
sns.heatmap(pivot_table, ax=ax, **_heatmap_kwargs)

# Title and axis labels.
ax.set_xlabel('Bulan', fontsize=14, labelpad=10)
ax.set_ylabel('Hari dalam Minggu', fontsize=14, labelpad=10)
ax.set_title('Heatmap Harga Rata-Rata berdasarkan Hari dan Bulan', fontsize=18, fontweight='bold', pad=20)

# Slanted month labels, horizontal weekday labels.
ax.set_yticklabels(ax.get_yticklabels(), rotation=0, fontsize=11)
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right', fontsize=11)
plt.tight_layout()
plt.show()

## Jendela Bergerak Rataan-Stdev

In [None]:
# Make sure the frame is chronologically ordered with a datetime index.
df = df.sort_index()
if not isinstance(df.index, pd.DatetimeIndex):
    df.index = pd.to_datetime(df.index)

# 22-observation rolling window; min_periods=1 yields values from the first row.
_rolling_22 = df['Price'].rolling(window=22, min_periods=1)
df['rolling_mean_22'] = _rolling_22.mean()
df['rolling_std_22'] = _rolling_22.std()
# Band of one rolling standard deviation around the rolling mean.
df['upper_band'] = df['rolling_mean_22'].add(df['rolling_std_22'])
df['lower_band'] = df['rolling_mean_22'].sub(df['rolling_std_22'])

In [None]:
plt.figure(figsize=(16, 7))
# Actual price and its 22-observation rolling mean, drawn in this order.
for _column, _line_kwargs in (
    ('Price', dict(label='Harga Aktual', color='black', linewidth=1)),
    ('rolling_mean_22', dict(label='Rolling Mean (22 Hari)', color='orange', linewidth=2)),
):
    plt.plot(df.index, df[_column], **_line_kwargs)
# Shaded band between the lower and upper rolling bands.
plt.fill_between(df.index, df['lower_band'], df['upper_band'], color='orange', alpha=0.2, label='±1 Std Dev')

plt.xlabel('Tanggal', fontsize=13)
plt.ylabel('Harga (USD)', fontsize=13)
plt.title('Rolling 22 Hari: Tren & Volatilitas Harga', fontsize=18, fontweight='bold')
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Four three-year panels sharing one y axis.
year_ranges = [('2014', '2016'), ('2017', '2019'), ('2020', '2022'), ('2023', '2025')]
fig, axs = plt.subplots(2, 2, figsize=(18, 10), sharey=True)
axs = axs.flatten()

for i, (start, end) in enumerate(year_ranges):
    panel = axs[i]
    # Label-based slice by year strings, e.g. '2014' through '2016'.
    df_sub = df.loc[start:end]
    panel.plot(df_sub.index, df_sub['Price'], label='Harga Aktual', color='black', linewidth=1)
    panel.plot(df_sub.index, df_sub['rolling_mean_22'], label='Rolling Mean (22 Hari)', color='orange', linewidth=2)
    panel.fill_between(df_sub.index, df_sub['lower_band'], df_sub['upper_band'],
                       color='orange', alpha=0.2, label='±1 Std Dev')

    panel.set_title(f'Tren Harga: {start}–{end}', fontsize=13, fontweight='bold')
    panel.set(xlabel='Tanggal', ylabel='Harga (USD)')
    panel.grid(True, linestyle='--', alpha=0.5)
    panel.tick_params(axis='x', rotation=30)

# One shared legend below the grid, built from the first panel's entries.
handles, labels = axs[0].get_legend_handles_labels()
fig.legend(
    handles, labels,
    loc='lower center',
    bbox_to_anchor=(0.5, -0.03),
    ncol=len(labels),
    frameon=True,
    fontsize=12)

fig.suptitle('Rolling 22 Hari: Tren & Volatilitas Harga per Periode', fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout(rect=[0, 0.04, 1, 0.97])
plt.show()

### Analisis Gap

In [None]:
# Day-over-day percentage change and flags for moves larger than the threshold.
df = df.sort_index()
df['pct_change'] = df['Price'].pct_change() * 100
threshold = 5
df['extreme_up'] = df['pct_change'] > threshold
df['extreme_down'] = df['pct_change'] < -threshold
# "Event active" here refers to the Iran conflict dummy only.
df['event_active'] = df['dummy_iran_war'] == 1
extreme_days = df[(df['extreme_up']) | (df['extreme_down'])][
    ['Price', 'pct_change', 'extreme_up', 'extreme_down', 'event_active']]
# Chronological listing. The earlier sort by pct_change was overwritten before
# being used, so it has been removed.
extreme_days_sorted = extreme_days.sort_index()
extreme_days_sorted['Price'] = extreme_days_sorted['Price'].round(2)
extreme_days_sorted['pct_change'] = extreme_days_sorted['pct_change'].round(2)
extreme_days_sorted

In [None]:
plt.figure(figsize=(15, 6))
sns.lineplot(data=df, x=df.index, y='Price', color='gray', label='Harga')
# One scatter layer per boolean flag column, drawn in this order.
for _flag, _marker_kwargs in (
    ('extreme_up', dict(color='green', s=100, label='Lonjakan >5%')),
    ('extreme_down', dict(color='red', s=100, label='Penurunan >5%')),
    ('event_active', dict(color='blue', marker='X', s=100, label='Event Aktif')),
):
    _flagged = df[df[_flag]]
    sns.scatterplot(data=_flagged, x=_flagged.index, y='Price', **_marker_kwargs)

plt.xlabel('Tanggal', fontsize=13)
plt.ylabel('Harga (USD)', fontsize=13)
plt.title('Gap Analysis: Lonjakan / Penurunan Tajam (>5%)', fontsize=18, fontweight='bold', pad=20)
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Same gap-analysis plot as the full-history one, split into four 3-year panels (2x2 grid).
df = df.sort_index()
if not isinstance(df.index, pd.DatetimeIndex):
    # The year-based masks below need a DatetimeIndex.
    df.index = pd.to_datetime(df.index)

year_ranges = [(2014, 2016), (2017, 2019), (2020, 2022), (2023, 2025)]
# Each entry: (boolean flag column in df, styling passed to sns.scatterplot).
panel_markers = [
    ('extreme_up', dict(color='green', s=80, label='Lonjakan >5%')),
    ('extreme_down', dict(color='red', s=80, label='Penurunan >5%')),
    ('event_active', dict(color='blue', marker='X', s=100, label='Event Aktif')),
]

fig, axes = plt.subplots(2, 2, figsize=(20, 12), sharex=False, sharey=True)
axes = axes.flatten()

for ax, (start_year, end_year) in zip(axes, year_ranges):
    mask = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[mask]
    if df_sub.empty:
        # No rows in this window: hide the panel instead of drawing an empty one.
        ax.set_visible(False)
        continue

    sns.lineplot(data=df_sub, x=df_sub.index, y='Price', color='gray', label='Harga', ax=ax)
    for flag_col, marker_kws in panel_markers:
        flagged = df_sub[df_sub[flag_col]]
        sns.scatterplot(data=flagged, x=flagged.index, y='Price', ax=ax, **marker_kws)

    ax.set_title(f'{start_year} – {end_year}', fontsize=14, fontweight='bold')
    ax.set_xlabel('Tanggal')
    ax.set_ylabel('Harga (USD)')
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True, linestyle='--', alpha=0.4)
    ax.legend(fontsize=9)

fig.suptitle('Gap Analysis (Lonjakan / Penurunan > 5%) per 3 Tahun', fontsize=18, fontweight='bold')
# Keep the top 4% of the figure free for the suptitle.
fig.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()

### Cek Anomali

In [None]:
# Z-score of each price against the mean / std of the whole Price column;
# rows with |z| above the threshold are flagged as anomalies.
price = df['Price']
df['z_score'] = (price - price.mean()) / price.std()
threshold = 2.2
df['anomaly_zscore'] = df['z_score'].abs() > threshold

# Flagged rows only, limited to the two columns of interest.
anomalies_zscore = df.loc[df['anomaly_zscore'], ['Price', 'z_score']]
anomalies_zscore

In [None]:
# Compare the rows flagged under several |z-score| cut-offs, one panel per threshold.
thresholds = [2.0, 2.2, 2.5, 3.0]
fig, axes = plt.subplots(2, 2, figsize=(18, 12), sharex=True, sharey=True)
axes = axes.flatten()

abs_z = df['z_score'].abs()  # identical for every threshold, so computed once
for ax, thresh in zip(axes, thresholds):
    # Side effect: stores one boolean flag column per threshold on df.
    col_name = f'anomaly_zscore_{thresh}'
    df[col_name] = abs_z > thresh
    flagged = df[df[col_name]]

    sns.lineplot(data=df, x=df.index, y='Price', ax=ax, color='black', label='Harga', linewidth=1.2)
    sns.scatterplot(
        data=flagged,
        x=flagged.index, y='Price',
        ax=ax, color='red', s=80, label='Anomali', zorder=5)

    ax.set_title(f'Z-score Anomaly Detection (Threshold = {thresh})', fontsize=14, fontweight='bold', pad=14)
    ax.set_xlabel('Tanggal', fontsize=12, labelpad=8)
    ax.set_ylabel('Harga (USD)', fontsize=12, labelpad=8)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.tick_params(axis='x', rotation=45)
    ax.xaxis.set_major_locator(MaxNLocator(6))
    # Right-align the rotated tick labels.
    plt.setp(ax.get_xticklabels(), horizontalalignment='right')

# One figure-level legend, built from the first panel's entries and
# anchored just below the bottom edge of the figure.
handles, labels = axes[0].get_legend_handles_labels()
legend_kws = dict(
    loc='lower center',
    bbox_to_anchor=(0.5, -0.05),
    ncol=len(labels),
    frameon=True,
    fontsize=13,
    framealpha=0.9)
fig.legend(handles, labels, **legend_kws)

fig.subplots_adjust(hspace=0.3, wspace=0.15, bottom=0.18)
fig.suptitle('Deteksi Anomali dengan Z-score untuk Berbagai Threshold', fontsize=18, fontweight='bold', y=0.95)
fig.tight_layout(rect=[0, 0.05, 1, 0.95])
plt.show()

In [None]:
# Z-score anomalies at threshold 2.2, drawn in consecutive 3-year windows (at most four panels).
threshold = 2.2
df['anomaly_zscore_2.2'] = df['z_score'].abs() > threshold

years = sorted(df.index.year.unique())
first_year, last_year = years[0], years[-1]
year_ranges = [(y, y + 2) for y in range(first_year, last_year + 1, 3)]  # 3-year windows
shown_ranges = year_ranges[:4]  # the 2x2 grid holds at most four windows

# Global matplotlib defaults: these stay in effect for every later figure as well.
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'axes.titlesize': 15,
    'axes.titleweight': 'bold',
    'axes.labelsize': 13,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 12,
})

fig, axes = plt.subplots(2, 2, figsize=(18, 10), sharey=True)
axes = axes.flatten()

for ax, (start_year, end_year) in zip(axes, shown_ranges):
    mask = (df.index.year >= start_year) & (df.index.year <= end_year)
    df_sub = df.loc[mask]
    flagged = df_sub[df_sub['anomaly_zscore_2.2']]

    sns.lineplot(data=df_sub, x=df_sub.index, y='Price', ax=ax, color='black', label='Harga', linewidth=1.5)
    sns.scatterplot(
        data=flagged,
        x=flagged.index,
        y='Price', ax=ax, color='red', s=90, label='Anomali', zorder=5)

    ax.set_title(f'Anomali Z-score Threshold 2.2: {start_year}-{end_year}', pad=14)
    ax.set_xlabel('Tanggal', labelpad=8)
    ax.set_ylabel('Harga (USD)', labelpad=8)
    ax.grid(True, linestyle='--', alpha=0.5)
    ax.tick_params(axis='x', rotation=45)
    ax.xaxis.set_major_locator(MaxNLocator(nbins=6))  # at most 6 ticks on the x-axis
    # Right-align the rotated tick labels.
    plt.setp(ax.get_xticklabels(), horizontalalignment='right')

# Remove grid slots that did not receive a window.
for unused_ax in axes[len(shown_ranges):]:
    fig.delaxes(unused_ax)

# One figure-level legend, built from the first panel's entries.
handles, labels = axes[0].get_legend_handles_labels()
legend_kws = dict(
    loc='lower center',
    bbox_to_anchor=(0.5, -0.08),
    ncol=len(labels),
    frameon=True,
    fontsize=13)
fig.legend(handles, labels, **legend_kws)

fig.subplots_adjust(hspace=0.35, wspace=0.18, bottom=0.22, top=0.90)
fig.suptitle('Deteksi Anomali Z-score Threshold 2.2 (Per 3 Tahun)', fontsize=19, fontweight='bold', y=0.96)
fig.tight_layout(rect=[0, 0.05, 1, 0.95])  # keeps the layout from being clipped
plt.show()

# Finalisasi Data

In [None]:
# Assemble the final modelling frame: Price plus every `dummy_*` indicator column.
# Reads the existing df['z_score'] column; the gap flags are recomputed here.
threshold_gap = 5      # percent change that counts as a gap
threshold_anom = 2.2   # |z-score| that counts as an anomaly

df = df.sort_index()
df['pct_change'] = df['Price'].pct_change() * 100
df['extreme_up'] = df['pct_change'] > threshold_gap
df['extreme_down'] = df['pct_change'] < -threshold_gap

# 0/1 indicators: a gap in either direction, and a z-score anomaly.
is_gap = df['extreme_up'] | df['extreme_down']
df['dummy_gap'] = is_gap.astype(int)
is_anomaly = df['z_score'].abs() > threshold_anom
df['dummy_anomaly'] = is_anomaly.astype(int)

dummy_cols = [col for col in df.columns if col.startswith('dummy_')]
cols_to_keep = ['Price', *dummy_cols]
df_final = df[cols_to_keep].copy()
df_final

In [None]:
#df_final.to_csv('final_data_with_dummy_anomaly.csv', index=True)