Merge all datasets into a single dataset

In [14]:
# Merge a specific list of cleaned CSV files into one dataset by inner-joining
# them on the shared `tanggal` (date) column.

import os
from datetime import datetime
from functools import reduce

import pandas as pd


def load_dated_frames(folder, filenames, key='tanggal'):
    """Read each CSV in *filenames* from *folder* and parse its date column.

    Files that do not exist are reported and skipped. Files that lack the
    *key* column are reported and skipped as well, because the merge that
    follows joins on that column and would otherwise raise ``KeyError``.

    Args:
        folder: Directory that contains the CSV files.
        filenames: File names (not full paths) to load, in merge order.
        key: Name of the date column to convert with ``pd.to_datetime``.

    Returns:
        A list of DataFrames, one per file that was loaded successfully.
    """
    frames = []
    for name in filenames:
        path = os.path.join(folder, name)
        if not os.path.exists(path):
            print(f"File not found: {name}")
            continue
        frame = pd.read_csv(path)
        if key not in frame.columns:
            print(f"Skipped {name}: no '{key}' column")
            continue
        frame[key] = pd.to_datetime(frame[key])
        frames.append(frame)
        print(f"Loaded {name}")
    return frames


specific_files = [
    'crude_oil_wti_cleaned_20250702_114456.csv',
    'data_inflasi_cleaned_20250702_102841.csv',
    'federal_funds_rate_cleaned_20250630_195749.csv',
    'harga_emas_cleaned.csv',
    'jakarta_stock_exchange_cleaned_20250702_095758.csv',
]  # Replace with your actual file names
data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"

dfs = load_dated_frames(data_path, specific_files)

# Merge using the reduce method
if len(dfs) > 1:
    # Inner join: only dates present in every input are kept. Column names
    # that repeat across inputs receive pandas' default _x/_y suffixes.
    final_merged = reduce(lambda left, right: pd.merge(left, right, on='tanggal', how='inner'), dfs)
    print(f"Final merged dataset shape: {final_merged.shape}")
    print(f"Columns: {list(final_merged.columns)}")
    print(f"Date range: {final_merged['tanggal'].min()} to {final_merged['tanggal'].max()}")

    # Generate filename with timestamp so earlier outputs are never overwritten
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"final_merged_dataset_{timestamp}.csv"
    output_path = os.path.join(data_path, output_filename)

    # Save to new file
    final_merged.to_csv(output_path, index=False)
    print(f"\n✅ Final merged dataset saved to: {output_filename}")
    print(f"Full path: {output_path}")

    # Display sample of merged data
    print("\nFirst 5 rows of merged dataset:")
    print(final_merged.head())

    # Display basic statistics
    print(f"\nDataset Info:")
    print(f"- Total rows: {len(final_merged)}")
    print(f"- Total columns: {len(final_merged.columns)}")
    print(f"- Memory usage: {final_merged.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

else:
    print("❌ Need at least 2 dataframes to merge")

Loaded crude_oil_wti_cleaned_20250702_114456.csv
Loaded data_inflasi_cleaned_20250702_102841.csv
Loaded federal_funds_rate_cleaned_20250630_195749.csv
Loaded harga_emas_cleaned.csv
Loaded jakarta_stock_exchange_cleaned_20250702_095758.csv
Final merged dataset shape: (34, 21)
Columns: ['tanggal', 'WTI_Price_USD', 'inflation_rate', 'close_rate', 'open_rate', 'high_rate', 'low_rate', 'volume_billion_x', 'change_percent_x', 'usd_per_ounce', 'kurs_dollar_per_ounce', 'idr_per_ounce', 'usd_per_gram', 'kurs_dollar_per_gram', 'idr_per_gram', 'close_index', 'open_index', 'high_index', 'low_index', 'volume_billion_y', 'change_percent_y']
Date range: 2020-04-01 00:00:00 to 2024-11-01 00:00:00

✅ Final merged dataset saved to: final_merged_dataset_20250703_110915.csv
Full path: D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing\final_merged_dataset_20250703_110915.csv

First 5 rows of merged dataset:
     tanggal  WTI_Price_USD  inflation_rate  close_rate  open_rate  hi

# Merge Dataset dengan Data Emas sebagai Struktur Utama
Pendekatan ini menggunakan data emas sebagai base dataset dan melakukan left join dengan dataset lainnya.

In [17]:
# Merge with the gold data as the base structure: every gold row is kept and
# the other datasets are left-joined onto it by `tanggal`.
import os
from datetime import datetime

import pandas as pd


def left_join_files(base, folder, filenames, key='tanggal'):
    """Left-join each CSV in *filenames* onto a copy of *base* using *key*.

    Files that do not exist, or that lack the *key* column, are reported and
    skipped so that one bad input does not abort the whole merge.

    Args:
        base: DataFrame whose rows are preserved; *key* must already be
            parsed as datetime.
        folder: Directory that contains the CSV files.
        filenames: File names (not full paths) to join, in order.
        key: Name of the date column shared by all inputs.

    Returns:
        A new DataFrame with the columns of every successfully joined file
        appended; *base* itself is not modified.
    """
    merged = base.copy()
    for name in filenames:
        path = os.path.join(folder, name)
        if not os.path.exists(path):
            print(f"❌ File not found: {name}")
            continue
        frame = pd.read_csv(path)
        if key not in frame.columns:
            print(f"❌ Skipped {name}: no '{key}' column")
            continue
        frame[key] = pd.to_datetime(frame[key])
        # Left join keeps every row of the base dataset
        merged = pd.merge(merged, frame, on=key, how='left')
        print(f"✅ Merged with {name}: Shape {merged.shape}")
    return merged


# Path to the data folder
data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"

# Gold data is the base dataset
gold_file = 'harga_emas_cleaned.csv'
gold_path = os.path.join(data_path, gold_file)

# Datasets joined onto the gold data
other_files = [
    'crude_oil_wti_cleaned_20250702_114456.csv',
    'data_inflasi_cleaned_20250702_102841.csv',
    'federal_funds_rate_cleaned_20250630_195749.csv',
    'jakarta_stock_exchange_cleaned_20250702_095758.csv'
]

gold_df = pd.read_csv(gold_path) if os.path.exists(gold_path) else None

if gold_df is None:
    print(f"❌ Gold file not found: {gold_file}")
elif 'tanggal' not in gold_df.columns:
    # Everything below joins and reports on this column
    print(f"❌ Gold file has no 'tanggal' column: {gold_file}")
else:
    gold_df['tanggal'] = pd.to_datetime(gold_df['tanggal'])

    print(f"✅ Loaded gold data: {gold_df.shape}")
    print(f"Gold data columns: {list(gold_df.columns)}")
    print(f"Gold date range: {gold_df['tanggal'].min()} to {gold_df['tanggal'].max()}")

    # Start from the gold data and left-join the remaining datasets.
    # Column names repeated across inputs keep pandas' default _x/_y suffixes.
    merged_gold_base = left_join_files(gold_df, data_path, other_files)

    # Summary of the merged dataset
    print(f"\n📊 Final merged dataset (Gold-based):")
    print(f"- Shape: {merged_gold_base.shape}")
    print(f"- Columns: {list(merged_gold_base.columns)}")
    print(f"- Date range: {merged_gold_base['tanggal'].min()} to {merged_gold_base['tanggal'].max()}")
    print(f"- Missing values per column:")
    print(merged_gold_base.isnull().sum())

    # Generate filename with timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"gold_based_merged_dataset_{timestamp}.csv"
    output_path = os.path.join(data_path, output_filename)

    # Save dataset
    merged_gold_base.to_csv(output_path, index=False)
    print(f"\n✅ Gold-based merged dataset saved to: {output_filename}")
    print(f"Full path: {output_path}")

    # Display sample data
    print("\nFirst 10 rows of gold-based merged dataset:")
    print(merged_gold_base.head(10))

    # Basic statistics
    print(f"\nDataset Statistics:")
    print(f"- Total rows: {len(merged_gold_base)}")
    print(f"- Total columns: {len(merged_gold_base.columns)}")
    print(f"- Data completeness: {(1 - merged_gold_base.isnull().sum().sum() / (merged_gold_base.shape[0] * merged_gold_base.shape[1])) * 100:.2f}%")

✅ Loaded gold data: (1899, 7)
Gold data columns: ['tanggal', 'usd_per_ounce', 'kurs_dollar_per_ounce', 'idr_per_ounce', 'usd_per_gram', 'kurs_dollar_per_gram', 'idr_per_gram']
Gold date range: 2020-01-01 00:00:00 to 2025-03-13 00:00:00
✅ Merged with crude_oil_wti_cleaned_20250702_114456.csv: Shape (1899, 8)
✅ Merged with data_inflasi_cleaned_20250702_102841.csv: Shape (1899, 9)
✅ Merged with federal_funds_rate_cleaned_20250630_195749.csv: Shape (1899, 15)
✅ Merged with jakarta_stock_exchange_cleaned_20250702_095758.csv: Shape (1899, 21)

📊 Final merged dataset (Gold-based):
- Shape: (1899, 21)
- Columns: ['tanggal', 'usd_per_ounce', 'kurs_dollar_per_ounce', 'idr_per_ounce', 'usd_per_gram', 'kurs_dollar_per_gram', 'idr_per_gram', 'WTI_Price_USD', 'inflation_rate', 'close_rate', 'open_rate', 'high_rate', 'low_rate', 'volume_billion_x', 'change_percent_x', 'close_index', 'open_index', 'high_index', 'low_index', 'volume_billion_y', 'change_percent_y']
- Date range: 2020-01-01 00:00:00 to 2

In [18]:
# Verify row counts and data structure of the inputs and of the merged output
import os

import pandas as pd


def find_latest_file(folder, prefix):
    """Return the newest file name in *folder* that starts with *prefix*.

    "Newest" means the lexicographically greatest name, which matches
    chronological order for names ending in a ``YYYYMMDD_HHMMSS`` timestamp.

    Args:
        folder: Directory to search.
        prefix: Required start of the file name.

    Returns:
        The matching file name, or ``None`` when *folder* does not exist or
        contains no matching file.
    """
    if not os.path.isdir(folder):
        return None
    candidates = [name for name in os.listdir(folder) if name.startswith(prefix)]
    return max(candidates) if candidates else None


data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"

# Check the row count of every input file
print("📊 Checking individual file row counts:")
files_to_check = [
    'harga_emas_cleaned.csv',
    'crude_oil_wti_cleaned_20250702_114456.csv',
    'data_inflasi_cleaned_20250702_102841.csv',
    'federal_funds_rate_cleaned_20250630_195749.csv',
    'jakarta_stock_exchange_cleaned_20250702_095758.csv'
]

for file in files_to_check:
    file_path = os.path.join(data_path, file)
    if not os.path.exists(file_path):
        print(f"- {file}: File not found")
        continue
    df = pd.read_csv(file_path)
    print(f"- {file}: {len(df)} rows")
    if 'tanggal' in df.columns:
        df['tanggal'] = pd.to_datetime(df['tanggal'])
        print(f"  Date range: {df['tanggal'].min()} to {df['tanggal'].max()}")

print("\n" + "="*50)

# Check the most recent merge result, if any
latest_file = find_latest_file(data_path, 'gold_based_merged_dataset_')
if latest_file is not None:
    latest_path = os.path.join(data_path, latest_file)
    merged_df = pd.read_csv(latest_path)
    print(f"\n📋 Latest merged file: {latest_file}")
    print(f"- Total rows: {len(merged_df)}")
    print(f"- Total columns: {len(merged_df.columns)}")

    if 'tanggal' in merged_df.columns:
        merged_df['tanggal'] = pd.to_datetime(merged_df['tanggal'])
        print(f"- Date range: {merged_df['tanggal'].min()} to {merged_df['tanggal'].max()}")

    print(f"\n🔍 Missing values analysis:")
    missing_counts = merged_df.isnull().sum()
    for col, count in missing_counts.items():
        if count > 0:
            print(f"- {col}: {count} missing values ({count/len(merged_df)*100:.1f}%)")
else:
    print("\n❌ No merged files found")

📊 Checking individual file row counts:
- harga_emas_cleaned.csv: 1899 rows
  Date range: 2020-01-01 00:00:00 to 2025-03-13 00:00:00
- crude_oil_wti_cleaned_20250702_114456.csv: 1301 rows
  Date range: 2019-12-31 00:00:00 to 2025-03-13 00:00:00
- data_inflasi_cleaned_20250702_102841.csv: 63 rows
  Date range: 2020-01-01 00:00:00 to 2025-03-01 00:00:00
- federal_funds_rate_cleaned_20250630_195749.csv: 1302 rows
  Date range: 2019-12-31 00:00:00 to 2025-03-18 00:00:00
- jakarta_stock_exchange_cleaned_20250702_095758.csv: 1262 rows
  Date range: 2019-12-30 00:00:00 to 2025-03-13 00:00:00


📋 Latest merged file: gold_based_merged_dataset_20250703_111551.csv
- Total rows: 1899
- Total columns: 21
- Date range: 2020-01-01 00:00:00 to 2025-03-13 00:00:00

🔍 Missing values analysis:
- WTI_Price_USD: 599 missing values (31.5%)
- inflation_rate: 1836 missing values (96.7%)
- close_rate: 599 missing values (31.5%)
- open_rate: 599 missing values (31.5%)
- high_rate: 599 missing values (31.5%)
- lo

In [19]:
# Inspect the gold data (base dataset)
import os

import pandas as pd

data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"
gold_file = 'harga_emas_cleaned.csv'
gold_path = os.path.join(data_path, gold_file)

# Guard the read so a missing file yields a message instead of a traceback
if os.path.exists(gold_path):
    # Load and inspect the gold data
    gold_df = pd.read_csv(gold_path)
    print(f"🥇 Data Emas (Base Dataset):")
    print(f"Total rows: {len(gold_df)}")
    print(f"Columns: {list(gold_df.columns)}")

    if 'tanggal' in gold_df.columns:
        gold_df['tanggal'] = pd.to_datetime(gold_df['tanggal'])
        print(f"Date range: {gold_df['tanggal'].min()} to {gold_df['tanggal'].max()}")
        print(f"First 3 dates: {gold_df['tanggal'].head(3).tolist()}")
        print(f"Last 3 dates: {gold_df['tanggal'].tail(3).tolist()}")

    # A left join with gold as the base is expected to keep this row count
    print(f"\nExpected final rows after merge: {len(gold_df)} (using left join with gold as base)")
else:
    print(f"❌ Gold file not found: {gold_file}")

🥇 Data Emas (Base Dataset):
Total rows: 1899
Columns: ['tanggal', 'usd_per_ounce', 'kurs_dollar_per_ounce', 'idr_per_ounce', 'usd_per_gram', 'kurs_dollar_per_gram', 'idr_per_gram']
Date range: 2020-01-01 00:00:00 to 2025-03-13 00:00:00
First 3 dates: [Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00'), Timestamp('2020-01-03 00:00:00')]
Last 3 dates: [Timestamp('2025-03-11 00:00:00'), Timestamp('2025-03-12 00:00:00'), Timestamp('2025-03-13 00:00:00')]

Expected final rows after merge: 1899 (using left join with gold as base)


In [20]:
# Check the result of the gold-based merge
import os

import pandas as pd

# Row count of the gold base file as reported by the earlier checks; a left
# join onto the gold data should leave the merged file with this many rows.
EXPECTED_ROWS = 1899


def missing_value_lines(frame):
    """Return one report line per column of *frame* that has missing values.

    Args:
        frame: DataFrame to inspect.

    Returns:
        A list of strings of the form ``"- <column>: <n> missing (<p>%)"`` in
        column order; empty when nothing is missing.
    """
    total_rows = len(frame)
    return [
        f"- {col}: {count} missing ({count/total_rows*100:.1f}%)"
        for col, count in frame.isnull().sum().items()
        if count > 0
    ]


# Look for the most recent merge output; a missing folder counts as "no files"
data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"
if os.path.isdir(data_path):
    gold_based_files = [f for f in os.listdir(data_path) if f.startswith('gold_based_merged_dataset_')]
else:
    gold_based_files = []

if gold_based_files:
    # Timestamped names sort chronologically, so the greatest name is the newest
    latest_file = max(gold_based_files)
    file_path = os.path.join(data_path, latest_file)

    merged_df = pd.read_csv(file_path)
    print(f"📊 Hasil Merge Gold-Based: {latest_file}")
    print(f"Total rows: {len(merged_df)}")
    print(f"Total columns: {len(merged_df.columns)}")

    if len(merged_df) == EXPECTED_ROWS:
        print(f"✅ BENAR! Jumlah rows sesuai dengan data emas ({EXPECTED_ROWS})")
    else:
        print(f"❌ SALAH! Seharusnya {EXPECTED_ROWS} rows, tapi dapat {len(merged_df)} rows")

    # Check missing values
    missing_summary = merged_df.isnull().sum()
    total_missing = missing_summary.sum()
    print(f"\nMissing values total: {total_missing}")

    # List the columns that contain missing values
    if total_missing > 0:
        print("Kolom dengan missing values:")
        for line in missing_value_lines(merged_df):
            print(line)
else:
    print("❌ File gold-based merge tidak ditemukan")

📊 Hasil Merge Gold-Based: gold_based_merged_dataset_20250703_111551.csv
Total rows: 1899
Total columns: 21
✅ BENAR! Jumlah rows sesuai dengan data emas (1899)

Missing values total: 11160
Kolom dengan missing values:
- WTI_Price_USD: 599 missing (31.5%)
- inflation_rate: 1836 missing (96.7%)
- close_rate: 599 missing (31.5%)
- open_rate: 599 missing (31.5%)
- high_rate: 599 missing (31.5%)
- low_rate: 599 missing (31.5%)
- volume_billion_x: 1899 missing (100.0%)
- change_percent_x: 599 missing (31.5%)
- close_index: 638 missing (33.6%)
- open_index: 638 missing (33.6%)
- high_index: 638 missing (33.6%)
- low_index: 638 missing (33.6%)
- volume_billion_y: 641 missing (33.8%)
- change_percent_y: 638 missing (33.6%)


# Fill Missing Values
Mengisi missing values pada dataset final dengan berbagai metode:
- Forward Fill: mengisi dengan nilai sebelumnya
- Backward Fill: mengisi dengan nilai berikutnya  
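- Interpolasi: mengisi dengan nilai hasil interpolasi linear di antara data sebelum dan sesudahnya (bukan sekadar rata-rata; nilainya proporsional terhadap posisi baris di antara kedua titik data)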

In [22]:
# Fill missing values in the final merged dataset
import os
from datetime import datetime

import pandas as pd

# Price, rate and index columns: carried forward from the last known value
financial_columns = [
    'WTI_Price_USD', 'close_rate', 'open_rate', 'high_rate', 'low_rate',
    'close_index', 'open_index', 'high_index', 'low_index'
]
# Percentage-change columns: linearly interpolated
change_columns = ['change_percent_x', 'change_percent_y']
# Volume columns: zero when entirely empty, otherwise interpolated
volume_columns = ['volume_billion_x', 'volume_billion_y']


def fill_missing_values(frame):
    """Return a copy of *frame* with its missing values filled per column group.

    * ``financial_columns``: forward fill, then backward fill for any gap at
      the very start (which has no earlier value to carry forward).
    * ``inflation_rate`` and ``change_columns``: linear interpolation, then
      forward/backward fill for whatever interpolation leaves empty.
    * ``volume_columns``: set to 0 when the whole column is missing,
      otherwise linear interpolation with remaining gaps set to 0.

    Columns that are absent from *frame* are skipped. ``Series.ffill()`` and
    ``Series.bfill()`` are used instead of ``fillna(method=...)``, which
    pandas has deprecated.

    Args:
        frame: DataFrame whose rows are already in chronological order.

    Returns:
        A new, filled DataFrame; *frame* is left unchanged.
    """
    filled = frame.copy()

    # 1. Forward fill for financial data (rates, prices, indices)
    print(f"\n🔄 Filling financial data with forward fill...")
    for col in financial_columns:
        if col in filled.columns:
            filled[col] = filled[col].ffill().bfill()

    # 2. Interpolate the inflation rate (monthly data on a daily calendar)
    if 'inflation_rate' in filled.columns:
        print("📈 Interpolating inflation rate...")
        filled['inflation_rate'] = (
            filled['inflation_rate'].interpolate(method='linear').ffill().bfill()
        )

    # 3. Interpolate the percentage-change columns
    for col in change_columns:
        if col in filled.columns:
            print(f"📊 Interpolating {col}...")
            filled[col] = filled[col].interpolate(method='linear').ffill().bfill()

    # 4. Volume: 0 when nothing is known, otherwise interpolate
    for col in volume_columns:
        if col in filled.columns:
            if filled[col].isnull().all():
                print(f"🔢 Filling {col} with 0 (all missing)...")
                filled[col] = 0
            else:
                print(f"📊 Interpolating {col}...")
                filled[col] = filled[col].interpolate(method='linear').fillna(0)

    return filled


# Locate the most recent merged dataset; a missing folder counts as "no files"
data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"
if os.path.isdir(data_path):
    gold_based_files = [f for f in os.listdir(data_path) if f.startswith('gold_based_merged_dataset_')]
else:
    gold_based_files = []

if not gold_based_files:
    print("❌ No gold-based merged dataset found. Run the merge cell first.")
else:
    latest_file = sorted(gold_based_files)[-1]
    file_path = os.path.join(data_path, latest_file)

    # Load data
    df = pd.read_csv(file_path)
    df['tanggal'] = pd.to_datetime(df['tanggal'])

    print(f"📊 Original dataset: {df.shape}")
    print(f"Missing values before filling:")
    missing_before = df.isnull().sum()
    for col, count in missing_before.items():
        if count > 0:
            print(f"- {col}: {count} missing ({count/len(df)*100:.1f}%)")

    # Sort by date so forward/backward fill follow chronological order
    df = df.sort_values('tanggal').reset_index(drop=True)

    # Fill on a copy; `df` keeps the original gaps for comparison
    df_filled = fill_missing_values(df)

    print(f"\n✅ Missing values after filling:")
    missing_after = df_filled.isnull().sum()
    total_missing_after = missing_after.sum()

    if total_missing_after == 0:
        print("🎉 All missing values filled successfully!")
    else:
        print(f"Remaining missing values: {total_missing_after}")
        for col, count in missing_after.items():
            if count > 0:
                print(f"- {col}: {count} missing")

    # Save filled dataset
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_filename = f"gold_based_merged_filled_{timestamp}.csv"
    output_path = os.path.join(data_path, output_filename)

    df_filled.to_csv(output_path, index=False)
    print(f"\n💾 Filled dataset saved to: {output_filename}")
    print(f"Full path: {output_path}")

    # Display comparison
    print(f"\n📊 Dataset Summary:")
    print(f"- Original missing values: {missing_before.sum()}")
    print(f"- After filling missing values: {missing_after.sum()}")
    print(f"- Improvement: {missing_before.sum() - missing_after.sum()} values filled")
    print(f"- Dataset shape: {df_filled.shape}")

    # Show sample of filled data
    print(f"\nSample data (first 10 rows):")
    print(df_filled[['tanggal', 'WTI_Price_USD', 'inflation_rate', 'close_rate', 'close_index']].head(10))

📊 Original dataset: (1899, 21)
Missing values before filling:
- WTI_Price_USD: 599 missing (31.5%)
- inflation_rate: 1836 missing (96.7%)
- close_rate: 599 missing (31.5%)
- open_rate: 599 missing (31.5%)
- high_rate: 599 missing (31.5%)
- low_rate: 599 missing (31.5%)
- volume_billion_x: 1899 missing (100.0%)
- change_percent_x: 599 missing (31.5%)
- close_index: 638 missing (33.6%)
- open_index: 638 missing (33.6%)
- high_index: 638 missing (33.6%)
- low_index: 638 missing (33.6%)
- volume_billion_y: 641 missing (33.8%)
- change_percent_y: 638 missing (33.6%)

🔄 Filling financial data with forward fill...
📈 Interpolating inflation rate...
📊 Interpolating change_percent_x...
📊 Interpolating change_percent_y...
🔢 Filling volume_billion_x with 0 (all missing)...
📊 Interpolating volume_billion_y...

✅ Missing values after filling:
🎉 All missing values filled successfully!

💾 Filled dataset saved to: gold_based_merged_filled_20250703_113946.csv
Full path: D:\College Life\Semester 4\Big Da

  df_filled[col] = df_filled[col].fillna(method='ffill')
  df_filled[col] = df_filled[col].fillna(method='bfill')
  df_filled['inflation_rate'] = df_filled['inflation_rate'].fillna(method='ffill')
  df_filled['inflation_rate'] = df_filled['inflation_rate'].fillna(method='bfill')
  df_filled[col] = df_filled[col].fillna(method='ffill')
  df_filled[col] = df_filled[col].fillna(method='bfill')


In [25]:
# Specific example: filling of the Federal Funds Rate
# Shows the federal rate values of the filled dataset over a date range.

# Imported here so the cell also runs on its own in a fresh kernel
import os

import pandas as pd


def format_rate_row(row, rate_cols):
    """Format one row of rate data as ``"<date>: close=…, open=…, …"``.

    Args:
        row: Mapping/Series with a datetime ``'tanggal'`` entry and one
            numeric entry per name in *rate_cols*.
        rate_cols: Names of the ``*_rate`` columns to include, in order; only
            these are read, so a dataset missing some of them still prints.

    Returns:
        The formatted line, with each value rendered to two decimals and
        labelled by its column name without the ``_rate`` suffix.
    """
    date_str = row['tanggal'].strftime('%Y-%m-%d')
    parts = [f"{col.replace('_rate', '')}={row[col]:.2f}" for col in rate_cols]
    return f"{date_str}: " + ", ".join(parts)


# Locate the filled dataset; a missing folder counts as "no files"
data_path = r"D:\College Life\Semester 4\Big Data & Predictive Analysis\Final Project\Testing"
if os.path.isdir(data_path):
    filled_files = [f for f in os.listdir(data_path) if f.startswith('gold_based_merged_filled_')]
else:
    filled_files = []

if filled_files:
    latest_filled = sorted(filled_files)[-1]
    filled_path = os.path.join(data_path, latest_filled)
    df_filled = pd.read_csv(filled_path)
    df_filled['tanggal'] = pd.to_datetime(df_filled['tanggal'])

    # Example: federal rate data around 2025-03-10 to 2025-03-18
    print("🔍 Contoh Federal Funds Rate filling:")
    print("Sebelum dan sesudah filling missing values\n")

    # Restrict the data to the example date window
    example_dates = df_filled[
        (df_filled['tanggal'] >= '2025-03-08') &
        (df_filled['tanggal'] <= '2025-03-20')
    ].copy()

    if not example_dates.empty:
        print("Data Federal Funds Rate di sekitar 2025-03-10 sampai 2025-03-18:")
        print("=" * 70)

        federal_cols = ['tanggal', 'close_rate', 'open_rate', 'high_rate', 'low_rate']
        display_cols = [col for col in federal_cols if col in example_dates.columns]
        rate_cols = [col for col in display_cols if col != 'tanggal']

        # Nothing to print per row when no rate column is available
        if rate_cols:
            for _, row in example_dates[display_cols].iterrows():
                print(format_rate_row(row, rate_cols))

    print(f"\n📈 Metode yang digunakan:")
    print("1. Forward Fill: Nilai kosong diisi dengan nilai terakhir yang ada")
    print("2. Contoh: Jika tanggal 10 = 4.33 dan tanggal 12 = 4.33")
    print("   Maka tanggal 11 akan diisi = 4.33 (forward fill dari tanggal 10)")
    print("3. Jika tanggal 13-17 kosong, akan diisi dengan nilai tanggal 12")
    print("   sampai ada nilai baru di tanggal 18")

    # Verify that no missing values remain
    print(f"\n✅ Verification - Missing values dalam dataset:")
    missing_check = df_filled.isnull().sum()
    total_missing = missing_check.sum()

    if total_missing == 0:
        print("🎉 SEMUA missing values sudah terisi!")
    else:
        print(f"⚠️ Masih ada {total_missing} missing values:")
        for col, count in missing_check.items():
            if count > 0:
                print(f"- {col}: {count}")

    print(f"\nDataset shape: {df_filled.shape}")

else:
    print("❌ Filled dataset tidak ditemukan. Jalankan cell sebelumnya dulu.")

🔍 Contoh Federal Funds Rate filling:
Sebelum dan sesudah filling missing values

Data Federal Funds Rate di sekitar 2025-03-10 sampai 2025-03-18:
2025-03-08: close=4.33, open=4.33, high=4.33, low=4.33
2025-03-09: close=4.33, open=4.33, high=4.33, low=4.33
2025-03-10: close=4.33, open=4.33, high=4.33, low=4.33
2025-03-11: close=4.33, open=4.33, high=4.33, low=4.33
2025-03-12: close=4.33, open=4.33, high=4.33, low=4.33
2025-03-13: close=4.33, open=4.33, high=4.33, low=4.33

📈 Metode yang digunakan:
1. Forward Fill: Nilai kosong diisi dengan nilai terakhir yang ada
2. Contoh: Jika tanggal 10 = 4.33 dan tanggal 12 = 4.33
   Maka tanggal 11 akan diisi = 4.33 (forward fill dari tanggal 10)
3. Jika tanggal 13-17 kosong, akan diisi dengan nilai tanggal 12
   sampai ada nilai baru di tanggal 18

✅ Verification - Missing values dalam dataset:
🎉 SEMUA missing values sudah terisi!

Dataset shape: (1899, 21)


Hasilnya, seluruh missing values pada dataset sudah terisi data.