In [1]:
import pandas as pd
import numpy as np

def demonstrate_data_preparation() -> None:
    """Print a step-by-step demo of aligning and cleaning two price series.

    The demo builds two small daily price series whose date ranges only
    partially overlap, injects one NaN and one Inf, and then prints:

    1. the raw series,
    2. both series restricted to their shared dates,
    3. both series with every row dropped where either value is NaN/Inf,
    4. summary counts (lengths at each stage, NaN count, Inf count).

    Returns:
        None. All results are written to stdout.
    """
    # 1. Build sample data: two price series with different trading dates.
    dates1 = pd.date_range('2024-01-01', '2024-01-05', freq='D')
    dates2 = pd.date_range('2024-01-02', '2024-01-06', freq='D')

    # Create the series as float64 from the start. Assigning np.inf into an
    # int64 Series relies on implicit upcasting, which pandas >= 2.1 flags
    # with a FutureWarning and which is slated to become an error.
    stock1 = pd.Series(
        [100, 102, 101, 103, 102], index=dates1, name='Stock1', dtype='float64'
    )
    stock2 = pd.Series(
        [50, 51, 52, 51, 50], index=dates2, name='Stock2', dtype='float64'
    )

    # Inject one NaN and one Inf so the cleaning step has something to remove.
    stock1.iloc[2] = np.nan
    stock2.iloc[1] = np.inf

    print("原始數據:")
    print("\nStock1:")
    print(stock1)
    print("\nStock2:")
    print(stock2)

    # 2. Index alignment: keep only the dates present in both series.
    common_idx = stock1.index.intersection(stock2.index)
    s1_aligned = stock1.loc[common_idx]
    s2_aligned = stock2.loc[common_idx]

    print("\n\n索引對齊後的數據:")
    print("\nStock1 aligned:")
    print(s1_aligned)
    print("\nStock2 aligned:")
    print(s2_aligned)

    # 3. NaN/Inf removal: np.isfinite is False for NaN, +Inf and -Inf, so a
    #    row survives only when BOTH series hold a finite value on that date.
    mask = np.isfinite(s1_aligned) & np.isfinite(s2_aligned)
    s1_clean = s1_aligned[mask]
    s2_clean = s2_aligned[mask]

    print("\n\n最終清理後的數據:")
    print("\nStock1 clean:")
    print(s1_clean)
    print("\nStock2 clean:")
    print(s2_clean)

    # 4. Summary statistics. NaN/Inf counts are taken on the raw series
    #    (before alignment), not on the aligned subsets.
    stats = {
        '原始數據長度': {'Stock1': len(stock1), 'Stock2': len(stock2)},
        '對齊後長度': {'Stock1': len(s1_aligned), 'Stock2': len(s2_aligned)},
        '清理後長度': {'Stock1': len(s1_clean), 'Stock2': len(s2_clean)},
        'NaN 數量': {
            'Stock1': stock1.isna().sum(),
            'Stock2': stock2.isna().sum(),
        },
        'Inf 數量': {
            'Stock1': np.isinf(stock1).sum(),
            'Stock2': np.isinf(stock2).sum(),
        },
    }

    print("\n\n數據處理統計:")
    for stat, values in stats.items():
        print(f"\n{stat}:")
        for stock, value in values.items():
            print(f"{stock}: {value}")

# Run the demonstration; every stage is printed to stdout.
demonstrate_data_preparation()

原始數據:

Stock1:
2024-01-01    100.0
2024-01-02    102.0
2024-01-03      NaN
2024-01-04    103.0
2024-01-05    102.0
Freq: D, Name: Stock1, dtype: float64

Stock2:
2024-01-02    50.0
2024-01-03     inf
2024-01-04    52.0
2024-01-05    51.0
2024-01-06    50.0
Freq: D, Name: Stock2, dtype: float64


索引對齊後的數據:

Stock1 aligned:
2024-01-02    102.0
2024-01-03      NaN
2024-01-04    103.0
2024-01-05    102.0
Freq: D, Name: Stock1, dtype: float64

Stock2 aligned:
2024-01-02    50.0
2024-01-03     inf
2024-01-04    52.0
2024-01-05    51.0
Freq: D, Name: Stock2, dtype: float64


最終清理後的數據:

Stock1 clean:
2024-01-02    102.0
2024-01-04    103.0
2024-01-05    102.0
Name: Stock1, dtype: float64

Stock2 clean:
2024-01-02    50.0
2024-01-04    52.0
2024-01-05    51.0
Name: Stock2, dtype: float64


數據處理統計:

原始數據長度:
Stock1: 5
Stock2: 5

對齊後長度:
Stock1: 4
Stock2: 4

清理後長度:
Stock1: 3
Stock2: 3

NaN 數量:
Stock1: 1
Stock2: 0

Inf 數量:
Stock1: 0
Stock2: 1


  stock2.iloc[1] = np.inf
