### log differencing

In [None]:
def calculate_sell_price_changes_with_log_differencing(sell_prices, first_sales_column_dict, save_result=False, save_plot=False):
    """Log-difference each row's sell prices from its first-sale column onward.

    For every row, the values from the row's start column to the last column
    are cast to float64, offset by 1e-9, passed through ``np.log`` and
    first-differenced. The series is prepended with its own first element, so
    the first differenced value of every row is 0. Columns before the start
    column keep the values they have in ``sell_prices``.

    Args:
        sell_prices: DataFrame whose first two columns form the lookup key
            into ``first_sales_column_dict``. When ``save_plot`` is True the
            columns ``state_id`` and ``item_id`` are also read from each row.
        first_sales_column_dict: Mapping from a ``(first column value,
            second column value)`` tuple to the name of the column at which
            differencing starts for that row.
        save_result: When True, write the result (without the index) to
            ``../data/log_differencing/log_differenced_sell_prices.csv``.
        save_plot: When True, save one two-panel PNG per row (original vs.
            log-differenced values) under ``../data/log_differencing/plot/``.

    Returns:
        A copy of ``sell_prices`` with the log-differenced values written in
        place of the original ones. The input DataFrame is not modified.

    Raises:
        KeyError: If a row's key has no entry in ``first_sales_column_dict``
            or the mapped column does not exist in ``sell_prices``.
    """
    import os

    plot_dir = "../data/log_differencing/plot"
    result_path = "../data/log_differencing/log_differenced_sell_prices.csv"

    # Work on a copy so the caller's frame stays untouched.
    log_differenced_sell_prices = sell_prices.copy()

    if save_plot:
        # savefig does not create missing directories.
        os.makedirs(plot_dir, exist_ok=True)

    # iterrows() yields index *labels*; .iloc needs *positions*, so track the
    # row position separately to stay correct for any index.
    for row_pos, (_, sell_price) in enumerate(sell_prices.iterrows()):
        # Look up the column where this row's series starts.
        key = tuple(sell_price.iloc[:2])
        start_col = first_sales_column_dict.get(key)
        if start_col is None:
            raise KeyError(f"No first-sales column registered for row key {key!r}")

        # Select the data from the start column onward.
        start_index = sell_prices.columns.get_loc(start_col)
        sell_price_values = np.array(sell_price.iloc[start_index:].values, dtype=np.float64)

        # Log transform (the small offset avoids log(0)) followed by differencing.
        logged_sell_price_values = np.log(sell_price_values + 1e-9)
        log_differenced_sell_price_values = np.diff(logged_sell_price_values, prepend=logged_sell_price_values[0])

        # Store the result for this row.
        log_differenced_sell_prices.iloc[row_pos, start_index:] = log_differenced_sell_price_values

        # Visualization.
        if save_plot:
            fig, axs = plt.subplots(2, 1, figsize=(15, 10))

            axs[0].plot(sell_price_values, label=f"Original Sell Prices: {sell_price['state_id']}, {sell_price['item_id']}")
            axs[0].set_xlabel("Time")
            axs[0].set_ylabel("Sell Prices")
            axs[0].legend()

            axs[1].plot(log_differenced_sell_price_values, label=f"Log-Differenced Sell Prices: {sell_price['state_id']}, {sell_price['item_id']}")
            axs[1].set_xlabel("Time")
            axs[1].set_ylabel("Log-Differenced Sell Prices")
            axs[1].legend()

            plt.tight_layout()
            plt.savefig(f"{plot_dir}/{sell_price['state_id']}_{sell_price['item_id']}.png")
            # Close the figure so memory does not grow with the row count.
            plt.close()

    if save_result:
        os.makedirs(os.path.dirname(result_path), exist_ok=True)
        log_differenced_sell_prices.to_csv(result_path, index=False)

    return log_differenced_sell_prices

In [None]:
# Save: uncomment to recompute the log-differenced prices and write the CSV (and per-row plots).
# log_differenced_sell_prices = calculate_sell_price_changes_with_log_differencing(sell_prices, first_sales_column_dict, save_result=True, save_plot=True)

# Load: read the log-differenced prices from the CSV file on disk.
log_differenced_sell_prices = pd.read_csv("../data/log_differencing/log_differenced_sell_prices.csv")

In [None]:
def analyze_stl_decomposition(data, period):
    """Run an STL decomposition and measure the strength of seasonality.

    Args:
        data: Time series passed straight to ``statsmodels`` ``STL``.
        period: Seasonal period passed to ``STL``.

    Returns:
        A dict with the fitted ``trend``, ``seasonal`` and ``resid``
        components plus ``strength_of_seasonality``, computed as
        ``max(0, 1 - Var(resid) / Var(seasonal + resid))``. The strength is
        0.0 when the detrended series has no (positive) variance.
    """
    from statsmodels.tsa.seasonal import STL

    result = STL(data, period=period).fit()
    seasonal = result.seasonal
    resid = result.resid

    # Variance of the detrended series (seasonal + remainder).
    detrended_var = (seasonal + resid).var()
    if detrended_var > 0:
        # Clamp at 0: the raw ratio can exceed 1 when the seasonal and
        # remainder components are negatively correlated.
        strength = max(0.0, 1 - resid.var() / detrended_var)
    else:
        # Constant (or undefined-variance) detrended series: avoid 0/0.
        strength = 0.0

    return {
        'trend': result.trend,
        'seasonal': seasonal,
        'resid': resid,
        'strength_of_seasonality': strength
    }

def optimize_period_selection(data, periods, snr_threshold=10):
    """Greedily pick periods whose reconstruction raises the SNR enough.

    Candidates are visited in ascending order. Each one is reconstructed on
    its own via ``reconstruct_signal_with_period`` and scored with
    ``calculate_snr`` (both defined outside this block). A candidate is kept
    only when its SNR exceeds the SNR of the last kept candidate (initially
    0) by more than ``snr_threshold``.

    Args:
        data: Signal handed to the reconstruction and SNR helpers.
        periods: Iterable of candidate periods.
        snr_threshold: Minimum SNR gain required to keep a candidate.

    Returns:
        List of the kept periods, in ascending order.
    """
    best_snr = 0
    kept = []

    for candidate in sorted(periods):
        # Score the signal rebuilt from this single candidate period.
        candidate_snr = calculate_snr(
            data, reconstruct_signal_with_period(data, candidate)
        )

        gain = candidate_snr - best_snr
        if not (gain > snr_threshold):
            continue

        kept.append(candidate)
        best_snr = candidate_snr

    return kept

def analyze_residual_periodicity(residuals):
    """Find the dominant periods in a residual series via its FFT.

    A frequency bin counts as significant when its magnitude is above the
    95th percentile of all FFT magnitudes. Only the non-negative half of the
    spectrum is reported: for real-valued input the negative-frequency bins
    mirror the positive ones and would only add negative duplicate periods.

    Args:
        residuals: 1-D sequence of (real-valued) residuals.

    Returns:
        List of positive periods (``1 / frequency``, in samples) of the
        significant bins, ordered by increasing frequency. The zero-frequency
        bin is never reported. An empty input yields an empty list.
    """
    values = np.asarray(residuals)
    sample_count = values.size
    if sample_count == 0:
        # The FFT and the percentile are undefined for empty input.
        return []

    # The threshold is taken over the full spectrum, as before.
    magnitudes = np.abs(np.fft.fft(values))
    magnitude_threshold = np.percentile(magnitudes, 95)

    # rfftfreq lists the bins 0 .. n//2 with non-negative frequencies, so the
    # Nyquist bin shows up as +0.5 instead of fftfreq's -0.5.
    half_frequencies = np.fft.rfftfreq(sample_count)
    half_magnitudes = magnitudes[:half_frequencies.size]

    significant = (half_magnitudes > magnitude_threshold) & (half_frequencies > 0)

    return [1 / freq for freq in half_frequencies[significant]]
