In [1]:
import warnings
import pickle
import pandas as pd
from utils import *

warnings.filterwarnings("ignore")

In [2]:
# Read both preprocessed inputs in one step (sales first, then sell prices).
sales, sell_prices = (
    pd.read_csv("../data/preprocessed/sales.csv"),
    pd.read_csv("../data/preprocessed/sell_prices.csv"),
)

In [3]:
# Save (run once to regenerate the pickle; kept commented out so re-runs only load)
# first_sales_column_dict = create_first_sales_column_dict(sell_prices, save_result=True)

# Load the stored dictionary from disk.
# NOTE(review): pickle.load executes arbitrary code from the file — only load pickles produced by this project.
with open("../data/preprocessed/first_sales_column_dict.pkl", "rb") as pkl_file:
    first_sales_column_dict = pickle.load(pkl_file)

### Fourier transform

In [4]:
# Load the Fourier analysis results (fundamental period and period strength per item),
# running the analysis only when the results pickle does not exist yet.
# The return value of analyze_period_with_fourier is not used: results are always read
# back from the pickle, so a cached run and a fresh run yield the same kind of object.
# Delete ../data/fourier/results.pkl to force a recomputation (e.g. after `sales` changes).
# NOTE(review): assumes analyze_period_with_fourier(save_result=True) writes
# ../data/fourier/results.pkl — confirm in utils.
# NOTE(review): pickle.load executes arbitrary code from the file — only load pickles produced by this project.
try:
    with open('../data/fourier/results.pkl', 'rb') as f:
        fourier_results = pickle.load(f)
except FileNotFoundError:
    # Save: no cached results yet, so compute them and let the helper persist them.
    analyze_period_with_fourier(sales, first_sales_column_dict, save_result=True, save_plot=False)

    # Load what was just saved.
    with open('../data/fourier/results.pkl', 'rb') as f:
        fourier_results = pickle.load(f)

In [5]:
# Flatten the Fourier results into one row per item:
# (item key, fundamental period, normalized period strength).
new_data_attempt = [
    (item_key, stats['fundamental_period'], stats['normalized_period_strength'])
    for item_key, stats in fourier_results.items()
]
new_df_attempt = pd.DataFrame(
    new_data_attempt,
    columns=["Item", "Fundamental_Period", "Normalized_Period_Strength"],
)

# Category = text before the first "_" in the second element of each item key.
new_df_attempt["Category"] = new_df_attempt["Item"].map(
    lambda item_key: item_key[1].split("_")[0]
)

# Share of all items held by each of the 10 most frequent fundamental periods.
top_10_periods_final = (
    new_df_attempt["Fundamental_Period"]
    .value_counts(normalize=True)
    .head(10)
)
top_periods = top_10_periods_final.index

# Every remaining statistic is grouped by fundamental period at least.
by_period = new_df_attempt.groupby("Fundamental_Period")

# Category mix within each fundamental period (each row sums to 1; absent categories are NaN).
category_period_ratio_final = (
    by_period["Category"]
    .value_counts(normalize=True)
    .unstack()
)

# Mean normalized period strength per fundamental period, over all categories.
average_strength_by_period_final = by_period["Normalized_Period_Strength"].mean()

# Mean normalized period strength per (fundamental period, category), one column per category.
category_strength_by_period_final = (
    new_df_attempt
    .groupby(["Fundamental_Period", "Category"])["Normalized_Period_Strength"]
    .mean()
    .unstack()
)

# Combine everything for the top-10 periods; rows keep the order of top_10_periods_final.
final_top10_analysis = category_period_ratio_final.loc[top_periods].assign(
    Overall_Period_Ratio=top_10_periods_final,
    Average_Strength=average_strength_by_period_final.loc[top_periods],
    FOODS_Strength=category_strength_by_period_final.loc[top_periods, "FOODS"],
    HOBBIES_Strength=category_strength_by_period_final.loc[top_periods, "HOBBIES"],
    HOUSEHOLD_Strength=category_strength_by_period_final.loc[top_periods, "HOUSEHOLD"],
)

# Display the result.
final_top10_analysis

Category,FOODS,HOBBIES,HOUSEHOLD,Overall_Period_Ratio,Average_Strength,FOODS_Strength,HOBBIES_Strength,HOUSEHOLD_Strength
Fundamental_Period,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
7,0.425587,0.167522,0.406891,0.59648,0.526726,0.551467,0.437506,0.537581
30,0.973082,0.005384,0.021534,0.081229,0.562273,0.567891,0.323285,0.368153
3,0.268421,0.371053,0.360526,0.041544,0.332755,0.332413,0.33587,0.329804
2,0.315615,0.332226,0.352159,0.032907,0.332818,0.338327,0.333384,0.327347
4,0.292453,0.320755,0.386792,0.023177,0.333497,0.343062,0.330033,0.329137
31,0.953642,0.019868,0.02649,0.016508,0.54288,0.550997,0.314079,0.422277
5,0.262411,0.411348,0.326241,0.015415,0.328471,0.32804,0.328654,0.328587
6,0.268908,0.310924,0.420168,0.01301,0.332074,0.346819,0.32384,0.328731
61,0.658824,0.211765,0.129412,0.009293,0.411923,0.425351,0.375374,0.403369
52,0.513514,0.418919,0.067568,0.00809,0.411677,0.43299,0.386601,0.405165


### LOESS regression

In [5]:
# Save (run once to regenerate the detrended series; kept commented out so re-runs only load)
# detrended_sales = detrend_with_loess(sales, first_sales_column_dict, span=0.1, save_result=True, save_plot=True)

# Load the stored detrended sales.
# NOTE(review): presumably this CSV is what detrend_with_loess writes when save_result=True — confirm in utils.
detrended_sales = pd.read_csv("../data/loess/detrended_sales.csv")

### log differencing

In [6]:
# Save (run once to regenerate the price-change series; kept commented out so re-runs only load)
# log_differenced_sell_prices = calculate_sell_price_changes_with_log_differencing(sell_prices, first_sales_column_dict, save_result=True, save_plot=True)

# Load the stored log-differenced sell prices.
# NOTE(review): presumably this CSV is what the helper above writes when save_result=True — confirm in utils.
log_differenced_sell_prices = pd.read_csv("../data/log_differencing/log_differenced_sell_prices.csv")