In [13]:
!pip install kaggle pandas numpy




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
import os
import json
import getpass
from pathlib import Path

# Directory and file where the Kaggle credentials are stored: ~/.kaggle/kaggle.json.
kaggle_dir = Path.home() / ".kaggle"
kaggle_dir.mkdir(exist_ok=True)

kaggle_json = kaggle_dir / "kaggle.json"

# Prompt for credentials only when no credentials file exists yet.
if not kaggle_json.exists():
    print("Kaggle API Key")
    # Strip surrounding whitespace: pasted values often carry a stray space or newline.
    KAGGLE_USERNAME = input("KAGGLE_USERNAME: ").strip()
    KAGGLE_KEY = getpass.getpass("KAGGLE_KEY: ").strip()

    # Refuse to persist blank credentials: once the file exists this cell never
    # prompts again, so an empty entry would stay there until deleted by hand.
    if not KAGGLE_USERNAME or not KAGGLE_KEY:
        raise ValueError("KAGGLE_USERNAME and KAGGLE_KEY must not be empty")

    # Create the file with owner-only permissions from the start, so the key is
    # never on disk with the default (umask-dependent) mode.
    credentials_fd = os.open(kaggle_json, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(credentials_fd, "w") as credentials_file:
        json.dump({
            "username": KAGGLE_USERNAME,
            "key": KAGGLE_KEY
        }, credentials_file)

# Set permissions (also tightens a pre-existing file that was created with a looser mode).
os.chmod(kaggle_json, 0o600)


In [15]:
# raw 폴더 생성
RAW_DIR = Path("raw")
RAW_DIR.mkdir(exist_ok=True)

# Kaggle 다운로드
!kaggle competitions download -c demand-forecasting-kernels-only -p raw


demand-forecasting-kernels-only.zip: Skipping, found more recently modified local copy (use --force to force download)


In [16]:
import zipfile

# Path of the archive inside the raw folder.
zip_path = RAW_DIR / "demand-forecasting-kernels-only.zip"

# Unpack every member of the archive into the raw folder.
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(RAW_DIR)

# Check the extraction result. os.listdir returns entries in arbitrary order,
# so sort them to keep the displayed listing stable between runs.
sorted(os.listdir(RAW_DIR))


['demand-forecasting-kernels-only.zip',
 'sample_submission.csv',
 'test.csv',
 'train.csv']

In [17]:
import pandas as pd

# Read raw/train.csv and convert its "date" column to datetimes in a single
# chained expression; the column keeps its original position in the frame.
df = pd.read_csv("raw/train.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"])
)


In [18]:
# Sum sales into one row per store / item / week, then rename the columns to
# the names used in the exported CSV files.
# NOTE: for weekly frequencies pandas closes and labels each bin on its right
# edge by default, so with freq="W-MON" target_date is the Monday that ENDS the
# week (the bin covers Tuesday through Monday).
weekly = (
    df.groupby(["store", "item", pd.Grouper(key="date", freq="W-MON")])["sales"]
      .sum()
      .reset_index()
      .rename(columns={
          "store": "store_id",
          "item": "product_id",
          "date": "target_date",
          "sales": "actual_order_qty"
      })
)

# Display names for product ids 1-10; ids missing from the map get the
# fallback label passed to fillna below.
product_map = {
    1: "TV", 2: "냉장고", 3: "세탁기", 4: "에어컨", 5: "전자레인지",
    6: "청소기", 7: "공기청정기", 8: "커피머신", 9: "오븐", 10: "식기세척기"
}
weekly["product_name"] = weekly["product_id"].map(product_map).fillna("기타가전")
# Derive a warehouse id of 1, 2 or 3 from the store id.
weekly["warehouse_id"] = (weekly["store_id"] % 3) + 1

# Export the full weekly table.
weekly.to_csv("weekly_sales.csv", index=False)

# Export a reproducible random sample. DataFrame.sample raises ValueError when
# asked for more rows than exist (without replacement), so cap the size at the
# number of available rows; with 300 or more rows the result is unchanged.
sample_size = min(300, len(weekly))
weekly.sample(sample_size, random_state=42).to_csv("weekly_sales_sample.csv", index=False)

len(weekly), weekly.head()


(130500,
    store_id  product_id target_date  actual_order_qty product_name  \
 0         1           1  2013-01-07                83           TV   
 1         1           1  2013-01-14                68           TV   
 2         1           1  2013-01-21                76           TV   
 3         1           1  2013-01-28                73           TV   
 4         1           1  2013-02-04                89           TV   
 
    warehouse_id  
 0             2  
 1             2  
 2             2  
 3             2  
 4             2  )