#### 모듈 설치

In [None]:
%pip install python-dotenv requests boto3 psycopg2-binary

#### 환경변수 로드 및 저장할 폴더 준비

In [1]:
import os
from dotenv import load_dotenv
from pathlib import Path

# 환경 변수 로드
load_dotenv()

API_KEY = os.getenv("KOBIS_API_KEY")

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_REGION = os.getenv("AWS_REGION")
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")

REDSHIFT_HOST = os.getenv("REDSHIFT_HOST")
REDSHIFT_PORT = os.getenv("REDSHIFT_PORT")
REDSHIFT_DB = os.getenv("REDSHIFT_DB")
REDSHIFT_USER = os.getenv("REDSHIFT_USER")
REDSHIFT_PASSWORD = os.getenv("REDSHIFT_PASSWORD")
IAM_ROLE_ARN = os.getenv("IAM_ROLE_ARN")

# 저장할 폴더 준비
project_folder = project_folder = Path.cwd().parent
raw_data_dir = project_folder / "raw_data" / "boxoffice"    # 일별 박스오피스
raw_data_dir.mkdir(exist_ok=True)

#### 데이터 수집

In [None]:
import requests
from datetime import datetime, timedelta
import json

# 데이터 수집
base_url = "http://kobis.or.kr/kobisopenapi/webservice/rest/boxoffice/searchDailyBoxOfficeList.json"    # 일별 박스오피스

for i in range(30):
    target_date = (datetime(2025, 9, 1) + timedelta(days=i)).strftime("%Y%m%d")
    params = {"key": API_KEY, "targetDt": target_date}

    try:
        res = requests.get(base_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()

        showRange = data["boxOfficeResult"]["showRange"].split("~")[0]
        dailyBoxOfficeList = data["boxOfficeResult"]["dailyBoxOfficeList"]

        file_path = raw_data_dir / f"dailyBoxOffice_{target_date}.json"

        with open(file_path, "w", encoding="utf-8") as f:
            for movie in dailyBoxOfficeList:
                # Redshift 테이블 스키마에 맞게 데이터 타입 변환 및 가공
                movie_data = {
                    "movieCd": int(movie["movieCd"]),
                    "showRange": datetime.strptime(showRange, "%Y%m%d").strftime("%Y-%m-%d"),
                    "rank": int(movie["rank"]),
                    "salesAmt": int(movie["salesAmt"]),
                    "salesShare": float(movie["salesShare"]),
                    "salesInten": int(movie["salesInten"]),
                    "salesChange": float(movie["salesChange"]),
                    "salesAcc": int(movie["salesAcc"]),
                    "audiCnt": int(movie["audiCnt"]),
                    "audiInten": int(movie["audiInten"]),
                    "audiChange": float(movie["audiChange"]),
                    "audiAcc": int(movie["audiAcc"]),
                    "scrnCnt": int(movie["scrnCnt"]),
                    "showCnt": int(movie["showCnt"])
                }
                
                json_line = json.dumps(movie_data, ensure_ascii=False)
                f.write(json_line + '\n') 

        print(f"{target_date} 저장 완료 (JSON Lines 형식)")

    except Exception as e:
        print(f"{target_date} 데이터 수집 실패: {e}")

20250901 저장 완료 (JSON Lines 형식)
20250902 저장 완료 (JSON Lines 형식)
20250903 데이터 수집 실패: HTTPConnectionPool(host='kobis.or.kr', port=80): Read timed out. (read timeout=10)
20250904 데이터 수집 실패: HTTPConnectionPool(host='kobis.or.kr', port=80): Read timed out. (read timeout=10)
20250905 저장 완료 (JSON Lines 형식)
20250906 저장 완료 (JSON Lines 형식)
20250907 저장 완료 (JSON Lines 형식)
20250908 저장 완료 (JSON Lines 형식)
20250909 저장 완료 (JSON Lines 형식)
20250910 저장 완료 (JSON Lines 형식)
20250911 저장 완료 (JSON Lines 형식)
20250912 저장 완료 (JSON Lines 형식)
20250913 저장 완료 (JSON Lines 형식)
20250914 저장 완료 (JSON Lines 형식)
20250915 저장 완료 (JSON Lines 형식)
20250916 저장 완료 (JSON Lines 형식)
20250917 저장 완료 (JSON Lines 형식)
20250918 저장 완료 (JSON Lines 형식)
20250919 저장 완료 (JSON Lines 형식)
20250920 저장 완료 (JSON Lines 형식)
20250921 저장 완료 (JSON Lines 형식)
20250922 저장 완료 (JSON Lines 형식)
20250923 저장 완료 (JSON Lines 형식)
20250924 저장 완료 (JSON Lines 형식)
20250925 저장 완료 (JSON Lines 형식)
20250926 저장 완료 (JSON Lines 형식)
20250927 저장 완료 (JSON Lines 형식)
20250928 데이터 수집 실패:

#### 실패한 데이터 다시 추출

In [4]:
import requests
from datetime import datetime, timedelta
import json

# 데이터 수집
base_url = "http://kobis.or.kr/kobisopenapi/webservice/rest/boxoffice/searchDailyBoxOfficeList.json"

for i in [3, 4, 28]:
    target_date = (datetime(2025, 9, i)).strftime("%Y%m%d")
    params = {"key": API_KEY, "targetDt": target_date}

    try:
        res = requests.get(base_url, params=params, timeout=10)
        res.raise_for_status()
        data = res.json()

        showRange = data["boxOfficeResult"]["showRange"].split("~")[0]
        dailyBoxOfficeList = data["boxOfficeResult"]["dailyBoxOfficeList"]

        file_path = raw_data_dir / f"dailyBoxOffice_{target_date}.json"

        with open(file_path, "w", encoding="utf-8") as f:
            for movie in dailyBoxOfficeList:
                # Redshift 테이블 스키마에 맞게 데이터 타입 변환 및 가공
                movie_data = {
                    "movieCd": int(movie["movieCd"]),
                    "showRange": datetime.strptime(showRange, "%Y%m%d").strftime("%Y-%m-%d"),
                    "rank": int(movie["rank"]),
                    "salesAmt": int(movie["salesAmt"]),
                    "salesShare": float(movie["salesShare"]),
                    "salesInten": int(movie["salesInten"]),
                    "salesChange": float(movie["salesChange"]),
                    "salesAcc": int(movie["salesAcc"]),
                    "audiCnt": int(movie["audiCnt"]),
                    "audiInten": int(movie["audiInten"]),
                    "audiChange": float(movie["audiChange"]),
                    "audiAcc": int(movie["audiAcc"]),
                    "scrnCnt": int(movie["scrnCnt"]),
                    "showCnt": int(movie["showCnt"])
                }
                
                json_line = json.dumps(movie_data, ensure_ascii=False)
                f.write(json_line + '\n') 

        print(f"{target_date} 저장 완료 (JSON Lines 형식)")

    except Exception as e:
        print(f"{target_date} 데이터 수집 실패: {e}")

20250903 저장 완료 (JSON Lines 형식)
20250904 저장 완료 (JSON Lines 형식)
20250928 저장 완료 (JSON Lines 형식)


#### S3 에 업로드

In [5]:
print(raw_data_dir)

/Users/kang/Team4_4Ward/BI-Dashboard/raw_data/boxoffice


In [4]:
import boto3

# S3 업로드
if any(raw_data_dir.glob("*.json")):
    s3 = boto3.client(
        "s3",
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        region_name=AWS_REGION
    )

    for file in raw_data_dir.glob("dailyBoxOffice_*.json"):
        file_name = f"raw_data/boxoffice/{file.name}"
        try:
            s3.upload_file(str(file), S3_BUCKET_NAME, file_name)
            print(f"[success] S3 업로드 완료: {file_name}")
        except Exception as e:
            print(f"[fail] S3 업로드 실패 ({file_name}): {e}")
else:
    print("[error] 업로드할 파일이 없습니다.")

[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250927.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250911.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250907.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250906.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250910.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250930.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250926.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250917.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250901.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250921.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250920.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250916.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250903.json
[success] S3 업로드 완료: raw_data/boxoffice/dailyBoxOffice_20250915.json
[success] S3 업로드 완료: raw_data/boxo