In [30]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from datetime import date
from dateutil.relativedelta import relativedelta
import requests
from datetime import datetime, timedelta
# 비동기로 공연 상세 api 조회

import aiohttp
import asyncio
from tqdm import tqdm

In [31]:
# Load the API key (read from the KOPIS_KEY environment variable / .env file)

import os
from dotenv import load_dotenv

load_dotenv()
SERVICE_KEY = os.environ.get("KOPIS_KEY")

# Stop right away when the key is missing or empty; every request below needs it.
if SERVICE_KEY is None or SERVICE_KEY == "":
    raise ValueError("서비스 키가 설정되지 않았습니다. .env 파일을 확인하세요.")

In [103]:
def xml_to_records(xml_text, row_tag="box-statsofs"):
    """Turn an XML document into a list of flat dictionaries.

    Every descendant element of the root named ``row_tag`` becomes one record
    that maps the tag of each direct child to that child's text.

    Args:
        xml_text: XML document text to parse.
        row_tag: Tag name of the elements that should become records.

    Returns:
        list[dict]: One dict per matching element, in document order. A child
        without text is stored as ``None``; when a tag repeats inside one
        element the last occurrence wins.
    """
    document = ET.fromstring(xml_text)
    # Only direct children are read; anything nested deeper is ignored.
    return [
        {field.tag: field.text for field in element}
        for element in document.findall(f".//{row_tag}")
    ]

In [None]:
# Booking statistics by time of day (KOPIS boxStatsTime endpoint)
async def fetch_box_stats_time(session, date):
    """Fetch the time-of-day booking statistics for a single day.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        date: Day to query (the notebook passes ``YYYYMMDD`` strings). It is
            sent as both ``stdate`` and ``eddate``, so one call covers one day.

    Returns:
        list[dict]: One dict per ``boxStatsof`` element of the XML response
        (child tag -> text). An empty list is returned when the request, the
        HTTP status check or the XML parsing fails; the error is printed
        instead of being raised.
    """
    url = "http://www.kopis.or.kr/openApi/restful/boxStatsTime"
    params = {
        "service": SERVICE_KEY,
        "stdate": date,
        "eddate": date
    }

    try:
        async with session.get(url, params=params) as resp:
            # Treat HTTP 4xx/5xx as a failure instead of parsing an error page as data.
            resp.raise_for_status()
            text = await resp.text()
            return xml_to_records(text, row_tag="boxStatsof")
    except Exception as e:
        # Best effort: report the failing day and let the caller continue.
        print("Error: ", date, e)
        return []

In [105]:
# Build the list of days to query
def generate_dates(start, end):
    """Return every calendar day from ``start`` to ``end``, both inclusive.

    Args:
        start: First day, formatted as ``YYYYMMDD``.
        end: Last day, formatted as ``YYYYMMDD``.

    Returns:
        list[str]: ``YYYYMMDD`` strings in ascending order; empty when ``end``
        lies before ``start``.
    """
    fmt = "%Y%m%d"
    first_day = datetime.strptime(start, fmt)
    last_day = datetime.strptime(end, fmt)
    day_count = (last_day - first_day).days + 1  # +1 keeps the end day in the range

    stamps = []
    for offset in range(day_count):
        stamps.append((first_day + timedelta(days=offset)).strftime(fmt))
    return stamps

In [106]:
sem = asyncio.Semaphore(3)

genre = "AAAA"
start = "20230101"
end = "20241231"
dates = generate_dates(start, end)

all_data = []
async with aiohttp.ClientSession() as session:
    for d in dates:
        rows = await fetch_box_stats_time(session, d)
        for r in rows:
            r['date'] = d
            all_data.append(r)
        print("Fetched:", d, "rows:", len(rows))  # 진행 상황 확인

Fetched: 20230101 rows: 9
Fetched: 20230102 rows: 9
Fetched: 20230103 rows: 9
Fetched: 20230104 rows: 9
Fetched: 20230105 rows: 9
Fetched: 20230106 rows: 9
Fetched: 20230107 rows: 9
Fetched: 20230108 rows: 9
Fetched: 20230109 rows: 9
Fetched: 20230110 rows: 9
Fetched: 20230111 rows: 9
Fetched: 20230112 rows: 9
Fetched: 20230113 rows: 9
Fetched: 20230114 rows: 9
Fetched: 20230115 rows: 9
Fetched: 20230116 rows: 9
Fetched: 20230117 rows: 9
Fetched: 20230118 rows: 9
Fetched: 20230119 rows: 9
Fetched: 20230120 rows: 9
Fetched: 20230121 rows: 9
Fetched: 20230122 rows: 9
Fetched: 20230123 rows: 9
Fetched: 20230124 rows: 9
Fetched: 20230125 rows: 9
Fetched: 20230126 rows: 9
Fetched: 20230127 rows: 9
Fetched: 20230128 rows: 9
Fetched: 20230129 rows: 9
Fetched: 20230130 rows: 9
Fetched: 20230131 rows: 9
Fetched: 20230201 rows: 9
Fetched: 20230202 rows: 9
Fetched: 20230203 rows: 9
Fetched: 20230204 rows: 9
Fetched: 20230205 rows: 9
Fetched: 20230206 rows: 9
Fetched: 20230207 rows: 9
Fetched: 202

In [108]:
# Turn the collected per-day records into one table.
box_stats_time = pd.DataFrame(data=all_data)

In [109]:
# Inspect the column names of the assembled table.
box_stats_time.columns

Index(['timename', 'prfcnt', 'prfdtcnt', 'cancelnmrssm', 'totnmrssm',
       'ntssamountsm', 'ntssnmrssm', 'date'],
      dtype='object')

In [None]:
# Response field name -> Korean column label for the time-of-day table.
box_stats_time_col_mapping = dict(
    timename="시간대",
    prfcnt="공연건수",
    prfdtcnt="상연횟수",
    ntssnmrssm="예매수",
    cancelnmrssm="취소수",
    totnmrssm="총티켓판매수",
    ntssamountsm="총티켓판매액",
    date="날짜",
)

box_stats_time = box_stats_time.rename(box_stats_time_col_mapping, axis="columns")

In [117]:
# Write the table to CSV; the BOM-prefixed UTF-8 encoding is requested explicitly.
box_stats_time.to_csv(
    "시간대별예매통계.csv",
    encoding="utf-8-sig",
    index=False,
)

In [118]:
# Booking statistics by genre (KOPIS boxStatsCate endpoint)
async def fetch_box_stats_cate(session, date, catecode):
    """Fetch the per-genre booking statistics for a single day.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        date: Day to query (the notebook passes ``YYYYMMDD`` strings). It is
            sent as both ``stdate`` and ``eddate``, so one call covers one day.
        catecode: Genre code forwarded as the ``catecode`` query parameter.

    Returns:
        list[dict]: One dict per ``boxStatsof`` element of the XML response
        (child tag -> text). An empty list is returned when the request, the
        HTTP status check or the XML parsing fails; the error is printed
        instead of being raised.
    """
    url = "http://www.kopis.or.kr/openApi/restful/boxStatsCate"
    params = {
        "service": SERVICE_KEY,
        "stdate": date,
        "eddate": date,
        "catecode": catecode
    }

    try:
        async with session.get(url, params=params) as resp:
            # Treat HTTP 4xx/5xx as a failure instead of parsing an error page as data.
            resp.raise_for_status()
            text = await resp.text()
            return xml_to_records(text, row_tag="boxStatsof")
    except Exception as e:
        # Best effort: report the failing day and let the caller continue.
        print("Error: ", date, e)
        return []

In [123]:
sem = asyncio.Semaphore(3)

genre = "AAAA"
start = "20230101"
end = "20241231"
dates = generate_dates(start, end)

box_stats_cate = []
async with aiohttp.ClientSession() as session:
    for d in dates:
        rows = await fetch_box_stats_cate(session, d, genre)
        for r in rows:
            r['date'] = d
            box_stats_cate.append(r)
        print("Fetched:", d, "rows:", len(rows))  # 진행 상황 확인

Fetched: 20230101 rows: 2
Fetched: 20230102 rows: 2
Fetched: 20230103 rows: 2
Fetched: 20230104 rows: 2
Fetched: 20230105 rows: 2
Fetched: 20230106 rows: 2
Fetched: 20230107 rows: 2
Fetched: 20230108 rows: 2
Fetched: 20230109 rows: 2
Fetched: 20230110 rows: 2
Fetched: 20230111 rows: 2
Fetched: 20230112 rows: 2
Fetched: 20230113 rows: 2
Fetched: 20230114 rows: 2
Fetched: 20230115 rows: 2
Fetched: 20230116 rows: 2
Fetched: 20230117 rows: 2
Fetched: 20230118 rows: 2
Fetched: 20230119 rows: 2
Fetched: 20230120 rows: 2
Fetched: 20230121 rows: 2
Fetched: 20230122 rows: 2
Fetched: 20230123 rows: 2
Fetched: 20230124 rows: 2
Fetched: 20230125 rows: 2
Fetched: 20230126 rows: 2
Fetched: 20230127 rows: 2
Fetched: 20230128 rows: 2
Fetched: 20230129 rows: 2
Fetched: 20230130 rows: 2
Fetched: 20230131 rows: 2
Fetched: 20230201 rows: 2
Fetched: 20230202 rows: 2
Fetched: 20230203 rows: 2
Fetched: 20230204 rows: 2
Fetched: 20230205 rows: 2
Fetched: 20230206 rows: 2
Fetched: 20230207 rows: 2
Fetched: 202

In [126]:
# Turn the collected records into a table (the name is re-bound from list to DataFrame).
box_stats_cate = pd.DataFrame(data=box_stats_cate)

In [128]:
# Keep only the rows whose genre name is "연극"; copy so later edits do not touch a view.
box_stats_cate = box_stats_cate.loc[box_stats_cate["catenm"].eq("연극")].copy()

In [136]:
# Response field name -> Korean column label for the per-genre booking table.
cate_mapping = dict(
    catenm="장르",
    prfcnt="공연건수",
    prfdtcnt="상연횟수",
    ntssnmrssm="예매수",
    cancelnmrssm="취소수",
    totnmrssm="총티켓판매수",
    ntssamountsm="총티켓판매액",
    date="날짜",
)

In [137]:
# Apply the Korean column labels.
box_stats_cate = box_stats_cate.rename(cate_mapping, axis="columns")

In [140]:
# Write the table to CSV; the BOM-prefixed UTF-8 encoding is requested explicitly.
box_stats_cate.to_csv(
    "장르별예매통계.csv",
    encoding="utf-8-sig",
    index=False,
)

In [None]:
# Booking statistics by price range (KOPIS boxStatsPrice endpoint)
async def fetch_box_stats_price(session, date, catecode):
    """Fetch the per-price-range booking statistics for a single day.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        date: Day to query (the notebook passes ``YYYYMMDD`` strings). It is
            sent as both ``stdate`` and ``eddate``, so one call covers one day.
        catecode: Accepted but not sent with the request.
            NOTE(review): ``fetch_box_stats_cate`` forwards its ``catecode``;
            confirm whether this endpoint should be filtered by genre as well.

    Returns:
        list[dict]: One dict per ``boxStatsof`` element of the XML response
        (child tag -> text). An empty list is returned when the request, the
        HTTP status check or the XML parsing fails; the error is printed
        instead of being raised.
    """
    url = "http://www.kopis.or.kr/openApi/restful/boxStatsPrice"
    params = {
        "service": SERVICE_KEY,
        "stdate": date,
        "eddate": date
    }

    try:
        async with session.get(url, params=params) as resp:
            # Treat HTTP 4xx/5xx as a failure instead of parsing an error page as data.
            resp.raise_for_status()
            text = await resp.text()
            return xml_to_records(text, row_tag="boxStatsof")
    except Exception as e:
        # Best effort: report the failing day and let the caller continue.
        print("Error: ", date, e)
        return []

In [149]:
sem = asyncio.Semaphore(3)

genre = "AAAA"
start = "20240418"
end = "20241231"
dates = generate_dates(start, end)

async with aiohttp.ClientSession() as session:
    for d in dates:
        rows = await fetch_box_stats_price(session, d, genre)
        for r in rows:
            r['date'] = d
            box_stats_price.append(r)
        print("Fetched:", d, "rows:", len(rows))  # 진행 상황 확인

Fetched: 20240418 rows: 63
Fetched: 20240419 rows: 63
Fetched: 20240420 rows: 63
Fetched: 20240421 rows: 63
Fetched: 20240422 rows: 63
Fetched: 20240423 rows: 63
Fetched: 20240424 rows: 63
Fetched: 20240425 rows: 63
Fetched: 20240426 rows: 63
Fetched: 20240427 rows: 63
Fetched: 20240428 rows: 63
Fetched: 20240429 rows: 63
Fetched: 20240430 rows: 63
Fetched: 20240501 rows: 63
Fetched: 20240502 rows: 63
Fetched: 20240503 rows: 63
Fetched: 20240504 rows: 63
Fetched: 20240505 rows: 63
Fetched: 20240506 rows: 63
Fetched: 20240507 rows: 63
Fetched: 20240508 rows: 63
Fetched: 20240509 rows: 63
Fetched: 20240510 rows: 63
Fetched: 20240511 rows: 63
Fetched: 20240512 rows: 63
Fetched: 20240513 rows: 63
Fetched: 20240514 rows: 63
Fetched: 20240515 rows: 63
Fetched: 20240516 rows: 63
Fetched: 20240517 rows: 63
Fetched: 20240518 rows: 63
Fetched: 20240519 rows: 63
Fetched: 20240520 rows: 63
Fetched: 20240521 rows: 63
Fetched: 20240522 rows: 63
Fetched: 20240523 rows: 63
Fetched: 20240524 rows: 63
F

# Create the accumulator only when it does not exist yet. Re-binding it
# unconditionally at this position would discard every row gathered by the
# fetch loop when the cells are executed from top to bottom.
if "box_stats_price" not in globals():
    box_stats_price = []

In [150]:
# Turn the collected per-day records into one table.
box_stats_price_df = pd.DataFrame(data=box_stats_price)

In [152]:
# Response field name -> Korean column label for the per-price-range table.
price_mapping = dict(
    catenm="장르",
    price="가격대",
    ntssnmrssm="예매수",
    cancelnmrssm="취소수",
    totnmrssm="총 티켓판매수",
    pertotnmrssm="총 티켓판매수_장르비중(%)",
    ntssamountsm="총 티켓판매액",
    date="날짜",
)

box_stats_price_df = box_stats_price_df.rename(price_mapping, axis="columns")

In [153]:
# Preview the first rows of the renamed table.
box_stats_price_df.head()

Unnamed: 0,장르,예매수,취소수,총 티켓판매수,총 티켓판매액,가격대,총 티켓판매수_장르비중(%),날짜
0,연극,1249,1597,0,0,0원,0.0,20230101
1,연극,4287,454,3833,66476,3만원미만,80.8,20230101
2,연극,712,237,475,17631,3만원이상~5만원미만,10.0,20230101
3,연극,620,220,400,23427,5만원이상~7만원미만,8.4,20230101
4,연극,492,142,350,27191,7만원이상~10만원미만,7.4,20230101


In [168]:
# Write the table to CSV; the BOM-prefixed UTF-8 encoding is requested explicitly.
box_stats_price_df.to_csv(
    "가격대별예매통계.csv",
    encoding="utf-8-sig",
    index=False,
)

In [155]:
# Per-genre statistics list (KOPIS prfstsCate endpoint)
async def fetch_prfsts_cate_service(session, date):
    """Fetch the per-genre statistics list for a single day.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the GET request.
        date: Day to query (the notebook passes ``YYYYMMDD`` strings). It is
            sent as both ``stdate`` and ``eddate``, so one call covers one day.

    Returns:
        list[dict]: One dict per ``prfst`` element of the XML response
        (child tag -> text). An empty list is returned when the request, the
        HTTP status check or the XML parsing fails; the error is printed
        instead of being raised.
    """
    url = "http://www.kopis.or.kr/openApi/restful/prfstsCate"
    params = {
        "service": SERVICE_KEY,
        "stdate": date,
        "eddate": date
    }

    try:
        async with session.get(url, params=params) as resp:
            # Treat HTTP 4xx/5xx as a failure instead of parsing an error page as data.
            resp.raise_for_status()
            text = await resp.text()
            return xml_to_records(text, row_tag="prfst")
    except Exception as e:
        # Best effort: report the failing day and let the caller continue.
        print("Error: ", date, e)
        return []

In [159]:
sem = asyncio.Semaphore(3)

start = "20230101"
end = "20241231"
dates = generate_dates(start, end)

prfsts_cate_service = []
async with aiohttp.ClientSession() as session:
    for d in dates:
        rows = await fetch_prfsts_cate_service(session, d)
        for r in rows:
            r['date'] = d
            prfsts_cate_service.append(r)
        print("Fetched:", d, "rows:", len(rows))  # 진행 상황 확인

Fetched: 20230101 rows: 9
Fetched: 20230102 rows: 9
Fetched: 20230103 rows: 9
Fetched: 20230104 rows: 9
Fetched: 20230105 rows: 9
Fetched: 20230106 rows: 9
Fetched: 20230107 rows: 9
Fetched: 20230108 rows: 9
Fetched: 20230109 rows: 9
Fetched: 20230110 rows: 9
Fetched: 20230111 rows: 9
Fetched: 20230112 rows: 9
Fetched: 20230113 rows: 9
Fetched: 20230114 rows: 9
Fetched: 20230115 rows: 9
Fetched: 20230116 rows: 9
Fetched: 20230117 rows: 9
Fetched: 20230118 rows: 9
Fetched: 20230119 rows: 9
Fetched: 20230120 rows: 9
Fetched: 20230121 rows: 9
Fetched: 20230122 rows: 9
Fetched: 20230123 rows: 9
Fetched: 20230124 rows: 9
Fetched: 20230125 rows: 9
Fetched: 20230126 rows: 9
Fetched: 20230127 rows: 9
Fetched: 20230128 rows: 9
Fetched: 20230129 rows: 9
Fetched: 20230130 rows: 9
Fetched: 20230131 rows: 9
Fetched: 20230201 rows: 9
Fetched: 20230202 rows: 9
Fetched: 20230203 rows: 9
Fetched: 20230204 rows: 9
Fetched: 20230205 rows: 9
Fetched: 20230206 rows: 9
Fetched: 20230207 rows: 9
Fetched: 202

In [160]:
# Turn the collected records into a table (the name is re-bound from list to DataFrame).
prfsts_cate_service = pd.DataFrame(data=prfsts_cate_service)

In [163]:
# Response field name -> Korean column label for the per-genre statistics table.
prfsts_cate_mapping = dict(
    cate="장르",
    prfprocnt="개막편수",
    prfdtcnt="상연횟수",
    amount="매출액",
    amountshr="매출액점유율(%)",
    nmrs="관객수",
    nmrsshr="관객점유율(%)",
    date="날짜",
)

In [164]:
# Apply the Korean column labels.
prfsts_cate_service = prfsts_cate_service.rename(prfsts_cate_mapping, axis="columns")

In [165]:
# Preview the first rows of the renamed table.
prfsts_cate_service.head()

Unnamed: 0,장르,개막편수,상연횟수,매출액,매출액점유율(%),관객수,관객점유율(%),날짜
0,연극,1,187,203037050,3.8,8158,13.4,20230101
1,뮤지컬,1,128,3052317930,56.5,34729,57.1,20230101
2,서양음악(클래식),0,0,0,0.0,0,0.0,20230101
3,한국음악(국악),0,0,0,0.0,0,0.0,20230101
4,대중음악,1,5,2054854200,38.0,16207,26.6,20230101


In [166]:
# Write the table to CSV; the BOM-prefixed UTF-8 encoding is requested explicitly.
prfsts_cate_service.to_csv(
    "장르별통계목록.csv",
    encoding="utf-8-sig",
    index=False,
)