# Gathering KOBIS data

## Load Libraries

In [35]:
# !pip install tqdm

Collecting tqdm
  Downloading tqdm-4.65.0-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.1/77.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.65.0


In [2]:
import requests
import io
import base64
import pandas as pd
import json
from tqdm import tqdm

## Set common API variables

In [3]:
headers = {
 'Accept': 'application/json'
  }

key = "f5eef3421c602c6cb7ea224104795888"

## Daily Box Office

In [12]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/boxoffice/searchDailyBoxOfficeList.json"

In [13]:
dfs = []
for d in tqdm(pd.date_range("2018/01/01", "2022/12/31").strftime("%Y%m%d")):
    data = {
    "key" : key, 
    "targetDt" : d,
    }

    response = requests.get(url, params = data, headers=headers)
    df = pd.DataFrame(json.loads(response.text)['boxOfficeResult']['dailyBoxOfficeList'])
    df['date'] = d
    dfs.append(df)

100%|██████████| 1826/1826 [16:46<00:00,  1.81it/s]


In [14]:
len(dfs)

1826

In [15]:
daily_box_office = pd.concat(dfs)
daily_box_office

Unnamed: 0,rnum,rank,rankInten,rankOldAndNew,movieCd,movieNm,openDt,salesAmt,salesShare,salesInten,salesChange,salesAcc,audiCnt,audiInten,audiChange,audiAcc,scrnCnt,showCnt,date
0,1,1,0,OLD,20150976,신과함께-죄와 벌,2017-12-20,7715149100,52.9,-853021100,-10,76065645532,916652,-107305,-10.5,9456147,1644,7957,20180101
1,2,2,0,OLD,20170590,1987,2017-12-27,4460462100,30.6,-224621200,-4.8,19714466137,531469,-28577,-5.1,2473546,1097,4982,20180101
2,3,3,0,OLD,20170402,강철비,2017-12-14,944228600,6.5,-48816300,-4.9,32939126166,114298,-6548,-5.4,4128591,602,1451,20180101
3,4,4,0,OLD,20179462,위대한 쇼맨,2017-12-20,509947700,3.5,25348100,5.2,7166974900,60070,2402,4.2,874655,432,843,20180101
4,5,5,0,OLD,20174965,극장판 포켓몬스터 너로 정했다!,2017-12-21,262886500,1.8,-7375400,-2.7,3380416200,34103,-1037,-3,449358,417,605,20180101
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,6,6,0,OLD,20228640,신비아파트 극장판 차원도깨비와 7개의 세계,2022-12-14,188096008,1.8,89649287,91.1,3417319076,19955,9293,87.2,356783,490,635,20221231
6,7,7,0,NEW,20215315,스위치,2023-01-04,106467884,1.0,106467884,100,220588884,10579,10579,100,23148,412,562,20221231
7,8,8,-1,OLD,20228829,눈의 여왕5: 스노우 프린세스와 미러랜드의 비밀,2022-12-22,84996568,0.8,20166085,31.1,1093855619,9136,2067,29.2,118023,346,406,20221231
8,9,9,-1,OLD,20227925,몬스터 신부: 101번째 프로포즈,2022-12-28,59368300,0.6,17636000,42.3,190485000,6308,1868,42.1,21038,226,292,20221231


In [16]:
daily_box_office.to_csv('daily_box_office.csv')

## Weekly Box Office (weekend: Fri, Sat, Sun)

In [8]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/boxoffice/searchWeeklyBoxOfficeList.json"

In [9]:
dfs = []
for d in tqdm(pd.date_range("2018/01/01", "2022/12/31", freq = "W").strftime("%Y%m%d")):
    data = {
    "key" : key, 
    "targetDt" : d,
    }

    response = requests.get(url, params = data, headers=headers)
    df = pd.DataFrame(json.loads(response.text)['boxOfficeResult']['weeklyBoxOfficeList'])
    df['date'] = d
    dfs.append(df)

100%|██████████| 260/260 [03:54<00:00,  1.11it/s]


In [11]:
weekly_box_office = pd.concat(dfs)
weekly_box_office

Unnamed: 0,rnum,rank,rankInten,rankOldAndNew,movieCd,movieNm,openDt,salesAmt,salesShare,salesInten,salesChange,salesAcc,audiCnt,audiInten,audiChange,audiAcc,scrnCnt,showCnt,date
0,1,1,0,OLD,20150976,신과함께-죄와 벌,2017-12-20,11047975705,36.4,-9491237208,-46.2,92571580337,1318754,-1146248,-46.5,11502259,1260,16497,20180107
1,2,2,0,OLD,20170590,1987,2017-12-27,8919543895,29.4,-2401051542,-21.2,32847722932,1060097,-293609,-21.7,4089298,1094,14006,20180107
2,3,3,0,NEW,20171909,쥬만지: 새로운 세계,2018-01-03,5557904150,18.3,5557904150,100.0,7342405650,659519,659519,100.0,894544,791,9557,20180107
3,4,4,0,NEW,20177104,페르디난드,2018-01-03,1886998500,6.2,1886998500,100.0,2625604300,247007,247007,100.0,355767,698,4388,20180107
4,5,5,-2,OLD,20170402,강철비,2017-12-14,998370700,3.3,-1394704944,-58.3,34705414466,119809,-171616,-58.9,4350728,495,2938,20180107
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,6,6,0,NEW,20228829,눈의 여왕5: 스노우 프린세스와 미러랜드의 비밀,2022-12-22,655073067,1.9,655073067,100.0,715363766,67729,67729,100.0,75248,626,2136,20221225
6,7,7,36,OLD,20228309,핑크퐁 시네마 콘서트 2: 원더스타 콘서트 대작전,2022-12-21,213626979,0.6,211626979,10581.3,249894974,22112,21912,10956.0,26490,432,1201,20221225
7,8,8,-2,OLD,20228308,극장판 뽀로로와 친구들: 바이러스를 없애줘!,2022-12-01,107517600,0.3,-62359400,-36.7,1578754400,11332,-7094,-38.5,175473,114,299,20221225
8,9,9,-4,OLD,20210364,탄생,2022-11-30,93002495,0.3,-161650519,-63.5,2930747491,9600,-17403,-64.4,325724,99,332,20221225


In [17]:
weekly_box_office.to_csv('weekly_box_office.csv')

## Movie List

In [18]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/movie/searchMovieList.json"

In [19]:
dfs = []

for p in tqdm(range((9358 // 10) + 1)):
    data = {
        "key": key,
        "curPage": p + 1,
        "openStartDt": '2018',
        "openEndDt": '2022',
    }

    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame(json.loads(response.text)['movieListResult']['movieList'])
    dfs.append(df)

movie_list = pd.concat(dfs)
movie_list

100%|██████████| 936/936 [04:06<00:00,  3.80it/s]


Unnamed: 0,movieCd,movieNm,movieNmEn,prdtYear,openDt,typeNm,prdtStatNm,nationAlt,genreAlt,repNationNm,repGenreNm,directors,companys
0,20182280,졸업,Graduation,2018,20191107,장편,개봉,한국,다큐멘터리,한국,다큐멘터리,[{'peopleNm': '박주환'}],"[{'companyCd': '20230135', 'companyNm': '미디어나무..."
1,20183772,태일이,Chun Tae-il,2021,20211201,장편,개봉,한국,애니메이션,한국,애니메이션,[{'peopleNm': '홍준표'}],"[{'companyCd': '20100545', 'companyNm': '(주)명필..."
2,20192462,마리아 칼라스: 세기의 디바,Maria by Callas: In Her Own Words,2017,20190711,장편,개봉,프랑스,다큐멘터리,프랑스,다큐멘터리,[{'peopleNm': '톰 볼프'}],[]
3,20219337,"1975 킬링필드, 푸난",Funan,2018,20220127,장편,개봉,프랑스,"애니메이션,드라마",프랑스,애니메이션,[{'peopleNm': '드니 도'}],[]
4,20196655,존 윅 3: 파라벨룸,John Wick: Chapter 3 - Parabellum,2019,20190626,장편,개봉,미국,"액션,범죄,스릴러",미국,액션,[{'peopleNm': '채드 스타헬스키'}],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,20161724,스타박'스 다방,Bittersweet Brew,2016,20180111,장편,개봉,한국,"코미디,드라마",한국,코미디,[{'peopleNm': '이상우'}],"[{'companyCd': '20040209', 'companyNm': '영화사 온..."
4,20173547,다운사이징,Downsizing,2017,20180111,장편,개봉,미국,"드라마,코미디",미국,드라마,[{'peopleNm': '알렉산더 페인'}],[]
5,20175812,아오 오니,Ao Oni: The Animation,2017,20180104,장편,개봉,일본,"애니메이션,공포(호러)",일본,애니메이션,[{'peopleNm': '하마무라 토시로'}],[]
6,20177104,페르디난드,FERDINAND,2017,20180103,장편,개봉,미국,"애니메이션,어드벤처,코미디",미국,애니메이션,[{'peopleNm': '카를로스 살다나'}],[]


In [73]:
movie_list.to_csv('movie_list.csv', index = False)

## Movie Info

In [75]:
movie_list = pd.read_csv('movie_list.csv')
movie_list

Unnamed: 0,movieCd,movieNm,movieNmEn,prdtYear,openDt,typeNm,prdtStatNm,nationAlt,genreAlt,repNationNm,repGenreNm,directors,companys
0,20182280,졸업,Graduation,2018.0,20191107,장편,개봉,한국,다큐멘터리,한국,다큐멘터리,[{'peopleNm': '박주환'}],"[{'companyCd': '20230135', 'companyNm': '미디어나무..."
1,20183772,태일이,Chun Tae-il,2021.0,20211201,장편,개봉,한국,애니메이션,한국,애니메이션,[{'peopleNm': '홍준표'}],"[{'companyCd': '20100545', 'companyNm': '(주)명필..."
2,20192462,마리아 칼라스: 세기의 디바,Maria by Callas: In Her Own Words,2017.0,20190711,장편,개봉,프랑스,다큐멘터리,프랑스,다큐멘터리,[{'peopleNm': '톰 볼프'}],[]
3,20219337,"1975 킬링필드, 푸난",Funan,2018.0,20220127,장편,개봉,프랑스,"애니메이션,드라마",프랑스,애니메이션,[{'peopleNm': '드니 도'}],[]
4,20196655,존 윅 3: 파라벨룸,John Wick: Chapter 3 - Parabellum,2019.0,20190626,장편,개봉,미국,"액션,범죄,스릴러",미국,액션,[{'peopleNm': '채드 스타헬스키'}],[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9353,20161724,스타박'스 다방,Bittersweet Brew,2016.0,20180111,장편,개봉,한국,"코미디,드라마",한국,코미디,[{'peopleNm': '이상우'}],"[{'companyCd': '20040209', 'companyNm': '영화사 온..."
9354,20173547,다운사이징,Downsizing,2017.0,20180111,장편,개봉,미국,"드라마,코미디",미국,드라마,[{'peopleNm': '알렉산더 페인'}],[]
9355,20175812,아오 오니,Ao Oni: The Animation,2017.0,20180104,장편,개봉,일본,"애니메이션,공포(호러)",일본,애니메이션,[{'peopleNm': '하마무라 토시로'}],[]
9356,20177104,페르디난드,FERDINAND,2017.0,20180103,장편,개봉,미국,"애니메이션,어드벤처,코미디",미국,애니메이션,[{'peopleNm': '카를로스 살다나'}],[]


In [76]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/movie/searchMovieInfo.json"

In [79]:
dfs = []

for movieCd in tqdm(movie_list['movieCd']):
    data = {
        "key": key,
        "movieCd": movieCd,
    }
    
    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame({k: [v] for k, v in json.loads(response.text)['movieInfoResult']['movieInfo'].items()})
    dfs.append(df)
movie_info = pd.concat(dfs)
movie_info

  3%|▎         | 237/9358 [00:58<37:21,  4.07it/s] 


KeyboardInterrupt: 

In [None]:
movie_info.to_csv('movie_info.csv')

## Company List

In [23]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/company/searchCompanyList.json"

In [24]:
dfs = []

for p in tqdm(range((12966 // 10) + 1)):
    data = {
        "key": key,
        "curPage": p + 1,
    }

    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame(json.loads(response.text)['companyListResult']['companyList'])
    dfs.append(df)

company_list = pd.concat(dfs)
company_list

100%|██████████| 1297/1297 [04:47<00:00,  4.52it/s]


Unnamed: 0,companyCd,companyNm,companyNmEn,companyPartNames,ceoNm,filmoNames
0,20230136,미디어톡,,제작사,유정현,소녀작가 입문기
1,20124975,(주)미디어톡,,제작사,유정현,
2,20203841,원더필름,,제작사,윤서영,숨비소리
3,20230135,미디어나무(주),MediaNamu. Inc,"제작사,배급사",김성환,"수라,오늘 출가합니다,졸업"
4,20230134,SDM스튜디오,,,,소원을 빨아봐
...,...,...,...,...,...,...
1,20121820,프리챌,,부가시장,,
2,20121821,하동철기념사업회,,부가시장,하준수,
3,20121824,한아름닷컴,,부가시장,,
4,20121813,해피프랜즈,,부가시장,양영욱,


In [71]:
company_list.to_csv('company_list.csv', index = False)

## Company Info

In [72]:
company_list = pd.read_csv('company_list.csv')
company_list

Unnamed: 0,companyCd,companyNm,companyNmEn,companyPartNames,ceoNm,filmoNames
0,20230136,미디어톡,,제작사,유정현,소녀작가 입문기
1,20124975,(주)미디어톡,,제작사,유정현,
2,20203841,원더필름,,제작사,윤서영,숨비소리
3,20230135,미디어나무(주),MediaNamu. Inc,"제작사,배급사",김성환,"수라,오늘 출가합니다,졸업"
4,20230134,SDM스튜디오,,,,소원을 빨아봐
...,...,...,...,...,...,...
12961,20121820,프리챌,,부가시장,,
12962,20121821,하동철기념사업회,,부가시장,하준수,
12963,20121824,한아름닷컴,,부가시장,,
12964,20121813,해피프랜즈,,부가시장,양영욱,


In [None]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/company/searchCompanyInfo.json"

In [None]:
dfs = []

for companyCd in tqdm(company_list['companyCd']):
    data = {
        "key": key,
        "companyCd": companyCd,
    }

    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame({k: [v] for k, v in json.loads(response.text)['companyInfoResult']['companyInfo'].items()})
    dfs.append(df)

company_info = pd.concat(dfs)
company_info

  1%|          | 69/12966 [00:13<40:49,  5.27it/s]


KeyboardInterrupt: 

In [None]:
company_info.to_csv('company_info.csv')

## People List

In [66]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/people/searchPeopleList.json"

In [67]:
dfs = []

for p in tqdm(range((165730 // 10) + 1)):
    data = {
        "key": key,
        "curPage": p + 1,
    }

    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame(json.loads(response.text)['peopleListResult']['peopleList'])
    dfs.append(df)

people_list = pd.concat(dfs)
people_list

  2%|▏         | 343/16574 [7:00:26<331:35:54, 73.55s/it]    


KeyboardInterrupt: 

In [None]:
people_list.to_csv('people_list.csv', index = False)

## People Info

In [None]:
people_list = pd.read_csv('people_list.csv')
people_list

In [63]:
url = "http://www.kobis.or.kr/kobisopenapi/webservice/rest/people/searchPeopleInfo.json"

In [65]:
dfs = []

for peopleCd in tqdm(people_list['peopleCd']):
    data = {
        "key": key,
        "peopleCd": peopleCd,
    }

    response = requests.get(url, params = data, headers = headers)
    df = pd.DataFrame({k: [v] for k, v in json.loads(response.text)['peopleInfoResult']['peopleInfo'].items()})
    dfs.append(df)

people_info = pd.concat(dfs)
people_info

100%|██████████| 120/120 [00:21<00:00,  5.69it/s]


Unnamed: 0,peopleCd,peopleNm,peopleNmEn,sex,repRoleNm,homepages,filmos
0,20388687,포스타,,,감독,[],"[{'movieCd': '20231124', 'movieNm': 'X같은 상사의 연..."
0,20216746,알레한드로 몬테베르드,Alejandro Monteverde,남자,감독,[],"[{'movieCd': '20155725', 'movieNm': '리틀 보이', '..."
0,20388671,서유담,,남자,배우,[],"[{'movieCd': '20217364', 'movieNm': '이번엔 잘 되겠지..."
0,20303549,유다미,YOO Da-mi,여자,배우,[],"[{'movieCd': '20183848', 'movieNm': '수상한 이웃', ..."
0,20388686,박중일,Park Jung Il,남자,조감독,[],"[{'movieCd': '20227627', 'movieNm': '물꽃의 전설', ..."
...,...,...,...,...,...,...,...
0,20373888,김혜강,,여자,예고편,[],"[{'movieCd': '20218476', 'movieNm': '나만 보이니', ..."
0,20376610,조대흠,,남자,조감독,[],"[{'movieCd': '20217892', 'movieNm': '식물카페, 온정'..."
0,20388531,박나은,,,감독,[],"[{'movieCd': '20230866', 'movieNm': '그녀가 순결했던 ..."
0,20311836,마메자와 마메타로,,남자,감독,[],"[{'movieCd': '20230315', 'movieNm': '가슴 큰 목욕탕 ..."


In [None]:
people_info.to_csv('people_info.csv', index = False)