# Library

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings(action='ignore')

# 한글 폰트 인식 - Windows
import matplotlib 
font_name = matplotlib.font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf").get_name()
matplotlib.rc('font', family=font_name)

# 마이너스 부호 인식
matplotlib.rc("axes", unicode_minus = False)

# [오픈API를 통한 데이터 수집]

## 금융통계손해보험정보

https://www.data.go.kr/data/15061307/openapi.do

### (1) 필요 라이브러리 불러오기

In [2]:
# pip install bs4

import requests
from bs4 import BeautifulSoup

### (2) open_api 요청 테스트

In [3]:
# 본인의 ServiceKey 입력

# ServiceKey = 'ZN0B7TMQ6ADGaVcUiE2QnB9GdE9e8FWZ%2Bj8tQ%2FRJ8k6XNjjT6lyLzF6W0056eoZyhdORyM3dfXcWZEMAtpODDQ%3D%3D'
ServiceKey = 'g0OhTQflzDelAVixM%2Bm0EtPvJvzcv1ZYwO%2Bj9b%2Fi4bmToDzAyifU%2FHxjCAhbsRMcozdUhj7E9i%2Bc5S7l3JzP9w%3D%3D'

In [4]:
# OPEN API 활용 가이드 > 요청메시지 명세 참고

# 손해보험주요영업활동조회
url = "http://apis.data.go.kr/1160100/service/GetNonlInsuCompInfoService/getNonlInsuCompMajoBusiActi?"

api_url = url + "serviceKey="+ ServiceKey 

In [5]:
api_url

'http://apis.data.go.kr/1160100/service/GetNonlInsuCompInfoService/getNonlInsuCompMajoBusiActi?serviceKey=g0OhTQflzDelAVixM%2Bm0EtPvJvzcv1ZYwO%2Bj9b%2Fi4bmToDzAyifU%2FHxjCAhbsRMcozdUhj7E9i%2Bc5S7l3JzP9w%3D%3D'

### (3) 가져온 XML 데이터 파싱

In [6]:
req = requests.get(api_url)
xml = req.text

In [7]:
xml

'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n<response>\n    <header>\n        <resultCode>00</resultCode>\n        <resultMsg>NORMAL SERVICE.</resultMsg>\n        <pageNo>1</pageNo>\n        <numOfRows>10</numOfRows>\n    </header>\n    <body>\n        <table>\n            <title>손보_주요영업활동_보험종류별 경과손해율</title>\n            <totalCount>20940</totalCount>\n            <items>\n                <item>\n                    <basYm>201709</basYm>\n                    <crno>1101110013328</crno>\n                    <fncoCd>0010626</fncoCd>\n                    <fncoNm>메리츠화재해상보험주식회사</fncoNm>\n                    <isuKindElpsLosRatClsfAmt>76.97</isuKindElpsLosRatClsfAmt>\n                    <isuKindElpsLosRatDcd>A</isuKindElpsLosRatDcd>\n                    <isuKindElpsLosRatDcdNm>경과손해율_자동차</isuKindElpsLosRatDcdNm>\n                </item>\n                <item>\n                    <basYm>202109</basYm>\n                    <crno>1101110013328</crno>\n                    <fncoCd>

In [8]:
soup = BeautifulSoup(xml, 'html.parser')

In [9]:
soup

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<response>
<header>
<resultcode>00</resultcode>
<resultmsg>NORMAL SERVICE.</resultmsg>
<pageno>1</pageno>
<numofrows>10</numofrows>
</header>
<body>
<table>
<title>손보_주요영업활동_보험종류별 경과손해율</title>
<totalcount>20940</totalcount>
<items>
<item>
<basym>201709</basym>
<crno>1101110013328</crno>
<fncocd>0010626</fncocd>
<fnconm>메리츠화재해상보험주식회사</fnconm>
<isukindelpslosratclsfamt>76.97</isukindelpslosratclsfamt>
<isukindelpslosratdcd>A</isukindelpslosratdcd>
<isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>
</item>
<item>
<basym>202109</basym>
<crno>1101110013328</crno>
<fncocd>0010626</fncocd>
<fnconm>메리츠화재해상보험주식회사</fnconm>
<isukindelpslosratclsfamt>75.77</isukindelpslosratclsfamt>
<isukindelpslosratdcd>A</isukindelpslosratdcd>
<isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>
</item>
<item>
<basym>201609</basym>
<crno>1101110013328</crno>
<fncocd>0010626</fncocd>
<fnconm>메리츠화재해상보험주식회사</fnconm>
<isukindelpslosratclsfamt>83.

In [10]:
# 전체 결과수 
totalCount = soup.select('totalCount')
totalCount

[<totalcount>20940</totalcount>]

In [11]:
# 한 페이지 결과수 
numOfRows = soup.select('numOfRows')
numOfRows

[<numofrows>10</numofrows>]

In [12]:
# 기준년월
basYm = soup.select('basYm')
basYm

[<basym>201709</basym>,
 <basym>202109</basym>,
 <basym>201609</basym>,
 <basym>201403</basym>,
 <basym>202203</basym>,
 <basym>201503</basym>,
 <basym>201806</basym>,
 <basym>201106</basym>,
 <basym>201809</basym>,
 <basym>201203</basym>]

In [13]:
# 금융회사코드
fncoCd = soup.select('fncoCd')
fncoCd

[<fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>,
 <fncocd>0010626</fncocd>]

In [14]:
# 금융회사명
fncoNm = soup.select('fncoNm')
fncoNm

[<fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>,
 <fnconm>메리츠화재해상보험주식회사</fnconm>]

In [15]:
# 보험종류경과손해율구분금액
ClsfAmt = soup.select('isuKindElpsLosRatClsfAmt')
ClsfAmt

[<isukindelpslosratclsfamt>76.97</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>75.77</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>83.09</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>91.3</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>73.07</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>90.89</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>77.41</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>81.78</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>79.69</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>83.94</isukindelpslosratclsfamt>]

In [16]:
# 보험종류경과손해율구분코드
RatDcd = soup.select('isuKindElpsLosRatDcd')
RatDcd

[<isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>,
 <isukindelpslosratdcd>A</isukindelpslosratdcd>]

In [17]:
# 보험종류경과손해율구분코드명
RatDcdNm = soup.select('isuKindElpsLosRatDcdNm')
RatDcdNm

[<isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>,
 <isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>]

In [18]:
ClsfAmt

[<isukindelpslosratclsfamt>76.97</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>75.77</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>83.09</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>91.3</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>73.07</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>90.89</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>77.41</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>81.78</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>79.69</isukindelpslosratclsfamt>,
 <isukindelpslosratclsfamt>83.94</isukindelpslosratclsfamt>]

In [19]:
ClsfAmt[0]

<isukindelpslosratclsfamt>76.97</isukindelpslosratclsfamt>

In [20]:
ClsfAmt[0].text

'76.97'

In [21]:
for n in ClsfAmt :
    print(n.text)

76.97
75.77
83.09
91.3
73.07
90.89
77.41
81.78
79.69
83.94


In [22]:
ClsfAmt_result = []

for n in ClsfAmt :
    ClsfAmt_result.append(n.text)
    
ClsfAmt_result

['76.97',
 '75.77',
 '83.09',
 '91.3',
 '73.07',
 '90.89',
 '77.41',
 '81.78',
 '79.69',
 '83.94']

### 데이터 정리하기

In [23]:
# 한 페이지 아이템 목록
items = soup.select('item')
len(items)

10

In [24]:
items[0]

<item>
<basym>201709</basym>
<crno>1101110013328</crno>
<fncocd>0010626</fncocd>
<fnconm>메리츠화재해상보험주식회사</fnconm>
<isukindelpslosratclsfamt>76.97</isukindelpslosratclsfamt>
<isukindelpslosratdcd>A</isukindelpslosratdcd>
<isukindelpslosratdcdnm>경과손해율_자동차</isukindelpslosratdcdnm>
</item>

In [25]:
items[0].select('fnconm')[0].text

'메리츠화재해상보험주식회사'

In [26]:
basYm = []
crno = []
fncoCd = []
fncoNm = []
ClsfAmt = []
RatDcd = []
RatDcdNm = []

for item in items:
    basYm.append(item.select('basym')[0].text)
    crno.append(item.select('crno')[0].text)    
    fncoCd.append(item.select('fncocd')[0].text)
    fncoNm.append(item.select('fnconm')[0].text)
    ClsfAmt.append(item.select('isukindelpslosratclsfamt')[0].text)
    RatDcd.append(item.select('isukindelpslosratdcd')[0].text)
    RatDcdNm.append(item.select('isukindelpslosratdcdnm')[0].text)

In [27]:
len(ClsfAmt)

10

In [28]:
ClsfAmt

['76.97',
 '75.77',
 '83.09',
 '91.3',
 '73.07',
 '90.89',
 '77.41',
 '81.78',
 '79.69',
 '83.94']

### 반복문을 활용해 한번에 API 요청 - 페이지별로 호출하여 결합
총 20160개 아이템을 페이지당 100개씩 호출하면, 총 202페이지 필요 

In [29]:
int(totalCount[0].text) // 100 + 1

210

In [30]:
"http://apis.data.go.kr/1160100/service/GetNonlInsuCompInfoService/getNonlInsuCompMajoBusiActi?serviceKey=" + ServiceKey + "&numOfRows=100&pageNo=" + str(1)

'http://apis.data.go.kr/1160100/service/GetNonlInsuCompInfoService/getNonlInsuCompMajoBusiActi?serviceKey=g0OhTQflzDelAVixM%2Bm0EtPvJvzcv1ZYwO%2Bj9b%2Fi4bmToDzAyifU%2FHxjCAhbsRMcozdUhj7E9i%2Bc5S7l3JzP9w%3D%3D&numOfRows=100&pageNo=1'

In [31]:
basYm = []
crno = []
fncoCd = []
fncoNm = []
ClsfAmt = []
RatDcd = []
RatDcdNm = []

# 손해보험주요영업활동조회
url = "http://apis.data.go.kr/1160100/service/GetNonlInsuCompInfoService/getNonlInsuCompMajoBusiActi?"

api_url = url + "serviceKey="+ ServiceKey + "&numOfRows="+ str(100)

for page in range(1, 203, 1):
    if page % 10 == 0:
        print(page)
    
    page_url = api_url + "&pageNo=" + str(page)
    
    req = requests.get(page_url)
    xml = req.text
    soup = BeautifulSoup(xml, 'html.parser')   
    
    # 한 페이지 아이템 목록
    items = soup.select('item')
    
    for item in items:
        basYm.append(item.select('basym')[0].text)
        crno.append(item.select('crno')[0].text)    
        fncoCd.append(item.select('fncocd')[0].text)
        fncoNm.append(item.select('fnconm')[0].text)
        ClsfAmt.append(item.select('isukindelpslosratclsfamt')[0].text)
        RatDcd.append(item.select('isukindelpslosratdcd')[0].text)
        RatDcdNm.append(item.select('isukindelpslosratdcdnm')[0].text)           

10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200


In [32]:
len(fncoNm)

20200

In [33]:
result = {
    'basYm': basYm, 
    'crno': crno, 
    'fncoCd': fncoCd, 
    'fncoNm': fncoNm, 
    'ClsfAmt': ClsfAmt, 
    'RatDcd': RatDcd, 
    'RatDcdNm': RatDcdNm,     
}

data = pd.DataFrame(result)
data.head()

Unnamed: 0,basYm,crno,fncoCd,fncoNm,ClsfAmt,RatDcd,RatDcdNm
0,201709,1101110013328,10626,메리츠화재해상보험주식회사,76.97,A,경과손해율_자동차
1,202109,1101110013328,10626,메리츠화재해상보험주식회사,75.77,A,경과손해율_자동차
2,201609,1101110013328,10626,메리츠화재해상보험주식회사,83.09,A,경과손해율_자동차
3,201403,1101110013328,10626,메리츠화재해상보험주식회사,91.3,A,경과손해율_자동차
4,202203,1101110013328,10626,메리츠화재해상보험주식회사,73.07,A,경과손해율_자동차


In [34]:
data.tail()

Unnamed: 0,basYm,crno,fncoCd,fncoNm,ClsfAmt,RatDcd,RatDcdNm
20195,201912,1101810046041,11952,하노버재보험(주) 한국지점,68.45,B,경과손해율_일반
20196,201912,1101810046041,11952,하노버재보험(주) 한국지점,78.17,C,경과손해율_장기
20197,201912,1101810046041,11952,하노버재보험(주) 한국지점,70.19,D,경과손해율_합계
20198,201912,1101810046041,11952,하노버재보험(주) 한국지점,0.0,E,순사업비율_자동차
20199,201912,1101810046041,11952,하노버재보험(주) 한국지점,26.04,F,순사업비율_일반


In [35]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20200 entries, 0 to 20199
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   basYm     20200 non-null  object
 1   crno      20200 non-null  object
 2   fncoCd    20200 non-null  object
 3   fncoNm    20200 non-null  object
 4   ClsfAmt   20200 non-null  object
 5   RatDcd    20200 non-null  object
 6   RatDcdNm  20200 non-null  object
dtypes: object(7)
memory usage: 1.1+ MB


In [36]:
# 숫자로 변환
data['ClsfAmt'] = data['ClsfAmt'].astype('float')

In [37]:
data['fncoNm'].str.contains("주식회사")

0         True
1         True
2         True
3         True
4         True
         ...  
20195    False
20196    False
20197    False
20198    False
20199    False
Name: fncoNm, Length: 20200, dtype: bool

In [38]:
data.loc[data['fncoNm'].str.contains("DB"), :]

Unnamed: 0,basYm,crno,fncoCd,fncoNm,ClsfAmt,RatDcd,RatDcdNm
259,201709,1101110095285,0010636,DB손해보험주식회사,79.05,A,경과손해율_자동차
260,202109,1101110095285,0010636,DB손해보험주식회사,77.93,A,경과손해율_자동차
261,201609,1101110095285,0010636,DB손해보험주식회사,80.64,A,경과손해율_자동차
262,201403,1101110095285,0010636,DB손해보험주식회사,85.64,A,경과손해율_자동차
263,202203,1101110095285,0010636,DB손해보험주식회사,77.20,A,경과손해율_자동차
...,...,...,...,...,...,...,...
19961,201903,1101110095285,0010636,DB손해보험주식회사,21.16,H,순사업비율_합계
19962,201903,1101110095285,0010636,DB손해보험주식회사,102.32,I,합산비율_자동차
19963,201903,1101110095285,0010636,DB손해보험주식회사,104.44,J,합산비율_일반
19964,201903,1101110095285,0010636,DB손해보험주식회사,106.39,K,합산비율_장기


In [39]:
# data.to_csv('파일이름.csv', index=False)

# [실습] 
"손해보험주요경영지표" 데이터를 수집하여 정리합니다. 