# EDINETから「主な相手先別の販売実績」を抽出する

import libraries

In [55]:
!pip install edinet_xbrl



In [56]:
import requests
import pandas as pd
import zipfile
import os
import shutil
from glob import glob
from edinet_xbrl.edinet_xbrl_parser import EdinetXbrlParser
from typing import Dict, List
from datetime import datetime
from dateutil.relativedelta import relativedelta
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

In [71]:
END_POINT = 'https://disclosure.edinet-fsa.go.jp/api/v1'
submission_info_endpoint = f'{END_POINT}/documents.json'

# 最終的な返り値のdataframe
output_df = pd.DataFrame(columns=['相手先', '前連結_金額（百万円）', '前連結_割合（%）', '当連結_金額（百万円）', '当連結_割合（%）'])

# 開始日と終了日を設定
start_date = datetime(2022, 5, 1)
end_date = datetime(2022, 5, 31)

# 1日ごとにデータを取得
current_date = start_date
while current_date <= end_date:
    submission_request_parameters = {
        'date': current_date.strftime('%Y-%m-%d'),
        'type': 2
    }
    submission_info_response = requests.get(submission_info_endpoint, params=submission_request_parameters)
    submission_info_json = submission_info_response.json()

    
    # 取得したデータを処理するコードをここに追加

    raw_submission_info_df = pd.DataFrame(submission_info_json['results'])
    # raw_submission_info_df.columns
    
    if any(col not in raw_submission_info_df.columns 
           for col in ['docID', 'edinetCode', 'secCode', 'filerName', 'docDescription']):
        print(f'{current_date}: 有価証券報告書の提出情報がありません。')
        current_date += timedelta(days=1)
        continue

    # 重要なカラムに絞る
    submission_info_df = raw_submission_info_df[['docID', 'edinetCode', 'secCode', 'filerName', 'docDescription']]
    # submission_info_df.head()
    
    # 有価証券報告書の情報を抽出する。
    securities_report_infos = []
    for i, row in submission_info_df.iterrows():
        doc_desc = row['docDescription']
        doc_seccode = row['secCode']
        
        if doc_desc is None:
            continue
        
        if doc_seccode is None:
            continue
        
        if ('有価証券報告書' in doc_desc) and ('受益証券' not in doc_desc) and ('訂正' not in doc_desc) and ('外国投資証券' not in doc_desc):
            row_to_dataframe = pd.DataFrame([row])
            securities_report_infos.append(row_to_dataframe)

    if len(securities_report_infos) == 0:
        print(f'{current_date}: 有価証券報告書の提出情報がありません。')
        current_date += timedelta(days=1)
        continue
    else:
        print(f'{current_date}: {len(securities_report_infos)} 件の有価証券報告書が抽出されました。')
        securities_report_info_df = pd.concat(securities_report_infos)

    for docID in securities_report_info_df['docID']:
        
        label = securities_report_info_df[securities_report_info_df['docID'] == docID]
        document_endpoint = f'{END_POINT}/documents/{docID}'
        document_request_parameters = {
            'type': 1
        }
        document_response = requests.get(document_endpoint, document_request_parameters)
        
        # まず、返ってきたデータを zip 形式で保存する。
        zip_file_full_path = f'D:/EDINET_DATA/{docID}.zip'
        with open(zip_file_full_path, 'wb') as f:
            for chunk in document_response.iter_content(chunk_size=1024):
                f.write(chunk)
        
        # zip ファイルを解凍する
        output_dir = f'D:/EDINET_DATA/{docID}'
        os.makedirs(output_dir, exist_ok=True)
        with zipfile.ZipFile(zip_file_full_path) as zip_f:
            zip_f.extractall(output_dir)
        
        # xbrl ファイルを発見する
        # PublicDoc 内に格納されている xbrl ファイルが分析対象となるファイルである。
        xbrl_expression = f'D:/EDINET_DATA/{docID}/**/PublicDoc/**/*.xbrl'
        xbrl_paths = glob(xbrl_expression, recursive=True)
        
        # print(xbrl_paths)
        
        parser = EdinetXbrlParser()
        # Step2で特定した XBRL ファイルのパスを選択
        if xbrl_paths == []:
            continue
        else:
            xbrl_path = xbrl_paths[0]
        parsed_xbrl = parser.parse_file(xbrl_path)
        
        
        # 経営者による財政状態、経営成績及びキャッシュ・フローの状況の分析 [テキストブロック]の取得
        key = 'jpcrp_cor:ManagementAnalysisOfFinancialPositionOperatingResultsAndCashFlowsTextBlock'
        context_ref = 'FilingDateInstant'
        extracted_data = parsed_xbrl.get_data_by_context_ref(key, context_ref)
        
        if extracted_data is None:
            continue
        else:
            ManagementAnalysis = extracted_data.get_value()
    
        def extract_paragraph_and_following_table(html_code, target_text):
            # BeautifulSoupを使ってパース
            soup = BeautifulSoup(html_code, 'html.parser')

            # 特定の文字列を含むパラグラフを抽出
            target_paragraphs = [p for p in soup.find_all('p') if target_text in p.text]

            # 対応する表を抽出
            tables = []
            for paragraph in target_paragraphs:
                table = paragraph.find_next('table')
                if table:
                    tables.append(table)

            return target_paragraphs, tables
    
    
        def extract_table_content(table):
            # テーブルの行を取得
            rows = table.find_all('tr')

            # 各行のデータを取得
            table_data = []
            for row in rows:
                cols = row.find_all(['th', 'td'])
                cols = [col.text.strip() for col in cols]
                table_data.append(cols)

            return table_data

        # 使用例
        html_code = ManagementAnalysis

        # 例として特定の文字列を含むパラグラフとその次に続く表を抽出する関数を呼び出す
        target_text = "主な相手先別"
        paragraphs, tables = extract_paragraph_and_following_table(html_code, target_text)
        
        for p in paragraphs:
            if '主な相手先別' in p.get_text():
                supplier_text = p.get_text()
        
        if '省略' in supplier_text:
            # table_content = [[], [], ['-', '-', '-', '-', '-']]
            table_content = []
        else:
            # 抽出したテーブルの内容を表示
            if tables:
                for table in tables:
                    table_content = extract_table_content(table)
            else:
                # table_content = [[], [], ['-', '-', '-', '-', '-']]
                table_content = []
        
        # テーブルの内容をデータフレームに変換
        additional_columns = ['相手先', '前連結_金額（百万円）', '前連結_割合（%）', '当連結_金額（百万円）', '当連結_割合（%）']

        result = [item for item in table_content if len(item) == 5]
        
        # データフレームを結合
        for element in result:
            label[additional_columns] = element
            output_df = pd.concat([label, output_df], axis=0)

        # display(output_df)
        
        # 使用が終わったファイルを削除する
        shutil.rmtree(output_dir)  # ディレクトリごと削除
        os.remove(zip_file_full_path)  # zipファイルを削除

    current_date += timedelta(days=1)

2022-05-02 00:00:00: 有価証券報告書の提出情報がありません。
2022-05-06 00:00:00: 有価証券報告書の提出情報がありません。
2022-05-09 00:00:00: 有価証券報告書の提出情報がありません。
2022-05-10 00:00:00: 有価証券報告書の提出情報がありません。
2022-05-11 00:00:00: 1 件の有価証券報告書が抽出されました。




2022-05-12 00:00:00: 2 件の有価証券報告書が抽出されました。




2022-05-13 00:00:00: 1 件の有価証券報告書が抽出されました。




2022-05-16 00:00:00: 2 件の有価証券報告書が抽出されました。




2022-05-17 00:00:00: 2 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-18 00:00:00: 2 件の有価証券報告書が抽出されました。




2022-05-19 00:00:00: 4 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-20 00:00:00: 5 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-23 00:00:00: 5 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-24 00:00:00: 6 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-25 00:00:00: 21 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-26 00:00:00: 42 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-27 00:00:00: 74 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-30 00:00:00: 44 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

2022-05-31 00:00:00: 13 件の有価証券報告書が抽出されました。


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  label[additional_columns] = element
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col

EDINET API より財務情報の一覧を取得

In [72]:
output_df.head(-5)

Unnamed: 0,docID,edinetCode,secCode,filerName,docDescription,相手先,前連結_金額（百万円）,前連結_割合（%）,当連結_金額（百万円）,当連結_割合（%）
413,S100O5VV,E32736,35500,株式会社スタジオアタオ,有価証券報告書－第18期(令和3年3月1日－令和4年2月28日),㈱デジサーチアンドアドバタイジング,2113905,52.8,1865491,51.6
351,S100O5SZ,E35278,44900,株式会社ビザスク,有価証券報告書－第10期(令和3年3月1日－令和4年2月28日),株式会社ボストン・コンサルティング・グループ,183729,11.4,403713,10.9
304,S100O54H,E03054,80050,株式会社スクロール,有価証券報告書－第81期(令和3年4月1日－令和4年3月31日),コープデリ生活協同組合連合会,11323,13.3,10888,13.4
304,S100O54H,E03054,80050,株式会社スクロール,有価証券報告書－第81期(令和3年4月1日－令和4年3月31日),,金額（百万円）,割合（％）,金額（百万円）,割合（％）
263,S100O5IH,E31694,61730,株式会社アクアライン,有価証券報告書－第27期(令和3年3月1日－令和4年2月28日),ＲＯＹ株式会社,5942,0.1,1537823,29.2
...,...,...,...,...,...,...,...,...,...,...
97,S100O3KM,E35288,70770,株式会社ＡＬｉＮＫインターネット,有価証券報告書－第9期(令和3年3月1日－令和4年2月28日),Outbrain Japan株式会社,72768,11.9,94482,14.5
97,S100O3KM,E35288,70770,株式会社ＡＬｉＮＫインターネット,有価証券報告書－第9期(令和3年3月1日－令和4年2月28日),グーグル合同会社,239958,39.3,261045,40.2
67,S100O3AO,E01165,52710,株式会社トーヨーアサノ,有価証券報告書－第79期(令和3年3月1日－令和4年2月28日),ＪＦＥ建材㈱,－,－,1920460,10.8
149,S100O3OR,E05328,23540,株式会社ＹＥ　ＤＩＧＩＴＡＬ,有価証券報告書－第45期(令和3年3月1日－令和4年2月28日),富士通株式会社,1417114,9.8,1769434,12.9
