# Import Packages

In [1]:
import warnings
from io import StringIO

import pandas as pd
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
warnings.filterwarnings("ignore", category=FutureWarning)

# 抓取單一公司

In [2]:
import requests
import pandas as pd

# MOPS endpoints to scrape, one per financial statement
BS = "https://mops.twse.com.tw/mops/web/ajax_t164sb03"  # Balance Sheet
PL = "https://mops.twse.com.tw/mops/web/ajax_t164sb04"  # Income Statement
CF = "https://mops.twse.com.tw/mops/web/ajax_t164sb05"  # Cash Flow Statement

def financial_report(year, season, stock_number, report, session=None, timeout=30):
    """Download one MOPS financial statement and return it as a DataFrame.

    Posts the query form to ``report`` and parses the second HTML table
    (index 1) of the response.

    Parameters
    ----------
    year : int
        ROC (Minguo) year; 1911 is added to build the ``Year`` column.
    season : int
        Quarter, sent as the ``season`` form field.
    stock_number : int or str
        Company id, sent as the ``co_id`` form field.
    report : str
        Endpoint URL, e.g. ``BS``, ``PL`` or ``CF``.
    session : object, optional
        Anything with a requests-compatible ``post`` method (for example a
        ``requests.Session``). When omitted, the module-level ``requests``
        API is used, so existing four-argument calls keep working.
    timeout : float or None, optional
        Seconds forwarded to ``post`` so a stalled server cannot hang the
        notebook; ``None`` disables the limit.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year``, ``Season``, ``Company``, ``Account_Code``,
        ``Amount``, ``Percentage``; missing cells are empty strings.
        ``Account_Code`` is the first source column and ``Percentage`` the
        third. In the notebook's cash-flow output that third column holds
        values that look like comparison-period amounts, not percentages;
        the name is kept for compatibility.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError, IndexError
        If the response contains no table, or fewer than two tables.
    KeyError
        If the parsed table has fewer than three columns.
    """
    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'co_id': stock_number,
        'year': year,
        'season': season,
    }

    http = session if session is not None else requests
    response = http.post(report, data=form_data, timeout=timeout)
    # Fail on the HTTP error itself rather than on a confusing parse error later.
    response.raise_for_status()

    # Wrap the markup in a file-like object: passing literal HTML to read_html
    # is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(response.text))[1].fillna("")

    # Replace whatever header the page used with positional labels 1..n.
    df.columns = list(range(1, len(df.columns) + 1))

    # Identify the period and company on every row.
    df['Year'] = year + 1911
    df['Season'] = season
    df['Company'] = stock_number

    # Keep the identifiers plus the first three source columns, then name them.
    df = df[['Year', 'Season', 'Company', 1, 2, 3]]
    df.columns = ['Year', 'Season', 'Company', 'Account_Code', 'Amount', 'Percentage']
    return df

## Balance Sheet

In [3]:
# Balance sheet of company 1101 for ROC year 102 (Gregorian 2013), Q1
financial_report(year=102, season=1, stock_number=1101, report=BS)

Unnamed: 0,Year,Season,Company,Account_Code,Amount,Percentage
0,2013,1,1101,流動資產,,
1,2013,1,1101,現金及約當現金,18994984.0,6.96
2,2013,1,1101,透過損益按公允價值衡量之金融資產－流動,271687.0,0.1
3,2013,1,1101,備供出售金融資產－流動淨額,15066125.0,5.52
4,2013,1,1101,應收票據淨額,8919088.0,3.27
5,2013,1,1101,應收帳款淨額,9489955.0,3.48
6,2013,1,1101,應收帳款－關係人淨額,236032.0,0.09
7,2013,1,1101,其他應收款－關係人淨額,1820536.0,0.67
8,2013,1,1101,存貨,10148150.0,3.72
9,2013,1,1101,預付款項,4568191.0,1.67


## Income Statement

In [4]:
# Income statement of company 1101 for ROC year 102 (Gregorian 2013), Q1
financial_report(year=102, season=1, stock_number=1101, report=PL)

Unnamed: 0,Year,Season,Company,Account_Code,Amount,Percentage
0,2013,1,1101,營業收入合計,24114047.0,100.0
1,2013,1,1101,營業成本合計,20990175.0,87.05
2,2013,1,1101,營業毛利（毛損）,3123872.0,12.95
3,2013,1,1101,營業毛利（毛損）淨額,3123872.0,12.95
4,2013,1,1101,營業費用,,
5,2013,1,1101,推銷費用,221917.0,0.92
6,2013,1,1101,管理費用,863958.0,3.58
7,2013,1,1101,研究發展費用,11268.0,0.05
8,2013,1,1101,營業費用合計,1097143.0,4.55
9,2013,1,1101,營業利益（損失）,2026729.0,8.4


## Cash Flow Statement

In [5]:
# Cash flow statement of company 1101 for ROC year 102 (Gregorian 2013), Q1
financial_report(year=102, season=1, stock_number=1101, report=CF)

Unnamed: 0,Year,Season,Company,Account_Code,Amount,Percentage
0,2013,1,1101,營業活動之現金流量－間接法,,
1,2013,1,1101,繼續營業單位稅前淨利（淨損）,2340789.0,2579068.0
2,2013,1,1101,本期稅前淨利（淨損）,2340789.0,2579068.0
3,2013,1,1101,折舊費用,1571884.0,1489383.0
4,2013,1,1101,攤銷費用,88802.0,90539.0
...,...,...,...,...,...,...
62,2013,1,1101,匯率變動對現金及約當現金之影響,417270.0,41947.0
63,2013,1,1101,本期現金及約當現金增加（減少）數,-6616422.0,-3970292.0
64,2013,1,1101,期初現金及約當現金餘額,25611406.0,22140268.0
65,2013,1,1101,期末現金及約當現金餘額,18994984.0,18169976.0


# Company List

# 一次抓取多個年度、多間公司



In [6]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import pandas as pd

# MOPS endpoints to scrape, one per financial statement
BS = "https://mops.twse.com.tw/mops/web/ajax_t164sb03"  # Balance sheet
PL = "https://mops.twse.com.tw/mops/web/ajax_t164sb04"  # Income statement
CF = "https://mops.twse.com.tw/mops/web/ajax_t164sb05"  # Cash flow statement

# Shared Session with a retry policy (connect=3, backoff_factor=0.5)
# attached to both the HTTP and HTTPS transports
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
session = requests.Session()
for scheme in ('http://', 'https://'):
    session.mount(scheme, adapter)

def fetch_financial_reports(years, quarters, company_list, report):
    """Fetch one statement type for every year/quarter/company combination.

    Calls ``financial_report`` once per combination, passing the module-level
    ``session``, and stacks the non-empty results into a single frame.

    Parameters
    ----------
    years : iterable of int
        ROC (Minguo) years to fetch.
    quarters : iterable of int
        Quarters to fetch for each year.
    company_list : iterable
        Company ids to fetch for each year/quarter.
    report : str
        Endpoint URL, e.g. ``BS``, ``PL`` or ``CF``.

    Returns
    -------
    pandas.DataFrame
        All fetched rows with a fresh 0..n-1 index, ordered by year, then
        quarter, then company. An empty DataFrame when nothing was fetched.
    """
    # Collect the pieces and concatenate once: DataFrame.append no longer
    # exists in pandas 2.x, and growing a frame inside a loop is quadratic.
    frames = []
    for year in years:
        for quarter in quarters:
            for stock_number in company_list:
                statement = financial_report(year, quarter, stock_number, report, session)
                # Failed fetches come back as empty frames; leave them out.
                if not statement.empty:
                    frames.append(statement)

    # pd.concat raises on an empty list, so handle "nothing fetched" here.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

def financial_report(year, season, stock_number, report, session=None, timeout=30):
    """Download one MOPS financial statement, returning an empty frame on failure.

    Posts the query form to ``report`` and parses the second HTML table
    (index 1) of the response. Failures are reported with ``print`` and
    yield an empty DataFrame so a batch loop can continue.

    Parameters
    ----------
    year : int
        ROC (Minguo) year; 1911 is added to build the ``Year`` column.
    season : int
        Quarter, sent as the ``season`` form field.
    stock_number : int or str
        Company id, sent as the ``co_id`` form field.
    report : str
        Endpoint URL, e.g. ``BS``, ``PL`` or ``CF``.
    session : object, optional
        Anything with a requests-compatible ``post`` method. When omitted,
        the module-level ``requests`` API is used, so four-argument calls
        written for the earlier definition of this function still work.
    timeout : float or None, optional
        Seconds forwarded to ``post``; ``None`` disables the limit.

    Returns
    -------
    pandas.DataFrame
        Columns ``Year``, ``Season``, ``Company``, ``Account_Code``,
        ``Amount``, ``Percentage`` with missing cells as empty strings, or
        an empty DataFrame when the request fails, the status is not 200,
        or no usable table is found.
    """
    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'co_id': stock_number,
        'year': year,
        'season': season,
    }

    http = session if session is not None else requests

    # Network failures (including the timeout) are reported like any other
    # failed fetch instead of aborting the whole batch.
    try:
        r = http.post(report, data=form_data, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request failed for {year}-{season} and company {stock_number}: {exc}")
        return pd.DataFrame()

    # Anything other than 200 is treated as "no data".
    if r.status_code != 200:
        print(f"Failed to retrieve data for {year}-{season} and company {stock_number}. Status code: {r.status_code}")
        return pd.DataFrame()

    try:
        # Wrap the markup in a file-like object: passing literal HTML to
        # read_html is deprecated in recent pandas releases.
        df = pd.read_html(StringIO(r.text))[1].fillna("")

        # Replace whatever header the page used with positional labels 1..n.
        df.columns = list(range(1, len(df.columns) + 1))

        # Identify the period and company on every row.
        df['Year'] = year + 1911
        df['Season'] = season
        df['Company'] = stock_number

        # Keep the identifiers plus the first three source columns, then name them.
        df = df[['Year', 'Season', 'Company', 1, 2, 3]]
        df.columns = ['Year', 'Season', 'Company', 'Account_Code', 'Amount', 'Percentage']
        return df
    except (IndexError, ValueError, KeyError):
        # No tables, fewer than two tables, or a table with fewer than three columns.
        print(f"No table found for {year}-{season} and company {stock_number}")
        return pd.DataFrame()

In [7]:
# ROC (Minguo) years and quarters to fetch, and the companies to cover
years = [110, 111]
quarters = list(range(1, 5))
company_list = [1101, 1102]

# One combined balance-sheet frame for every year/quarter/company combination
result_df = fetch_financial_reports(
    years=years,
    quarters=quarters,
    company_list=company_list,
    report=BS,
)

In [8]:
# Summarize the combined frame: row count, column dtypes and non-null counts
result_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1063 entries, 0 to 1062
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Year          1063 non-null   int64 
 1   Season        1063 non-null   int64 
 2   Company       1063 non-null   int64 
 3   Account_Code  1063 non-null   object
 4   Amount        1063 non-null   object
 5   Percentage    1063 non-null   object
dtypes: int64(3), object(3)
memory usage: 50.0+ KB


In [10]:
# Show the last five rows of the combined frame
result_df.tail(n=5)

Unnamed: 0,Year,Season,Company,Account_Code,Amount,Percentage
1058,2022,4,1102,非控制權益,23416398.0,7.22
1059,2022,4,1102,權益總額,184233245.0,56.82
1060,2022,4,1102,負債及權益總計,324243127.0,100.0
1061,2022,4,1102,預收股款（權益項下）之約當發行股數（單位：股）,0.0,
1062,2022,4,1102,母公司暨子公司所持有之母公司庫藏股股數（單位：股）,0.0,


In [11]:
# Distinct Gregorian years present in the combined frame
result_df.loc[:, 'Year'].unique()

array([2021, 2022])

In [12]:
# Distinct company ids present in the combined frame
result_df.loc[:, 'Company'].unique()

array([1101, 1102])