In [1]:
import requests
import pandas as pd
import json
import numpy as np
from datetime import datetime, date
import csv
import time
import os
from io import StringIO
import ssl
# NOTE(review): swaps the standard library's default HTTPS context factory for the
# unverified one, so urllib-style HTTPS calls made after this line skip certificate
# verification. The visible code downloads through `requests.get`; confirm whether
# this process-wide override is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
# Turn off pandas' chained-assignment warning for the whole session (pandas' own default is 'warn').
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Dates are [year, month, day]; the cells below zero-pad month and day to two
# digits when they build file names from these lists.

# Date stamp of the existing pool workbook that is read as input.
time_pre_data = [
    2024,  # year
    4,     # month
    17,    # day
]

# Date stamp of the daily trend CSV / analysis workbook to read and of the
# pool workbook to write.
time_data = [
    2024,  # year
    4,     # month
    18,    # day
]

In [3]:
def monthly_report(year, month):
    """Download one month of the MOPS revenue report and return it as a DataFrame.

    Parameters
    ----------
    year : int
        Gregorian year. 1911 is subtracted before it is placed in the URL
        (presumably the ROC/Minguo year the site uses -- confirm).
    month : int
        Month number, inserted into the URL without zero padding.

    Returns
    -------
    pandas.DataFrame
        All tables on the page that have 6 to 11 columns, stacked together.
        The columns '公司代號', '上月比較增減(%)' and '去年同月增減(%)' are renamed
        to 'ID', 'mom' and 'yoy'; rows whose 'ID' is '合計' are dropped. The
        frame also carries an 'index' column produced by ``reset_index()``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not answer within the timeout.
    ValueError
        If the page holds no table of the expected width.

    Notes
    -----
    An earlier revision carried a commented-out alternative URL without the
    trailing ``_0`` for converted years <= 98; that case is not handled here.
    """
    converted_year = year - 1911

    url = ('https://mops.twse.com.tw/nas/t21/sii/t21sc03_'
           + str(converted_year) + '_' + str(month) + '_0.html')

    # Present a browser User-Agent to the server.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

    # Download the page for that year/month. A timeout keeps a stalled
    # connection from hanging the notebook, and an HTTP error is surfaced here
    # instead of as a confusing parse failure further down.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    response.encoding = 'big5'

    # Let pandas turn every HTML table on the page into a DataFrame.
    tables = pd.read_html(StringIO(response.text), encoding='big-5')

    # Keep only the tables whose width is in the 6..11 column range.
    report = pd.concat([table for table in tables if 5 < table.shape[1] <= 11])

    # Two-level headers: keep the second level as the column names.
    if isinstance(report.columns, pd.MultiIndex):
        report.columns = report.columns.get_level_values(1)

    report = report.rename(columns={
        '公司代號': 'ID',
        '上月比較增減(%)': 'mom',
        '去年同月增減(%)': 'yoy',
    })

    # Drop the '合計' rows, then renumber. reset_index() is deliberately left
    # without drop=True so the returned frame still has the 'index' column.
    report = report[report['ID'] != '合計'].reset_index()

    # Short pause after each download.
    time.sleep(0.15)

    return report

def get_stock_revenue_info(stock, year, month):
    """Look up one company's month-over-month and year-over-year figures.

    Parameters
    ----------
    stock : str or int
        Company code to look for in the report's 'ID' column. Both the code
        and the column are compared as strings, so ``1101`` and ``'1101'``
        find the same row.
    year, month : int
        Passed unchanged to ``monthly_report``.

    Returns
    -------
    list or None
        ``[mom, yoy]`` taken from the first matching row, or ``None`` when no
        row has that ID.
    """
    report = monthly_report(year, month)

    # Normalise both sides to str: comparing an int code with str IDs (or the
    # reverse) would otherwise match nothing and silently return None.
    matches = report.loc[report['ID'].astype(str) == str(stock)]
    if matches.empty:
        return None

    first_match = matches.iloc[0]
    return [first_match['mom'], first_match['yoy']]
# revenue_info = get_stock_revenue_info('1101', 2024, 3)

In [4]:
# Build the path of the pool workbook stamped with time_pre_data (YYYYMMDD,
# month and day zero-padded).
pool_stamp = (
    time_pre_data[0],
    str(time_pre_data[1]).zfill(2),
    str(time_pre_data[2]).zfill(2),
)
stock_pool_path = r"D:\Stock\pool\stock_pool_%s%s%s.xlsx" % pool_stamp

# Read the workbook, forcing '推薦日期' and '代號' to be parsed as strings.
# (An earlier csv.reader-based loader for this file has been retired.)
df_pool = pd.read_excel(stock_pool_path, converters={'推薦日期': str, '代號': str})

# Discard the workbook's first column.
df_pool = df_pool.drop(columns=df_pool.columns[0])

In [5]:
# Path of the daily trend CSV stamped with time_data (YYYYMMDD).
daily_stamp = (
    time_data[0],
    str(time_data[1]).zfill(2),
    str(time_data[2]).zfill(2),
)
target_csv_path = r"D:\Stock\daily_result\%s%s%s_trend.csv" % daily_stamp

# File layout as consumed here: row 0 -> daily_date, row 1 -> title,
# every later row -> one data record.
daily_date = None
daily_data_list = []
with open(target_csv_path, newline='') as csvfile:
    for line_no, record in enumerate(csv.reader(csvfile)):
        if line_no == 0:
            daily_date = record
        elif line_no == 1:
            title = record
        else:
            daily_data_list.append(record)

# Columns are left with their default integer labels (0, 1, 2, ...).
df_daily = pd.DataFrame(daily_data_list)

In [6]:
# Path of the analysis workbook stamped with time_data (YYYYMMDD).
analysis_stamp = (
    time_data[0],
    str(time_data[1]).zfill(2),
    str(time_data[2]).zfill(2),
)
profit_loss_path = r"D:\Stock\analyze_result\%s%s%s.xlsx" % analysis_stamp

# Read it with '推薦日期' and '代號' forced to strings.
df_profit_loss = pd.read_excel(profit_loss_path, converters={'推薦日期': str, '代號': str})

# Keep the rows whose '停損停利' value is non-zero, then discard the first column.
has_exit_flag = df_profit_loss['停損停利'] != 0
df_stock_out = df_profit_loss.loc[has_exit_flag]
df_stock_out = df_stock_out.drop(columns=df_stock_out.columns[0])

In [7]:
# Collect the daily records whose code is not already in the pool.
# Each collected entry is
# [date string, code, name, pe, stock_yield, pb, price_suggest, isOTC].
new_pool_list = []
if len(df_daily) > 0:
    # The date string is the same for every row, so build it once:
    # year + zero-padded month + zero-padded day from the CSV's first row.
    daily_date_str = daily_date[0] + daily_date[1].zfill(2) + daily_date[2].zfill(2)

    # One set lookup per row instead of scanning the whole pool column each time.
    pooled_ids = set(df_pool['代號'])

    for row_no in range(len(df_daily)):
        # The first seven columns of the daily frame, in file order.
        # `stock_id` (not `id`) keeps the builtin id() usable in the notebook.
        stock_id, name, pe, stock_yield, pb, price_suggest, isOTC = (
            df_daily.iloc[row_no, col] for col in range(7)
        )
        if stock_id not in pooled_ids:
            new_pool_list.append(
                [daily_date_str, stock_id, name, pe, stock_yield, pb, price_suggest, isOTC]
            )

In [8]:
# Build the new pool: pool rows whose code does not appear in df_stock_out,
# followed by the newly collected candidates.
#
# DataFrame.append no longer exists in pandas 2.x, and appending row by row
# copies the frame on every iteration. A boolean mask plus one pd.concat gives
# the same rows in the same order.
still_in_pool = ~df_pool['代號'].isin(df_stock_out['代號'])
df_kept = df_pool.loc[still_in_pool]

# Passing `columns=` to the constructor also works when new_pool_list is empty;
# assigning .columns afterwards raises a length mismatch on an empty frame.
df_new_add = pd.DataFrame(
    new_pool_list,
    columns=['推薦日期', '代號', '名稱', '本益比', '殖利率', '淨值比', '推薦股價', '是否上櫃'],
)

# ignore_index=True renumbers rows 0..n-1 in one step. Pool columns that the
# new candidates lack are filled with NaN for those rows.
df_new = pd.concat([df_kept, df_new_add], ignore_index=True)

# Replace the textual placeholders 'nan' and 'N/A' with numeric stand-ins so
# the columns can be cast to float later.
placeholder_fill = {'本益比': 999.9, '殖利率': 0, '淨值比': 999.9}
for column, fill_value in placeholder_fill.items():
    df_new[column] = df_new[column].replace(['nan', 'N/A'], fill_value)

In [9]:
# Cast the identifying columns to str, the valuation columns to float and the
# '是否上櫃' flag to int32.
pool_column_dtypes = {
    '推薦日期': 'str',
    '代號': 'str',
    '本益比': 'float',
    '殖利率': 'float',
    '淨值比': 'float',
    '推薦股價': 'float',
    '是否上櫃': 'int32',
}
df_new = df_new.astype(pool_column_dtypes)

In [10]:
# Name the twelve pool columns by position; pandas raises ValueError here if
# df_new does not have exactly twelve columns.
df_new.columns = ['推薦日期', '代號', '名稱', '本益比', '殖利率', '淨值比', '推薦股價', '是否上櫃', '是否買進', '技術面', '基本面', '營收']

# Output workbook stamped with time_data (YYYYMMDD).
output_file_path = r"D:\Stock\pool\stock_pool_%s%s%s.xlsx" % (time_data[0], str(time_data[1]).zfill(2), str(time_data[2]).zfill(2))

# Never overwrite a pool workbook that already exists.
if os.path.exists(output_file_path):
    print("File exist!!!")
else:
    # The former `encoding='utf_8_sig'` argument is gone: pandas 2.0 removed
    # `encoding` from to_excel, so passing it raises TypeError there.
    df_new.to_excel(output_file_path)

In [11]:
# stock_pool_path = r"D:\Stock\pool\stock_pool_test.csv"
# with open(stock_pool_path, 'a', newline='') as csvfile:
#     writer = csv.writer(csvfile)
    
#     writer.writerows(new_pool_list)