In [1]:
import sys

# Add the sibling ../util directory to the module search path. The path is
# relative, so it is resolved against the current working directory.
# NOTE(review): presumably this is where kis_auth / kis_domstk live — confirm
# against the repository layout.
sys.path.append(r'../util')

In [None]:
import kis_auth as ka
import kis_domstk as kb

import os
import time
import logging
import pandas as pd
from datetime import datetime, timedelta
from tqdm import tqdm

import duckdb


# Authenticate with the KIS API helper before any data request is made.
ka.auth()

# Everything this script produces (DuckDB file and log file) lives in ~/data/news.
destdir = os.path.join(os.path.expanduser('~'), 'data', 'news')
os.makedirs(destdir, exist_ok=True)

db_path = os.path.join(destdir, 'news_database.duckdb')
table_name = 'news_titles'

log_file_path = os.path.join(destdir, 'news_processing.log')

# Send log records to the log file. Without this the root logger keeps its
# default WARNING level and no file handler, so the INFO/DEBUG messages emitted
# by this script would be dropped and the log file the user is told to check
# would never be written. The guard leaves any logging configuration that was
# installed earlier (e.g. by an imported module or a previous cell run) alone.
if not logging.getLogger().handlers:
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s %(message)s',
        handlers=[logging.FileHandler(log_file_path, encoding='utf-8')],
    )

# Columns removed from each fetched chunk before it is inserted: the 2nd..10th
# related-issue code/name pairs, which have no counterpart in the table schema
# (only iscd1 / kor_isnm1 are stored). The unique key 'cntt_usiq_srno' must
# never be added to this list, because the insert step depends on it.
dropcols = ['iscd2', 'iscd3', 'iscd4', 'iscd5', 'iscd6',
            'iscd7', 'iscd8', 'iscd9', 'iscd10',
            'kor_isnm2', 'kor_isnm3', 'kor_isnm4',
            'kor_isnm5', 'kor_isnm6', 'kor_isnm7',
            'kor_isnm8', 'kor_isnm9', 'kor_isnm10']

# DuckDB connection handle; stays None until the connection is opened so the
# cleanup code can tell whether there is anything to close.
con = None
pbar = None  # Progress-bar placeholder (not used by the code visible here)

# Main polling job: open the DuckDB database, make sure the target table
# exists, then poll the news-title API every few seconds and append unseen rows
# until the user interrupts the run. The connection is always closed on exit.
try:
    con = duckdb.connect(database=db_path, read_only=False)
    logging.info(f"Connected to DuckDB database: {db_path}")

    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {table_name} (
        "cntt_usiq_srno"        VARCHAR NOT NULL UNIQUE,
        "news_ofer_entp_code"   VARCHAR,
        "data_dt"               VARCHAR(8),
        "data_tm"               VARCHAR(6),
        "hts_pbnt_titl_cntt"    VARCHAR,
        "news_lrdv_code"        VARCHAR,
        "dorg"                  VARCHAR,
        "iscd1"                 VARCHAR,
        "kor_isnm1"             VARCHAR
    );
    """
    try:
        con.sql(create_table_sql)
        logging.info(f"Ensured table '{table_name}' exists with the correct schema and unique constraint.")
    except Exception as e:
        # Without the table nothing else can work; log and abort the run.
        logging.error(f"FATAL: Failed to create or verify table '{table_name}': {e}")
        raise

    # Running count of rows seen in fetched chunks. NOTE: rows skipped by
    # INSERT OR IGNORE are still counted, and the counter is never reset.
    total_rows_processed_today = 0

    # A single KeyboardInterrupt handler around the whole loop, so Ctrl+C is
    # honoured wherever it arrives, including during the error back-off sleep.
    try:
        while True:  # Runs indefinitely until interrupted
            try:
                # Take one timestamp so the date and time sent to the API can
                # never come from opposite sides of midnight.
                now = datetime.now()
                yyyymmdd_api = now.strftime('%Y%m%d')
                # The time is left-padded with zeros to 10 characters.
                hhmmss_api = now.strftime("%H%M%S").rjust(10, "0")

                try:
                    news_data = kb.get_news_titles(date_1=yyyymmdd_api, hour_1=hhmmss_api)
                except Exception as fetch_err:
                    logging.error(f"Error fetching data for {yyyymmdd_api} at API time {hhmmss_api}: {fetch_err}")
                    time.sleep(10)
                    # Nothing was fetched: retry instead of falling through to
                    # an unbound (first pass) or stale (later pass) news_data.
                    continue

                # Explicit None/length test instead of truthiness, which would
                # raise if the helper returns a pandas DataFrame.
                # NOTE(review): the helper's return type is not visible here.
                if news_data is not None and len(news_data) > 0:
                    try:
                        news_chunk_df = pd.DataFrame(news_data)
                        rows_in_chunk = len(news_chunk_df)

                        # Drop only the unwanted columns that are actually present.
                        cols_to_drop_actual = [col for col in dropcols if col in news_chunk_df.columns]
                        if cols_to_drop_actual:
                            news_chunk_df.drop(cols_to_drop_actual, axis=1, inplace=True)

                        if 'cntt_usiq_srno' not in news_chunk_df.columns:
                            logging.warning(f"Unique key 'cntt_usiq_srno' not found in chunk for {yyyymmdd_api} at {hhmmss_api[-6:]}. Skipping insert.")
                        else:
                            # DuckDB resolves `news_chunk_df` in the SQL text to
                            # the Python variable of that name, so the variable
                            # must keep this name. Rows whose key already exists
                            # are skipped via the UNIQUE constraint.
                            con.sql(f"INSERT OR IGNORE INTO {table_name} SELECT * FROM news_chunk_df")
                            total_rows_processed_today += rows_in_chunk
                            logging.debug(f"Processed {rows_in_chunk} rows (inserted new) for {yyyymmdd_api} at {hhmmss_api[-6:]} into DB table '{table_name}'.")

                    except Exception as proc_err:
                        # A bad chunk must not stop the poller; log and move on.
                        logging.error(f"Error processing or inserting chunk for {yyyymmdd_api} at {hhmmss_api[-6:]}: {proc_err}")

                # Polling interval between API requests.
                time.sleep(5)

            except Exception as loop_err:
                # Catch any other unexpected errors in the main loop
                logging.error(f"An unexpected error occurred in the main processing loop: {loop_err}", exc_info=True)
                logging.info("Pausing for 60 seconds before potentially retrying...")
                time.sleep(60)  # Pause to prevent rapid error loops

    except KeyboardInterrupt:
        logging.info("KeyboardInterrupt received. Shutting down gracefully...")

except Exception as setup_err:
    logging.error(f"A critical error occurred during setup: {setup_err}", exc_info=True)

finally:
    # Release the database file whether the run ended normally or with an error.
    if con:
        con.close()
        logging.info("Closed DuckDB connection.")

logging.info("News processing script finished.")
print("Processing stopped.")
print(f"Check log file: {log_file_path}")
print(f"Data stored in DuckDB file: {db_path}, table: {table_name}")