In [None]:
# !pip install --upgrade google-ai-generativelanguage
!pip install -q -U google-generativeai
!pip install ollama pydantic pdfplumber langchain

!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# 실행 환경
from google.colab import drive
from google.colab import userdata
from dotenv import load_dotenv

# 모델
from google import genai
import ollama
from ollama import chat

# RAG
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel
import pdfplumber

import numpy as np
import pandas as pd

from datetime import datetime
import os, subprocess, time, json, re, shutil
import warnings
from typing import Callable

# 병렬처리
from concurrent.futures import ThreadPoolExecutor, as_completed#, ProcessPoolExecutor

코랩 환경 여부

In [None]:
def is_colab():
    """Return True when the ``google.colab`` module can be imported, else False."""
    try:
        import google.colab  # noqa: F401 -- only the import's success matters
    except ImportError:
        return False
    else:
        return True

IS_COLAB_ENV = is_colab()

# 데이터 전처리

## PDF 임포트

In [None]:
# Mount Google Drive under /content/drive; force_remount=False is passed, so a remount is not forced.
drive.mount('/content/drive', force_remount=False)

In [None]:
# Source directory of the report PDFs, and the directory for finished PDFs.
pdf_path = '/content/drive/My Drive/이어드림/pdfs'
pdf_finished_path = '/content/drive/My Drive/이어드림/pdf_finished'

# Report whether the source directory is reachable.
_existence_phrase = "존재합니다" if os.path.exists(pdf_path) else "존재하지 않습니다"
print(f"디렉터리 '{pdf_path}'가 {_existence_phrase}.")

In [None]:
# 테스트 5개만
# 실제 실행 시는 이거 주석처리
# pdf_path = '/content/drive/My Drive/이어드림/pdfs_test'

In [None]:
def get_report_pdf_files(directory_path, is_test:bool=False, test_num:int=5, verbose:bool=False) -> list:
    """List the PDF file names found directly inside a directory.

    Args:
        directory_path: Directory to scan (not recursive).
        is_test: When True, keep only the first ``test_num`` names.
        test_num: Size of the test subset used when ``is_test`` is True.
        verbose: When True, print every selected file name.

    Returns:
        File names (not full paths) whose extension is ``.pdf`` in any letter
        case, sorted by name.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # selection (and therefore the test subset) reproducible between runs.
    # The extension check is case-insensitive so "REPORT.PDF" is not skipped.
    pdf_files = sorted(
        item for item in os.listdir(directory_path)
        if item.lower().endswith('.pdf')
    )

    if is_test:
        pdf_files = pdf_files[:test_num]

    if verbose:
        print("Selected PDF files for processing:")
        for file in pdf_files:
            print(file)

    return pdf_files


# selected_pdf_files = get_report_pdf_files(pdf_path, is_test=True, test_num=5)
# selected_pdf_files

In [None]:
# Extraction instructions sent to the LLM together with a report PDF.
# This is a plain string (never passed through str.format or an f-string), so
# the JSON example uses single braces: doubled braces would reach the model
# literally and show it an invalid JSON object. The example opinion value is
# spelled "Buy" to agree with the allowed values listed in the instructions.
system_prompt = """
You are a highly skilled information extraction bot.
Your task is to extract specific information from the provided securities report PDF file.
Extract the following details and return them in JSON format:

- 종목명 (Stock Name)
- 종목코드 (티커) (Stock Code/Ticker)
- 작성일 (Date of Report)
- 현재 주가 (Current Stock Price - only numeric value)
- 목표 주가 (Target Stock Price - only numeric value)
- 투자 의견 (Investment Opinion - only in "Buy", "Hold" or "Sell")
- 작성 애널리스트 (Author Analyst)
- 소속 증권사 (Affiliated Securities Firm)

If a piece of information is not found, use 'N/A' for string values and 0 for numeric values.

Return only the JSON object. Do not include any other text.

Example JSON format:
{
  "종목명": "Example Stock",
  "종목코드": "000000",
  "작성일": "YYYY-MM-DD",
  "현재 주가": 10000,
  "목표 주가": 12000,
  "투자 의견": "Buy",
  "작성 애널리스트": "Analyst Name",
  "소속 증권사": "Securities Firm Name"
}
"""

## gemini api

In [None]:
def ask_gemini(directory_path:str, file_name:str, prompt:str=system_prompt, api_key:str=None, return_dict:bool=True, sleep:int=5) -> "dict | str | None":
    """Upload one PDF to Gemini and return the information it extracts.

    Args:
        directory_path: Directory that contains the PDF.
        file_name: PDF file name; when None nothing is processed.
        prompt: Instruction text sent along with the uploaded file.
        api_key: Gemini API key. When None it is read from Colab secrets
            (in Colab) or from the ``GOOGLE_API_KEY`` environment variable
            after ``load_dotenv()``.
        return_dict: When True the reply is parsed with ``json.loads``;
            otherwise the cleaned reply text is returned.
        sleep: Seconds to pause after each request (0 disables the pause).

    Returns:
        The parsed JSON value or the reply text, or None when ``file_name``
        is None or any error occurred (the error is printed, not raised).
    """
    if file_name is None:
        print("No PDF files were selected for processing. Please run the previous cell.")
        return None

    if api_key is None:
        if IS_COLAB_ENV:
            api_key = userdata.get('GOOGLE_API_KEY') # Load the API key from Colab secrets
        else:
            load_dotenv()
            api_key = os.getenv('GOOGLE_API_KEY')

    client = genai.Client(api_key=api_key)
    file_path = os.path.join(directory_path, file_name)

    try:
        # Upload the file, then ask the model about it with the prompt.
        sample_file = client.files.upload(file=file_path)
        response = client.models.generate_content(model="gemini-2.5-flash",
                                                  contents=[sample_file, prompt])

        raw_text = response.text
        if raw_text is None:
            # Without this guard the failure would surface as an opaque
            # "'NoneType' object has no attribute 'replace'".
            raise ValueError("Gemini returned a response without text")

        # Remove Markdown code fences that may wrap the JSON payload.
        result = raw_text.replace("```json", "").replace("```", "").strip()

        return json.loads(result) if return_dict else result

    except Exception as e:
        # Best effort: callers treat None as "nothing extracted".
        print(f"An error occurred: {e}")
        return None

    finally:
        # Pause after failures too, so a rate-limited request is not
        # immediately followed by the next one.
        if sleep > 0:
            time.sleep(sleep)

테스트

In [None]:
# for pdf_file in selected_pdf_files:
#     print("Extracted Information (JSON) =>", pdf_file)
#     print(ask_gemini(pdf_path, pdf_file))

## ollama 로컬 (qwen, llama)

**ollama 사용하지 않을 경우 실행 셀 주석처리**

ollama 실행

In [None]:
# subprocess.Popen(["ollama", "serve"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
# time.sleep(1)

### 로컬 모델 저장

In [None]:
def ollama_model_check(drive_model_dir:str=None, require_lite:bool=True) -> str:
    """Point ``~/.ollama`` at a Drive directory and make sure the models are pulled.

    Args:
        drive_model_dir: Directory that stores the models; defaults to
            ``/content/drive/MyDrive/ollama_models``.
        require_lite: When True use ``qwen3:1.7b`` instead of ``qwen3:8b``
            (``llama3:8b`` is used in both cases).

    Returns:
        The model directory that was used.

    Raises:
        subprocess.CalledProcessError: If ``ollama pull`` fails.
    """
    drive_model_dir = "/content/drive/MyDrive/ollama_models" if drive_model_dir is None else drive_model_dir
    local_model_dir = os.path.expanduser("~/.ollama")

    os.makedirs(drive_model_dir, exist_ok=True)

    # Replace whatever currently sits at ~/.ollama with a symlink to the model
    # directory. Done with os/shutil instead of a shell string so paths that
    # contain spaces work, and islink() also catches a dangling symlink, which
    # os.path.exists() reports as missing.
    # WARNING: an existing real ~/.ollama directory is deleted.
    try:
        if os.path.islink(local_model_dir) or os.path.isfile(local_model_dir):
            os.unlink(local_model_dir)
        elif os.path.isdir(local_model_dir):
            shutil.rmtree(local_model_dir)
        os.symlink(drive_model_dir, local_model_dir)
    except OSError as link_error:
        # The shell version ignored failures silently; keep going but say so.
        print(f"Could not link {local_model_dir} -> {drive_model_dir}: {link_error}")

    model_names = ["qwen3:8b", "llama3:8b"] if not require_lite else ["qwen3:1.7b", "llama3:8b"]

    try:
        result = subprocess.run(["ollama", "list"], capture_output=True, text=True, check=True)
        installed_models_output = result.stdout
    except subprocess.CalledProcessError:
        installed_models_output = ""

    # Compare against the first column of each listed row rather than doing a
    # substring search over the whole output, so e.g. "llama3:8b" is not
    # considered installed just because "llama3:8b-something" is.
    installed_models = {
        line.split()[0] for line in installed_models_output.splitlines() if line.strip()
    }

    for model in model_names:
        if model in installed_models:
            print(f"{model} 이미 설치됨 — 다운로드 생략")
        else:
            print(f"{model} 다운로드 중...")
            subprocess.run(["ollama", "pull", model], check=True)

    print("\n모델 확인")

    return drive_model_dir

In [None]:
# # %time
# model_path = ollama_model_check()

로컬 환경 ollama 모델 리스트

In [None]:
# !ollama list

### 모델 출력 포맷

In [None]:
# Pydantic schema for the structured reply requested from the Ollama chat model:
# ask_ollama passes its JSON schema as `format` and validates the reply with it.
# Field names match the columns of the report_info table.
# NOTE: documented with comments on purpose -- no class docstring is added here.
class ReportInfo(BaseModel):
    stock: str  # stock name
    ticker: str  # stock code / ticker
    published_date: str  # report date; the retrieval question asks for yyyy-mm-dd
    current_price: float  # current stock price
    target_price: float  # target stock price
    investment_opinion: bool  # per the retrieval question: True = buy, False = hold
    author_analyst: str  # analyst who wrote the report
    affiliated_firm: str  # securities firm the analyst belongs to

In [None]:
# Retrieval questions, one per report field. ask_ollama embeds each question
# (only the values are used there) and picks the most similar PDF chunks.
ollama_prompts = {"종목명": "본 보고서가 취급하는 종목명이 무엇인가요?",
                  "종목코드": "본 보고서가 취급하는 기업의 티커(ticker)가 무엇인가요?",
                  "작성일": "본 보고서가 발행된 일시를 yyyy-mm-dd 형태로 답하세요.",
                  "현재 주가": "본 보고서에 발표된 현재 주가를 답하세요. (KRW)",
                  "목표 주가": "본 보고서에 발표된 목표 주가를 답하세요. (KRW)",
                  "투자 의견": "본 보고서에 발표된 투자 의견을 답하세요. (True: 매수, False: 보유)",
                  "작성 애널리스트": "본 보고서에 발표된 작성 애널리스트는 누구인가요?",
                  "소속 증권사": "본 보고서에 발표된 소속 증권사의 기업명은 무엇인가요?"}

### 문서 참조

PDF에서 텍스트 추출 (표 영역을 제외하는 버전은 아래 주석 처리된 `pdf_to_text_no_tables` 참고)

In [None]:
def pdf_to_text(pdf_path):
    """Return the text of every page of a PDF, each page followed by a newline.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts; a page without extractable text
        contributes only its trailing newline.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # `or ""` guards against extract_text() yielding None for a page with
        # no text layer, which would make the concatenation raise TypeError.
        page_texts = [(page.extract_text() or "") + "\n" for page in pdf.pages]
    return "".join(page_texts)

# def pdf_to_text_no_tables(pdf_path):
#     text = ""
#     with pdfplumber.open(pdf_path) as pdf:
#         for page in pdf.pages:
#             # 페이지의 표 영역 추출
#             tables = page.find_tables()
#             table_bboxes = [table.bbox for table in tables]  # bbox = (x0, top, x1, bottom)

#             # 문자 단위로 필터링
#             page_text = ""
#             for char in page.chars:
#                 in_table = any(
#                     bbox[0] <= char['x0'] <= bbox[2] and
#                     bbox[1] <= char['top'] <= bbox[3]
#                     for bbox in table_bboxes
#                 )
#                 if not in_table:
#                     page_text += char['text']
#             text += page_text + "\n"
#     return text

코사인 유사도 topk

In [None]:
def get_simliar_chunks(store, query_vector, chunks:list, k:int=2):
    """Pick the ``k`` chunks whose vectors are most cosine-similar to the query.

    Args:
        store: Sequence of chunk vectors, one per entry of ``chunks``.
        query_vector: Vector to compare every chunk vector against.
        chunks: Chunk texts, index-aligned with ``store``.
        k: Number of chunks to return.

    Returns:
        A pair ``(top_chunks, top_scores)``: the selected chunks as a list and
        their cosine similarities as a NumPy array, both in descending order
        of similarity.
    """
    embeddings = np.array(store)
    query = np.array(query_vector)

    numerators = np.dot(embeddings, query)

    # Zero-length vectors get a tiny norm instead, so the division is defined.
    embedding_norms = np.linalg.norm(embeddings, axis=1)
    embedding_norms = np.where(embedding_norms == 0, 1e-10, embedding_norms)
    query_norm = np.linalg.norm(query)
    if query_norm == 0:
        query_norm = 1e-10

    similarities = numerators / (embedding_norms * query_norm)

    # Ascending argsort, reversed, then truncated to the best k positions.
    top_indices = np.argsort(similarities)[::-1][:k]

    return [chunks[idx] for idx in top_indices], similarities[top_indices]

In [None]:
def ask_ollama(directory_path:str, file_name:str, model_name:str, prompt:str=system_prompt, format:BaseModel=ReportInfo, return_dict:bool=True):
    """Extract report fields from a PDF with a local Ollama model and retrieval.

    The PDF text is split into chunks, the chunks most similar to each
    question in ``ollama_prompts`` are collected, and the chat model is asked
    to answer from those chunks using the JSON schema of ``format``.

    Args:
        directory_path: Directory that contains the PDF.
        file_name: PDF file name.
        model_name: Ollama model used both for embeddings and for chat.
        prompt: System prompt for the chat request.
        format: Pydantic model class whose JSON schema constrains the reply
            and which validates it.
        return_dict: When True return ``model_dump()``; otherwise return the
            indented JSON string.

    Returns:
        The validated reply as a dict or as a JSON string. Keys are the field
        names of ``format``.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.

    NOTE(review): the returned keys are the English field names of ``format``,
    while ``is_validate_report_data`` looks up the Korean keys requested by
    ``system_prompt`` -- confirm how the two are meant to be reconciled.
    """
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

    file_path = os.path.join(directory_path, file_name)
    chunks = text_splitter.split_text(pdf_to_text(file_path))
    if not chunks:
        # Nothing to embed or retrieve from; fail with a clear message.
        raise ValueError(f"No extractable text found in {file_path}")

    store_emb = ollama.embed(model=model_name, input=chunks)['embeddings']

    # Several questions often retrieve the same chunk; keep each chunk once,
    # in first-seen order, so the context is not padded with repeats.
    topk_chunks = []
    for question in ollama_prompts.values():
        query_v_emb = ollama.embed(model=model_name, input=question)['embeddings'][0]
        rel_chunks, _ = get_simliar_chunks(store_emb, query_v_emb, chunks)
        for chunk in rel_chunks:
            if chunk not in topk_chunks:
                topk_chunks.append(chunk)

    # Honour the `format` argument instead of always using ReportInfo.
    response = chat(model=model_name,
                    messages=[{'role': 'system', 'content': prompt},
                              {'role': 'user', 'content': 'Give me the context!'},
                              {'role': 'assistant', 'content': '\n'.join(topk_chunks)},
                              {'role': 'user', 'content': 'Return a JSON object from the reference as per my instructions.'}],
                    format=format.model_json_schema(),
                    stream=False)

    result = format.model_validate_json(response.message.content)

    if return_dict:
        return result.model_dump()
    else:
        return result.model_dump_json(indent=2)

테스트

In [None]:
# ask_ollama('/content/drive/My Drive/이어드림/pdfs', "210216_4분기는_성수기_1분기는_최성수기.pdf", "llama3:8b")

In [None]:
# for pdf_file in ['210226_이제_조정은_충분하다.pdf', '210226_플랫폼_기반의_간편건강식_전문기업_중국_시.pdf', '210226_재무_건전성_및_투자여력_확보.pdf',
#                   '210302_DRAM_가격_전망치_상향_조정.pdf', '210226_소방설비_제작부터_소방시설_공사까지_종합_.pdf']:
#     print("Extracted Information (JSON) =>", pdf_file)
#     print(ask_ollama(pdf_path, pdf_file, "llama3:8b"))

## 목표 주가 수집

In [None]:
!pip install pykrx

from pykrx import stock
import psycopg2

In [None]:
def find_target_hit_date(ticker:str, report_date:str, target_price:float):
    """Find the first date on which the "종가" price reached the target price.

    Prices are fetched with pykrx from ``report_date`` up to today.

    Args:
        ticker: Stock code passed to pykrx.
        report_date: Report date formatted as ``YYYY-MM-DD``.
        target_price: Price the "종가" column is compared against (``>=``).

    Returns:
        ``(first_hit_date, days)`` where ``first_hit_date`` is a
        ``YYYY-MM-DD`` string and ``days`` is the number of days between the
        report date and that date, or None when the target was never reached.
    """
    date_format = "%Y-%m-%d"
    query_start = report_date.replace("-", "")
    query_end = datetime.today().strftime("%Y%m%d")

    closes = stock.get_market_ohlcv_by_date(query_start, query_end, ticker)[["종가"]]
    hits = closes[closes["종가"] >= target_price]

    if hits.empty:
        return None

    first_hit_date = hits.index[0].strftime(date_format)
    report_dt = datetime.strptime(report_date, date_format)
    elapsed = datetime.strptime(first_hit_date, date_format) - report_dt
    return first_hit_date, elapsed.days

## PDF 전처리 파이프라인

PDF로부터 LLM이 추출한 데이터 중 필수 데이터 검증

In [None]:
def is_validate_report_data(report_data:dict) -> bool:
    """Tell whether extracted report data contains every required field.

    Required: 종목코드, 작성일, 목표 주가.
    Optional: 종목명, 현재 주가, 투자 의견, 작성 애널리스트, 소속 증권사.

    Args:
        report_data: Data extracted from a report; anything that is not a
            dict (including None) is treated as invalid.

    Returns:
        True only when all required fields hold a real value. A field counts
        as missing when it is absent, None, numeric 0, or a string that is
        empty or "N/A" once surrounding whitespace and letter case are ignored.
    """
    # Always return a real bool: the previous `report_data and ...` chain
    # returned the falsy input itself (None, {}) despite the `-> bool` hint.
    if not isinstance(report_data, dict):
        return False

    def _is_missing(value) -> bool:
        if value is None:
            return True
        if isinstance(value, str):
            return value.strip().lower() in ("", "n/a")
        if isinstance(value, (int, float)):
            return value == 0
        return False

    required_keys = ("종목코드", "작성일", "목표 주가")
    return all(not _is_missing(report_data.get(key)) for key in required_keys)

단일 PDF 파일

In [None]:
def process_single_pdf(file_name:str, directory_path:str, feat_extractor:Callable, target_hitter:Callable, feat_extractor_kwargs:dict=None) -> dict:
    """Extract report data from one PDF and look up when its target price was hit.

    Args:
        file_name: PDF file name.
        directory_path: Directory that contains the PDF.
        feat_extractor: Called as ``feat_extractor(directory_path, file_name, **kwargs)``.
        target_hitter: Called as ``target_hitter(ticker, report_date, target_price)``.
        feat_extractor_kwargs: Extra keyword arguments for ``feat_extractor``.

    Returns:
        A dict with keys ``pdf_file``, ``report_info`` and ``hit_date_info``.
        ``hit_date_info`` holds the ``target_hitter`` result, or an explanatory
        string when extraction failed, required fields were missing, or
        ``target_hitter`` raised.
    """
    try:
        extractor_kwargs = feat_extractor_kwargs if feat_extractor_kwargs is not None else {}
        report_info = feat_extractor(directory_path, file_name, **extractor_kwargs)

        # Without the required fields there is nothing to look up.
        if not is_validate_report_data(report_info):
            return {
                "pdf_file": file_name,
                "report_info": report_info,
                "hit_date_info": "Could not extract essential information for target hit date."
            }

        ticker, report_date, target_price = (
            report_info["종목코드"],
            report_info["작성일"],
            report_info["목표 주가"],
        )

        # A lookup failure is reported as text; the extracted data is still returned.
        try:
            hit_date_info = target_hitter(ticker, report_date, target_price)
        except Exception as hit_error:
            hit_date_info = f"Error finding target hit date: {hit_error}"

        return {
            "pdf_file": file_name,
            "report_info": report_info,
            "hit_date_info": hit_date_info
        }

    except Exception as extract_error:
        print(f"Error processing {file_name} during feature extraction: {extract_error}")
        return {
            "pdf_file": file_name,
            "report_info": None,
            "hit_date_info": f"Error during feature extraction: {extract_error}"
        }

파이프라인 (DB 작업 분리용)

In [None]:
def report_preprocessing_parallel(directory_path:str, get_files_fn:Callable, pipeline_fn:Callable, feat_extractor:Callable, target_hitter:Callable, feat_extractor_kwargs:dict=None,
                                  num_workers:int=5, verbose:bool=True) -> list:
    """Run ``pipeline_fn`` over every file of a directory on a thread pool.

    Args:
        directory_path: Directory handed to ``get_files_fn`` and ``pipeline_fn``.
        get_files_fn: Called as ``get_files_fn(directory_path)``; returns the file names.
        pipeline_fn: Called per file as ``pipeline_fn(file, directory_path,
            feat_extractor, target_hitter, feat_extractor_kwargs)``.
        feat_extractor: Forwarded to ``pipeline_fn``.
        target_hitter: Forwarded to ``pipeline_fn``.
        feat_extractor_kwargs: Forwarded to ``pipeline_fn``.
        num_workers: Maximum number of worker threads.
        verbose: When True, print progress messages.

    Returns:
        The ``pipeline_fn`` results in completion order; files whose task
        raised are reported on stdout and left out.
    """
    collected = []

    with ThreadPoolExecutor(max_workers=num_workers) as pool:
        # The file listing is obtained through the pool as well.
        pdf_names = pool.submit(get_files_fn, directory_path).result()

        if verbose:
            print(f"Starting parallel processing for {len(pdf_names)} files...")

        # Map each submitted task back to the file it belongs to.
        pending = {}
        for name in pdf_names:
            task = pool.submit(pipeline_fn, name, directory_path, feat_extractor, target_hitter, feat_extractor_kwargs)
            pending[task] = name

        for task in as_completed(pending):
            name = pending[task]
            try:
                collected.append(task.result())
                if verbose:
                    print(f"Successfully processed data for: {name}")
            except Exception as exc:
                print(f'{name} generated an exception during processing: {exc}')

    return collected

In [None]:
# # 다중 LLM 함수 사용 시에도 파라미터 등 달라서 아래 함수 다중 호출로 처리
# processed_results = report_preprocessing_parallel(pdf_path, get_report_pdf_files, process_single_pdf, ask_gemini)

# # --- Displaying Results ---
# print("\n--- Summary of Processed Results ---")
# for result in processed_results:
#     print(f"File: {result['pdf_file']}")
#     print(f"  Report Info: {result['report_info']}")
#     print(f"  Target Hit Info: {result['hit_date_info']}")
#     print("-" * 20)

파이프라인 (DB 작업 통합용)

In [None]:
def _insert_report_info(cursor, file_name:str, report_info:dict) -> None:
    """Insert one extracted report into the report_info table (does not commit)."""
    # "투자 의견" is optional: accept a bool as is, compare text against "buy",
    # and store anything else (missing, None) as NULL instead of crashing on .lower().
    opinion = report_info.get("투자 의견")
    if isinstance(opinion, bool):
        is_buy = opinion
    elif isinstance(opinion, str):
        is_buy = opinion.strip().lower() == "buy"
    else:
        is_buy = None

    cursor.execute("""
        INSERT INTO report_info (pdf_file, stock, ticker, published_date, current_price, target_price, investment_opinion, author_analyst, affiliated_firm)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);
    """, (
        file_name,
        report_info.get("종목명"),
        report_info.get("종목코드"),
        report_info.get("작성일"),
        report_info.get("현재 주가"),
        report_info.get("목표 주가"),
        is_buy,
        report_info.get("작성 애널리스트"),
        report_info.get("소속 증권사"),
    ))


def _insert_report_hit(cursor, file_name:str, hit_date_info) -> None:
    """Insert the target-hit result into the report_hit table (does not commit)."""
    # A non-tuple result (None) means the target was never reached: both columns NULL.
    is_hit = isinstance(hit_date_info, tuple)
    hit_date = hit_date_info[0] if is_hit else None
    hit_days = hit_date_info[1] if is_hit else None

    cursor.execute("""
        INSERT INTO report_hit (pdf_file, hit_date, hit_days)
        VALUES (%s, %s, %s);
    """, (file_name, hit_date, hit_days))


def _archive_processed_pdf(source_dir:str, pdf_file:str, finished_dir:str, verbose:bool) -> None:
    """Move a stored PDF from source_dir into finished_dir; failures are printed, not raised."""
    try:
        # Create the destination first so a missing folder is not misreported
        # as a missing source file.
        os.makedirs(finished_dir, exist_ok=True)
        shutil.move(os.path.join(source_dir, pdf_file), os.path.join(finished_dir, pdf_file))
        if verbose:
            print(f"Moved {pdf_file} with completely extracted data to: {finished_dir}/")
    except FileNotFoundError:
        print(f"{pdf_file} not found in {source_dir}/")
    except PermissionError:
        print(f"No permission to move file: {pdf_file}")
    except Exception as shutil_exc:
        print(f"Failed to move file {pdf_file}: {shutil_exc}")


def _persist_result(result:dict, pdf_file:str, directory_path:str, finished_path:str, conn, cursor, verbose:bool) -> None:
    """Write one pipeline result to both tables and archive the PDF on success."""
    report_info = result.get("report_info")
    hit_date_info = result.get("hit_date_info") # tuple or None (normal) / str (abnormal)
    file_name = result.get("pdf_file")

    if not is_validate_report_data(report_info):
        if verbose:
            print(f"Skipping DB insert for {pdf_file}: Essential info missing or processing error.")
        return

    # A constraint violation is treated as a data error: roll back and go on.
    try:
        _insert_report_info(cursor, file_name, report_info)
        conn.commit()
    except Exception as db_error_info:
        conn.rollback()
        print(f"REPORT_INFO table INSERT error for {pdf_file}: {db_error_info}")
    else:
        if verbose:
            print(f"Successfully extracted data for: {pdf_file}")
        # Archive regardless of `verbose`, and from the directory that was
        # actually processed rather than the global pdf_path.
        finished_dir = pdf_finished_path if finished_path is None else finished_path
        _archive_processed_pdf(directory_path, pdf_file, finished_dir, verbose)

    if isinstance(hit_date_info, str) and hit_date_info.startswith("Error finding target hit date:"):
        # A NULL row in REPORT_HIT means "target not reached", not "lookup
        # failed", so nothing is inserted when only target_hitter errored.
        print(f"Error occurs only on target_hitter. REPORT_HIT table INSERT is passed for {pdf_file}: {hit_date_info}")
        return

    # NOTE(review): this insert is attempted even when the REPORT_INFO insert
    # above failed (kept as in the original) -- confirm that is intended.
    try:
        _insert_report_hit(cursor, file_name, hit_date_info)
        conn.commit()
        if verbose:
            print(f"Successfully processed and inserted data for: {pdf_file}")
    except Exception as db_error_hit:
        conn.rollback()
        print(f"REPORT_HIT table INSERT error for {pdf_file}: {db_error_hit}")


def report_preprocessing_parallel_with_db(directory_path:str, get_files_fn:Callable, pipeline_fn:Callable, feat_extractor:Callable, target_hitter:Callable, feat_extractor_kwargs:dict=None,
                                          num_workers:int=5, verbose:bool=True, conn=None, cursor=None, finished_path:str=None) -> tuple:
    """Process every PDF on a thread pool and insert each result into the DB.

    Each result is written as soon as its task completes; all DB work happens
    in the calling thread.

    Args:
        directory_path: Directory handed to ``get_files_fn`` and ``pipeline_fn``;
            stored PDFs are moved out of it.
        get_files_fn: Called as ``get_files_fn(directory_path)``; returns the file names.
        pipeline_fn: Called per file as ``pipeline_fn(file, directory_path,
            feat_extractor, target_hitter, feat_extractor_kwargs)`` and expected
            to return a dict with ``pdf_file``, ``report_info``, ``hit_date_info``.
        feat_extractor: Forwarded to ``pipeline_fn``.
        target_hitter: Forwarded to ``pipeline_fn``.
        feat_extractor_kwargs: Forwarded to ``pipeline_fn``.
        num_workers: Maximum number of worker threads.
        verbose: When True, print progress messages.
        conn: Open DB connection (``commit``/``rollback`` are used).
        cursor: Cursor of ``conn`` used with ``%s`` placeholders.
        finished_path: Directory that receives stored PDFs; when None the
            module-level ``pdf_finished_path`` is used.

    Returns:
        ``(processed_results, conn, cursor)``.

    Raises:
        ValueError: If ``conn`` or ``cursor`` is None.
    """
    if conn is None or cursor is None:
        raise ValueError("DB connection and cursor must be provided.")

    processed_results = []

    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        selected_pdf_files = executor.submit(get_files_fn, directory_path).result()

        if verbose:
            print(f"Starting parallel processing for {len(selected_pdf_files)} files...")

        future_to_pdf = {executor.submit(pipeline_fn, pdf_file, directory_path, feat_extractor, target_hitter, feat_extractor_kwargs): pdf_file for pdf_file in selected_pdf_files}

        for future in as_completed(future_to_pdf):
            pdf_file = future_to_pdf[future]
            try:
                result = future.result()
                processed_results.append(result)
                _persist_result(result, pdf_file, directory_path, finished_path, conn, cursor, verbose)
            except Exception as exc:
                # One bad file must not stop the remaining ones.
                print(f'{pdf_file} generated an exception during processing: {exc}')

    return processed_results, conn, cursor

## DB 연결 (localhost)

In [None]:
# Read the PostgreSQL password from Colab secrets, or from the environment elsewhere.
if not IS_COLAB_ENV:
    load_dotenv()
    postgres_key = os.getenv('POSTGRES_KEY')
else:
    postgres_key = userdata.get('POSTGRES_KEY')

db_user = "stock"
db_name = "stockdb"

# SQL text for creating the database and the role that owns it.
db_config_query = f"CREATE DATABASE {db_name} OWNER {db_user};"
user_config_query = f"CREATE USER {db_user} WITH PASSWORD '{postgres_key}';"

In [None]:
# !sudo apt-get update
!sudo apt-get install -y postgresql

!sudo service postgresql start

# Drop user and database if they exist to ensure a clean setup
!sudo -u postgres psql -c "DROP DATABASE IF EXISTS {db_name};"
!sudo -u postgres psql -c "DROP USER IF EXISTS {db_user};"

# Create the user with password and login privileges
!sudo -u postgres psql -c "CREATE USER {db_user} WITH PASSWORD '{postgres_key}' LOGIN;"

# Create the database owned by the user
!sudo -u postgres psql -c "CREATE DATABASE {db_name} OWNER {db_user};"

!pip install psycopg2-binary

In [None]:
import psycopg2

# Connect, create the tables, run the pipeline and print both tables.
# The handles start as None so the `finally` clause can release whatever was
# actually opened, even when connecting or any later step fails.
conn = None
cursor = None
try:
    # Connect to the database
    conn = psycopg2.connect(
        host="localhost",
        dbname=db_name,
        user=db_user,
        password=postgres_key
    )
    cursor = conn.cursor()

    # Create the tables
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS report_info (
            pdf_file VARCHAR(100) PRIMARY KEY,
            stock VARCHAR(50),
            ticker VARCHAR(6) NOT NULL CHECK (LENGTH(ticker) = 6),
            published_date DATE NOT NULL,
            current_price INT,
            target_price INT NOT NULL,
            investment_opinion BOOLEAN,
            author_analyst VARCHAR(15),
            affiliated_firm VARCHAR(50)
        );
    """)
    conn.commit()
    print("REPORT_INFO 테이블이 성공적으로 생성되었습니다.")

    cursor.execute("""
        CREATE TABLE IF NOT EXISTS report_hit (
            pdf_file VARCHAR(100) PRIMARY KEY,
            hit_date DATE,
            hit_days INT
        );
    """)
    conn.commit()
    print("REPORT_HIT 테이블이 성공적으로 생성되었습니다.\n")

    # Relies on pdf_path, get_report_pdf_files, process_single_pdf, ask_gemini
    # and find_target_hit_date from earlier cells; a missing one is reported
    # and the table dump below still runs.
    try:
        processed_results, conn, cursor = report_preprocessing_parallel_with_db(pdf_path,
                                                                                get_report_pdf_files,
                                                                                process_single_pdf,
                                                                                ask_gemini,
                                                                                find_target_hit_date,
                                                                                num_workers=1,
                                                                                conn=conn, cursor=cursor)
    except NameError as e:
        print(f"Error: Required functions or variables are not defined. Please ensure all preceding cells are executed. Details: {e}")

    # Print the stored rows
    cursor.execute("SELECT * FROM report_info;")
    rows = cursor.fetchall()
    print("\n================ 데이터 조회 ================", end="")
    print("\n테이블 데이터:")
    for row in rows:
        print(row)

    cursor.execute("SELECT * FROM report_hit;")
    rows = cursor.fetchall()
    print("\n테이블 데이터:")
    for row in rows:
        print(row)

except (Exception, psycopg2.Error) as error:
    print(f"PostgreSQL 오류 발생: {error}")

finally:
    # Close the cursor and connection on the error path as well.
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()

# 중간평가

* 테스트용 5개는 정상 작동하지만, 데이터가 많아지면 429 에러(Too Many Requests, 요청 한도 초과)가 발생함

  - 병렬 처리 안하면 에러 안 날지도...
  - ollama 사용하면 자체 요청이니까 에러 안 날까...

* report_hit 테이블 None, None은 목표 주가 달성 못함 (에러 아님)

막대그래프,