# 모닝 브리핑
긴 밤 동안 쌓인 미국 주식 뉴스와 최신 정보를 요약하여 유저에게 필요한 정보를 빠르게 전달
 
### Table of Contents
* <b> Session 1. Collecting Data </b>: Collecting data from American news sites and Reddit stock pages / using another LLM <br> 
    - 1-1 web crawling
    - 1-2 ChatGPT


- <b> Session 2. Prompt engineering and building the LLM chain with LangChain </b> <br>
   

- <b> Session 3. Making a demo with Gradio </b> <br>



### 1-1 web crawling
- collect news data from American online business news sites
- collect stock data from Reddit stock channels

In [1]:
# Install the required libraries
!pip install beautifulsoup4
!pip install requests
!pip install selenium

import requests
from bs4 import BeautifulSoup



In [28]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys

import time

def get_articles(url):
    """Scrape headline and body text of the articles linked from a search page.

    Opens ``url`` in a Chrome WebDriver session, pages down a few times,
    collects the ``href`` of every ``a.resultlink`` anchor, then visits each
    link and gathers the text of its ``h1.ArticleHeader-headline`` element
    followed by the text of every ``div.group`` element on that page.

    Args:
        url: Address of the search-results page to start from.

    Returns:
        A single string holding the collected headline and body texts,
        concatenated in visit order with no separator. Empty when no
        matching article was found.
    """
    parts = []

    # Launch the browser driver
    driver = webdriver.Chrome()
    try:
        # Open the search-results page
        driver.get(url)
        driver.implicitly_wait(2)

        # Page down three times, pausing after each key press
        # (presumably so lazily loaded results can appear -- confirm).
        for _ in range(3):
            driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)
            time.sleep(1)

        result_links = driver.find_elements(By.CSS_SELECTOR, 'a.resultlink')

        # Keep only anchors that actually carry an href; get_attribute
        # returns None for a missing attribute and driver.get cannot open it.
        hrefs = [link.get_attribute('href') for link in result_links]
        article_urls = [href for href in hrefs if href]

        for article_url in article_urls:
            driver.get(article_url)
            try:
                headline = driver.find_element(
                    By.CSS_SELECTOR, 'h1.ArticleHeader-headline'
                )
            except NoSuchElementException:
                # The page has no article headline element: skip it.
                continue

            parts.append(headline.text)
            time.sleep(2)

            # Append the text of every body section of the article.
            sections = driver.find_elements(By.CSS_SELECTOR, 'div.group')
            parts.extend(section.text for section in sections)
    finally:
        # Close the browser even when a step above raised, so no Chrome
        # process is left running.
        driver.quit()

    return ''.join(parts)

In [None]:
# Scrape the CNBC search results for each ticker symbol.
msft_cnbc = get_articles('https://www.cnbc.com/search/?query=msft&qsearchterm=msft')
nvda_cnbc = get_articles('https://www.cnbc.com/search/?query=nvda&qsearchterm=nvda')
tsla_cnbc = get_articles('https://www.cnbc.com/search/?query=tsla&qsearchterm=tsla')

# Store the scraped text, one ticker after another, each terminated by a
# newline. The context manager closes the file even if a write fails, and the
# explicit encoding keeps the output independent of the platform default.
with open("/data/summary.txt", "w", encoding="utf-8") as target_file:
    target_file.write(msft_cnbc + '\n')
    target_file.write(nvda_cnbc + '\n')
    target_file.write(tsla_cnbc + '\n')

### 1-2 Using ChatGPT
- collect current business data from ChatGPT
* used instead of Solar for <font color = red> retrieving information from the internet </font>

In [2]:
# Load the saved search text into `input_txt`; it is the text summarized for
# the opening briefing. Reading chg_promt.txt is currently disabled, so only
# LLMsearch.txt is loaded. The context manager closes the file even if the
# read fails.
with open("LLMsearch.txt", "r") as c:
    input_txt = c.read()

### 2 Prompt engineering and model definition
- collect news data from American online business news sites
- collect stock data from Reddit stock channels

In [11]:
# Install langchain-upstage and supporting packages (-q: quiet, -U: upgrade).
! pip3 install -qU  markdownify  langchain-upstage rank_bm25 python-dotenv

In [3]:
# @title set API key
import os
import getpass
from pprint import pprint
import warnings

warnings.filterwarnings("ignore")

from IPython import get_ipython

# Make UPSTAGE_API_KEY available in the process environment.
if "google.colab" in str(get_ipython()):
    # Running in Google Colab. Please set the UPSTAGE_API_KEY in the Colab Secrets
    from google.colab import userdata

    os.environ["UPSTAGE_API_KEY"] = userdata.get("UPSTAGE_API_KEY")
else:
    # Running locally. Please set the UPSTAGE_API_KEY in the .env file
    from dotenv import load_dotenv

    load_dotenv()

# Fall back to an interactive prompt when no key was provided. A blank value
# (for example an empty entry in .env) is as unusable as a missing one, so it
# is treated the same way.
if not os.environ.get("UPSTAGE_API_KEY"):
    os.environ["UPSTAGE_API_KEY"] = getpass.getpass("Enter your Upstage API key: ")


In [4]:
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_upstage import ChatUpstage

# Chat model client used by the summarization chain.
llm = ChatUpstage()

# Prompt text: asks the model for the three most important points of the
# text substituted for {text}.
SUMMARY_TEMPLATE = """
    Please write three most important contents on the following text  
    ---
    TEXT: {text}
    """

prompt_template = PromptTemplate.from_template(SUMMARY_TEMPLATE)

# Pipeline: fill the prompt -> call the model -> return the reply as a string.
output_parser = StrOutputParser()
chain = prompt_template | llm | output_parser

In [5]:
def brief(dialog, history=None):
    """Summarize ``dialog`` with the module-level summarization ``chain``.

    Args:
        dialog: Text to summarize, e.g. a news article typed into the chat.
        history: Earlier chat turns. Gradio's ``ChatInterface`` calls its
            handler as ``fn(message, history)``, so the parameter must be
            accepted; it is not used because each message is summarized
            independently. Defaults to ``None`` so direct single-argument
            calls keep working.

    Returns:
        The value returned by ``chain.invoke`` for the given text.
    """
    return chain.invoke({"text": dialog})

### 3 Making Demo by Gradio
- Initialize with yesterday's news information
- If you enter a news article, the app returns a three-line summary

In [14]:
!pip install -qU gradio python-dotenv langchain-upstage python-dotenv

In [6]:
import gradio as gr

from langchain_upstage import (
    ChatUpstage,
    UpstageEmbeddings,
    UpstageLayoutAnalysisLoader,
)
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import AIMessage, HumanMessage
from langchain_text_splitters import (
    Language,
    RecursiveCharacterTextSplitter,
)
from langchain_chroma import Chroma

# Rebind `llm` to a client created with streaming enabled.
# NOTE(review): `chain` was composed from the earlier `llm` object, so this
# rebinding does not change what `chain` calls -- confirm whether this
# streaming instance is meant to be used.
llm = ChatUpstage(streaming=True)

In [7]:
def start():
    """Return the opening briefing: the chain's summary of ``input_txt``."""
    payload = {"text": input_txt}
    return chain.invoke(payload)

# Build the demo UI: a textbox that receives the result of `start`, followed
# by a chat panel whose handler is `brief`.
with gr.Blocks() as demo:
    output = gr.Textbox(label = "Today Stock")
    # Register `start` as the load handler; its return value goes to `output`.
    demo.load(fn=start,outputs=output)
    chatbot = gr.ChatInterface(
        brief,
        title="Morning Brief",
        description="Check latest stock market news",
        
        
    )
    # Set the height attribute of the chat interface's inner Chatbot component.
    chatbot.chatbot.height = 300

In [8]:
 

# Launch the Gradio app only when this code runs as the entry point.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.
