In [6]:
# Simple code for chatting with the content of a web page

In [7]:
# Import of the packages

In [23]:
import os
from pathlib import Path
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from typing import List

In [24]:
# Added the chat model and prompt templates
from langchain_openai import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate

In [25]:
# Added access to the web via LangChain document loaders

In [26]:
from langchain_community.document_loaders import WebBaseLoader

In [27]:
# add parsers

In [28]:
from langchain.output_parsers import PydanticOutputParser

In [29]:
# Load environment variables and read the OpenAI API key (None if it is not set)
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [30]:
# main function

In [31]:
def ask_chat(model_name, template, variables, parser, temperature=0.7):
    """Send one templated human message to an OpenAI chat model and parse the reply.

    Args:
        model_name: Name of the OpenAI chat model to use.
        template: Prompt template text; its ``{placeholders}`` are filled
            from ``variables``.
        variables: Mapping of placeholder name to value for ``template``.
        parser: Object whose ``parse(text)`` method converts the raw reply
            text into the desired output.
        temperature: Sampling temperature passed to the model. Defaults to
            0.7, the value that used to be hard-coded here.

    Returns:
        Whatever ``parser.parse`` returns for the content of the model's reply.
    """
    llm = ChatOpenAI(
        openai_api_key=OPENAI_API_KEY,
        model_name=model_name,
        temperature=temperature,
    )
    message = HumanMessagePromptTemplate.from_template(template=template)
    prompt = ChatPromptTemplate.from_messages(messages=[message])
    # Fill the template placeholders and render the list of chat messages.
    messages = prompt.format_prompt(**variables).to_messages()
    # Calling the model object directly (``llm(messages)``) is deprecated in
    # LangChain; ``invoke`` is the supported way to run a chat model.
    answer = llm.invoke(messages)
    return parser.parse(answer.content)

In [32]:
# function for accessing the web

In [41]:
def get_web_content(url):
    """Load ``url`` through ``WebBaseLoader`` and return what ``load()`` yields."""
    return WebBaseLoader(url).load()

In [42]:
# create template

In [98]:
# Prompt sent to the model. The two placeholders are filled from `variables`:
#   {webpage_content} - text of the loaded web page
#   {output_format}   - format instructions produced by the output parser
PROMPT_TEMPLATE = """
    Get the information about the webpage statistics from the table from this {webpage_content}. Put them in the order
    today, yesterday, at the average at today, at the average at yesterday. ignore number of diferences
    {output_format}
    """

In [99]:
# Create the output report model / taken from a GPT prompt for simplicity

In [100]:
class Metrics(BaseModel):
    # Structured result the output parser builds from the model's reply.
    # Every field is a list holding one value per reporting period, in the
    # order requested by PROMPT_TEMPLATE.
    # NOTE(review): documented with comments rather than a class docstring,
    # because pydantic may copy a docstring into the schema that the parser
    # places in the prompt - confirm before adding one.
    views: List[int] = Field(
        ..., description="Number of views for each period."
    )
    sessions: List[int] = Field(
        ..., description="Number of sessions for each period."
    )
    visitors: List[int] = Field(
        ..., description="Number of unique visitors for each period."
    )
    hosts: List[int] = Field(
        ..., description="Number of hosts for each period."
    )
    reloads: List[int] = Field(
        ..., description="Number of page reloads for each period."
    )
    average_online: List[int] = Field(
        ..., description="Average number of users online for each period."
    )
    average_active_online: List[int] = Field(
        ..., description="Average number of active users online for each period."
    )
    average_duration: List[float] = Field(
        ..., description="Average duration of sessions in minutes for each period."
    )
    views_per_visitor: List[float] = Field(
        ..., description="Average number of views per visitor for each period."
    )

    def __str__(self):
        """Return one ``name: <tab> value`` line per entry of the instance ``__dict__``."""
        lines = []
        for name, value in self.__dict__.items():
            lines.append(f"{name}: \t {value}")
        return "\n".join(lines)
      

In [101]:
# get content

In [102]:
# Page whose content is loaded and passed to the model as {webpage_content}
url = "https://www.liveinternet.ru/stat/rg.ru/index.html"

In [103]:
# Fetch the page; the result is iterated below as documents with `page_content`
web_content = get_web_content(url)

In [104]:
# Merge the text of every loaded document into one space-separated string
content_to_summarize = " ".join(doc.page_content for doc in web_content)

In [105]:
# creation of the parser

In [106]:
# Parser that turns the model's reply text into a Metrics instance
parser = PydanticOutputParser(pydantic_object=Metrics)

In [107]:
# Values for the two placeholders used in PROMPT_TEMPLATE
variables = dict(
    webpage_content=content_to_summarize,
    output_format=parser.get_format_instructions(),
)

In [108]:
# Ask the model to extract the statistics and parse its reply with `parser`
answer = ask_chat(
    model_name="gpt-4o",
    template=PROMPT_TEMPLATE,
    variables=variables,
    parser=parser,
)

In [109]:
# print() uses Metrics.__str__, so each field is shown on its own line
print(answer)

views: 	 [406, 56819, 54907, 64722]
sessions: 	 [438, 51289, 49480, 58514]
visitors: 	 [360, 49243, 47652, 56215]
hosts: 	 [374, 45215, 44012, 51085]
reloads: 	 [0, 98, 94, 93]
average_online: 	 [588, 543, 524, 620]
average_active_online: 	 [52, 46, 45, 52]
average_duration: 	 [0.46, 0.24, 0.24, 0.24]
views_per_visitor: 	 [1.1, 1.1, 1.1, 1.1]
