In [1]:
import warnings
# Installs a blanket "ignore" filter: no category/module is given, so every
# warning raised after this cell is silenced, including deprecation notices
# from the libraries imported below.
warnings.filterwarnings('ignore')

In [2]:
import openai
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from trafilatura import fetch_url
from trafilatura import extract
from bs4 import BeautifulSoup as bs
from random import choice
from colorama import Fore, Back, Style
from datetime import datetime
from rich import print as rprint

In [3]:
from rich import pretty
# Calls rich's pretty.install(); presumably this makes bare cell expressions
# render through rich — confirm against the rich documentation.
pretty.install()

In [4]:
from actions import get_page_source, text_extract, search_info, get_links, decide_SearchorLinks
from session_manager import start_session, USER_AGENTS
from config import Config

In [6]:
# Load local settings from local_config.json and hand the API key to the
# openai client. The file is opened in a `with` block so the handle is closed
# deterministically instead of being left to the garbage collector.
# NOTE: `config` (this dict) is distinct from the imported `Config` class.
with open("local_config.json", "r") as config_file:
    config = json.load(config_file)
openai.api_key = config["KEY"]

In [8]:
# Model name passed as `model=` to the openai.ChatCompletion.create calls below.
model = Config.MODEL_NAME

In [10]:
# Prompt templates, keyed by prompt name, loaded from the prompts/ directory.
# Each file is read inside a `with` block so its handle is closed right away.
with open("prompts/read_prompts.json", "r") as read_prompts_file:
    READ_PROMPTS = json.load(read_prompts_file)
with open("prompts/search_prompts.json", "r") as search_prompts_file:
    SEARCH_PROMPTS = json.load(search_prompts_file)

In [11]:
# In-notebook store: "questions" and "theses" are lists that cells append to.
MEMORIES = {"questions": [], "theses": []}

def structure_memory(raw_thesis: str, page_url: str):
    """Split a newline-separated list of theses into timestamped memory records.

    Each non-empty line of ``raw_thesis`` becomes one record. A leading list
    marker ("1. ", "12) ", "- ", "* ", "• ") is removed; lines without a marker
    are kept whole.

    Args:
        raw_thesis: Text with one thesis per line, usually enumerated.
        page_url: URL the theses were derived from; used as the result key.

    Returns:
        ``{page_url: [{"memory": <thesis>, "time": <ISO timestamp>}, ...]}``
        where the timestamp has second precision (``YYYY-MM-DDTHH:MM:SS``).
    """
    import re  # local import keeps this cell self-contained

    # The marker must be followed by whitespace so that text such as
    # "1.5 million" is not mistaken for an enumerated item.
    list_marker = re.compile(r"^\s*(?:\d+[.)]|[-*•])\s+")

    theses = [list_marker.sub("", line).strip() for line in raw_thesis.splitlines()]
    timed_theses = {
        page_url: [
            # timespec="seconds" always yields a 19-character stamp; slicing
            # isoformat() by [:-7] loses the seconds when microsecond == 0.
            {"memory": thesis, "time": datetime.now().isoformat(timespec="seconds")}
            for thesis in theses
            if thesis  # skip blank lines instead of storing empty memories
        ]
    }
    return timed_theses

In [9]:
# Function-calling schema passed as `functions=` to the chat completion call.
# It declares one callable, "decide_SearchorLinks", with two required string
# arguments: the query to search for and the number of the link to follow.
functions = [
    dict(
        name="decide_SearchorLinks",
        description="Returns dict with decision.",
        parameters=dict(
            type="object",
            properties=dict(
                what_to_search=dict(
                    type="string",
                    description="Query to search for",
                ),
                link_num=dict(
                    type="string",
                    description="Link number to click",
                ),
            ),
            required=["what_to_search", "link_num"],
        ),
    )
]

In [14]:
# Open a headless browser session, load a single page and print the text
# extracted from its source. `S` is reused by the search cell further down
# (it reads S.current_url).
S = start_session(headless=True)
#page_source = get_page_source(S, "https://www.cs.bham.ac.uk/~jxb/IAI/w4.pdf")
page_source = get_page_source(S, "https://en.wikipedia.org/wiki/Battle_of_the_Trebia")
page_url = S.current_url  # URL reported by the session after get_page_source ran
text = text_extract(page_source)
print(text)
# print(Style.BRIGHT+Fore.BLUE + f"I am reading the following page: {page_url}", end="\n\n")
# #links_to_follow = [l.get_attribute("href") for l in get_links(S) if l.get_attribute("href")]
# links_to_follow = [(l.accessible_name, l.get_attribute("href")) for l in get_links(S) if l.get_attribute("href") and l.accessible_name]
# if len(links_to_follow) > 100:
#     links_to_follow = links_to_follow[:100]
# texted_links_to_follow = "\n".join([f"{i+1}. {link[0]}, {link[1]}" for i, link in enumerate(links_to_follow)])

# messages = [{"role": "system", "content": READ_PROMPTS["navigator_manifest"]},
#             {"role": "user", "content": READ_PROMPTS["reflection"].format(page_url, text)}]


# # Generates memories from the content of the page
# raw_page_memories = openai.ChatCompletion.create(
#     model=model,
#     temperature=0.8,
#     messages=messages
# )

# page_memories = raw_page_memories["choices"][0]["message"]["content"]
# print(Style.BRIGHT + Fore.LIGHTGREEN_EX + page_memories, end="\n\n")
# MEMORIES["theses"].append(structure_memory(page_memories, page_url))
# messages.append({"role": "assistant", "content": page_memories})
# messages.append({"role": "user", "content": READ_PROMPTS["autoreflection"].format(page_memories)})


# # Generates the question to ask
# generated_question = openai.ChatCompletion.create(
#     model=model,
#     temperature=0.8,
#     messages=messages
# )

# current_question = generated_question["choices"][0]["message"]["content"]
# print(Style.BRIGHT + Fore.YELLOW + current_question, end="\n\n")
# MEMORIES["questions"].append({page_url: current_question})
# messages.append({"role": "assistant", "content": current_question})
# messages.append({"role": "user", "content": READ_PROMPTS["next_move_consideration"].format(current_question,
#                                                                             texted_links_to_follow,
#                                                                             Config.SERCH_ENGINE)})


# # Make a decision about the further steps
# filtered_messages = [messages[0], messages[-1]]
# raw_decision = openai.ChatCompletion.create(
#         model=model,
#         temperature=0.8,
#         messages=filtered_messages,
#         functions=functions,
#         function_call="auto",  # auto is default, but we'll be explicit
#     )

# decision_message = raw_decision["choices"][0]["message"]
# if decision_message.get("function_call"):
#     #print(decision_message)
#     decision = json.loads(decision_message["function_call"]["arguments"])
#     if decision["link_num"]:
#         print(Style.BRIGHT + Fore.LIGHTMAGENTA_EX +  f"follow link: {decision['link_num']}", end="\n\n")
#     else:
#         print(Style.BRIGHT + Fore.LIGHTMAGENTA_EX + f"Searching for: {decision['what_to_search']}", end="\n\n")
    
#     search_phrase = decision["what_to_search"]


Battle of the Trebia
|Battle of the Trebia
|Part of the Second Punic War
|Belligerents
|Rome
|Carthage
|Commanders and leaders
|Sempronius Longus
|Hannibal
|Strength
|
40,000 total
|
40,000 total
|Casualties and losses
|
|
The battle of the Trebia (or Trebbia) was the first major battle of the Second Punic War, fought between the Carthaginian forces of Hannibal and a Roman army under Sempronius Longus on 22 or 23 December 218 BC. Each army mustered about 40,000 men; the Carthaginians were stronger in cavalry, the Romans in infantry. The battle took place on the flood plain of the west bank of the lower Trebia River, not far from the settlement of Placentia (modern Piacenza), and resulted in a heavy defeat for the Romans.
War broke out between Carthage and Rome in 218 BC. The leading Carthaginian general, Hannibal, responded by leading a large army out of Iberia (modern Spain and Portugal), through Gaul, across the Alps and into Cisalpine Gaul (in northern Italy). The Romans went on the

In [11]:
# Run a search for `search_phrase` in a second headless session and ask the
# model which of the returned links to open.
# NOTE(review): in the visible notebook `search_phrase` is only assigned inside
# commented-out code in an earlier cell, so on a fresh kernel this cell fails
# with NameError — confirm where the phrase is meant to come from.
Ssearch = start_session(headless=True)
#search_source = get_page_source(Ssearch, Config.SERCH_ENGINE)
search_links, link_synopsis = search_info(Ssearch, search_phrase, S.current_url)


# System prompt plus one user message built from the search phrase and the
# synopsis of the found links.
search_messages = [{"role": "system", "content": READ_PROMPTS["navigator_manifest"]},
            {"role": "user", "content": SEARCH_PROMPTS["search_consideration"].format(search_phrase, link_synopsis)}]

raw_search_link_num = openai.ChatCompletion.create(
    model=model,
    temperature=0.8,
    messages=search_messages
)

# The reply text is printed as-is; it is not parsed into a number here.
search_link_num = raw_search_link_num["choices"][0]["message"]["content"]
print(Style.BRIGHT + Fore.LIGHTYELLOW_EX + search_link_num, end="\n\n")

# NOTE(review): the quit call is commented out, so the `Ssearch` session is
# left open after this cell finishes.
#Ssearch.quit()

[1m[93mlink_num: 1



In [121]:
# options = Options()

# options.add_argument("--headless")
# options.add_argument(f"user-agent={choice(USER_AGENTS)}")
# driver = webdriver.Chrome(options=options)
# driver.get("https://plato.stanford.edu/entries/poincare/")

In [26]:
# prompts = {}
# for prompt_name in [navigator_manifest, reflection, autoreflection, next_move_consideration]:
#     variable_name = [k for k, v in locals().items() if v is prompt_name][0]
#     prompts[variable_name] = prompt_name
# json.dump(prompts, open("prompts.json", "w"))

In [16]:
from reading import read
from searching import search
from session_manager import start_session

#readS = start_session(headless=False)
#searchS = start_session(headless=False)
# Starting URL for the read/search loop in the next cell, which rebinds `root`
# on every iteration.
root = "https://shandou.medium.com/export-and-create-conda-environment-with-yml-5de619fe5a2"
#search_concept = read(readS, "https://plato.stanford.edu/entries/poincare/")
#link2content4read = search(searchS, search_concept, readS.current_url)
# NOTE(review): `try_num` is not referenced anywhere else in the visible
# notebook — confirm whether it is still needed.
try_num = 2

In [17]:
# Crawl loop: read the page at `root`, then either follow the link the reader
# proposes or run a web search for the concept it proposes and continue from
# the page that search returns.
readS = start_session(headless=False)
searchS = start_session(headless=False)

try:
    while True:
        next_move = read(readS, root)
        if next_move["link2follow"]:
            root = next_move["link2follow"]
        elif next_move["search_for"]:
            concept = next_move["search_for"]
            # Every query still gets a fresh search session, but the previous
            # one is closed first so browser processes do not pile up.
            searchS.quit()
            searchS = start_session(headless=False)
            link2content4read = search(searchS, concept, readS.current_url)
            root = link2content4read
        else:
            # Neither a link nor a query was proposed: `root` would not change,
            # so continuing would re-read the same page forever.
            break
finally:
    # Close both browsers on normal exit, on errors and on a kernel interrupt.
    try:
        readS.quit()
    finally:
        searchS.quit()

[1m[34mI am reading the following page: https://shandou.medium.com/export-and-create-conda-environment-with-yml-5de619fe5a2

[1m[92m1. The author of the page is discussing their experience with developing and testing code on different systems, including MacOS, DigitalOcean droplet, and AWS ec2. 
2. The author mentions the desire to share environments between the DigitalOcean droplet and AWS platforms, and provides a quick note on how to export dependencies from one platform and clone it onto another platform.
3. The author explains the process of creating an environment.yml file via conda and how to generate a dependency yaml file from the current conda environment.
4. The author suggests committing the generated yml file to a git repository and cloning it onto the target operating system to create a conda environment from it.
5. The author also mentions a few other frequently used conda commands, such as listing all available conda environments, creating a new environment, removin

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"[id="searchbox_input"]"}
  (Session info: chrome=118.0.5993.88); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x0000000102698510 chromedriver + 4310288
1   chromedriver                        0x00000001026904bc chromedriver + 4277436
2   chromedriver                        0x00000001022c3b6c chromedriver + 293740
3   chromedriver                        0x0000000102309040 chromedriver + 577600
4   chromedriver                        0x0000000102343e60 chromedriver + 818784
5   chromedriver                        0x00000001022fcfd0 chromedriver + 528336
6   chromedriver                        0x00000001022fde7c chromedriver + 532092
7   chromedriver                        0x000000010265e834 chromedriver + 4073524
8   chromedriver                        0x00000001026627fc chromedriver + 4089852
9   chromedriver                        0x0000000102662c58 chromedriver + 4090968
10  chromedriver                        0x00000001026688f8 chromedriver + 4114680
11  chromedriver                        0x0000000102663234 chromedriver + 4092468
12  chromedriver                        0x000000010263d604 chromedriver + 3937796
13  chromedriver                        0x000000010267fee8 chromedriver + 4210408
14  chromedriver                        0x0000000102680064 chromedriver + 4210788
15  chromedriver                        0x0000000102690134 chromedriver + 4276532
16  libsystem_pthread.dylib             0x000000018705f034 _pthread_start + 136
17  libsystem_pthread.dylib             0x0000000187059e3c thread_start + 8


In [19]:
import json

In [20]:
# Load the reading prompts for inspection/editing; the `with` block closes the
# file handle as soon as the JSON has been parsed.
with open("prompts/read_prompts.json", "r") as read_prompts_file:
    reading_messages = json.load(read_prompts_file)

In [21]:
# Show the "next_move_consideration" prompt template as currently stored.
print(reading_messages["next_move_consideration"])

Taking into consideration the question you have asked: {} - 
Do you think that you should follow any link from current page? If yes, give its number? These are the names of the links you can follow. Think if any of them is relevant to your question:

{}

If you think that you should not follow any link but instead search Internet for information, please formulate the query based on the question you have asked. Format your answer as json object with two keys: "what_to_search" and "link_num".
Values for these keys should be:
if you want to search:
"what_to_search": "your query"
"link_num": ""
if you want to follow link:
"what_to_search": ""
"link_num": intiger number of the link you want to follow



In [16]:
# Replacement text for the "next_move_consideration" prompt; the two `{}`
# placeholders are positional str.format slots.
# NOTE(review): this rebinds `text`, which an earlier cell uses for the
# extracted page text.
# NOTE(review): "intiger" below is a typo for "integer"; it is part of the
# runtime prompt string, so it is deliberately left unchanged here.
text = '''Taking into consideration the question you have asked: {} - 
Do you think that you should follow any link from current page? If yes, give its number? These are the names of the links you can follow. Think if any of them is relevant to your question:

{}

If you think that you should not follow any link but instead search Internet for information, please formulate the query based on the question you have asked. Format your answer as json object with two keys: "what_to_search" and "link_num".
Values for these keys should be:
if you want to search:
"what_to_search": "your query"
"link_num": ""
if you want to follow link:
"what_to_search": ""
"link_num": intiger number of the link you want to follow
'''

In [18]:
# Replace the "next_move_consideration" prompt and persist all reading prompts.
reading_messages["next_move_consideration"] = text
# Serialize before opening the file: opening with "w" truncates it, so a
# serialization error must not be able to leave an empty prompts file behind.
serialized_prompts = json.dumps(reading_messages)
# The `with` block guarantees the data is flushed and the handle closed.
with open("prompts/read_prompts.json", "w") as read_prompts_file:
    read_prompts_file.write(serialized_prompts)