In [1]:
# Load API keys from the env file through the dotenv IPython extension
# (recipe: https://stackoverflow.com/a/54028874).
%load_ext dotenv
%dotenv ../etc/config.env

import sys
# Put the parent directory on the import path - presumably so the local
# `desci_sense` package imported below resolves; confirm against the repo layout.
sys.path.append("../")

from typing import List

from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import OutputFixingParser, PydanticOutputParser
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.schema import OutputParserException
from pydantic import BaseModel, Field

from desci_sense.configs import init_config
from desci_sense.parsers.multi_tag_parser import MultiTagParser
from desci_sense.postprocessing.parser_utils import fix_json_string_with_backslashes

In [2]:
# Build the run configuration for the Mixtral-8x7B instruct model.
config = init_config(model_name="mistralai/mixtral-8x7b-instruct")

In [3]:
# Instantiate the multi-tag parser from that configuration.
# Construction emits the "headers was transferred to model_kwargs" warning shown below.
multi_parser = MultiTagParser(config=config)

                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


In [4]:
# List every field of the parser's pydantic model next to its description text.
pydantic_obj = multi_parser.pydantic_parser.pydantic_object
for field_name, model_field in pydantic_obj.__fields__.items():
    print(field_name, model_field.field_info.description, sep=": ")

is_announce_tag: Set to True if this post contains an announcement of new research. The announcement is likely made by the authors but may be a third party. The research should be a paper, dataset or other type of research output that is being announced publicly. False otherwise.
is_read_tag: Set to True if this post describes the reading status of the author in relation to a reference, such as a book or article. False otherwise.
is_event_tag: Set to True if this post describes an event, either real-world or an online event. Any kind of event is relevant, some examples of events could be seminars, meetups, or hackathons. False otherwise.
is_review_tag: Set to True if this post contains a review of another reference, such as a book, article or movie. The review could be positive or negative. A review can be detailed or a simple short endorsement. False otherwise.
is_recommendation_tag: Set to True if this post is recommending any kind of content: an article, a movie, podcast, book, anot

In [5]:
# Example post: a faculty job-search advertisement with application links.
target = """My department at Univ Washington in Seattle is searching for a tenure-track assistant prof in "Quantitative Understanding of Collective Behavior" @UWBiology

See ad for more info about the search vision and to apply; happy to answer questions!

https://apply.interfolio.com/130336 https://t.co/ydKZVuAeeY"""

In [7]:
# Run the parser on the post and print the raw completion (JSON text, not yet validated).
raw_result = multi_parser.run_raw(target)
print(raw_result)

{
"is_announce_tag": true,
"is_read_tag": false,
"is_event_tag": false,
"is_review_tag": false,
"is_recommendation_tag": false,
"is_listening_tag": false,
"is_job_tag": true,
"is_quote_tag": false
}


In [13]:
# Print the raw completion *before* validating it, so the offending JSON is
# still visible when the pydantic parser rejects it (for example when the
# model leaves out fields, which raises "field required" validation errors).
raw_result = multi_parser.run_raw(target)
print(raw_result)
parsed = multi_parser.pydantic_parser.parse(raw_result)

ValidationError: 5 validation errors for PostTagsDataModel
is_read_tag
  field required (type=value_error.missing)
is_review_tag
  field required (type=value_error.missing)
is_recommendation_tag
  field required (type=value_error.missing)
is_listening_tag
  field required (type=value_error.missing)
is_quote_tag
  field required (type=value_error.missing)

In [14]:
# Inspect the completion currently held in `raw_result`.
print(raw_result)

{
"is_announce_tag": true,
"is_event_tag": true,
"is_job_tag": true
}


In [11]:
# Display the parsed tag model.
# NOTE(review): this cell's execution count (11) is lower than the cells above it
# (13, 14), so the saved output likely comes from an earlier run - re-run in order to confirm.
parsed

PostTagsDataModel(is_announce_tag=True, is_read_tag=False, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False, is_listening_tag=False, is_job_tag=True, is_quote_tag=False)

In [4]:
# Example post: an author sharing the updated preprint link for their own paper.
target_post = """I just did the proofs for our "Epistemology of Democratic Citizen Science" paper for J Roy Soc OpenScience.

Here is the updated preprint link: https://osf.io/j62sb.

Will write a longer thread once the paper is out in print."""

In [5]:
# Tag the post end-to-end; the result displays as a PostTagsDataModel (see output).
res = multi_parser.process_text(target_post)
res

PostTagsDataModel(is_announce_tag=True, is_read_tag=False, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False, is_listening_tag=False, is_job_tag=False, is_quote_tag=False)

In [6]:
# Render the selected tags as a string.
res.get_selected_tags_str()

'announce'

In [6]:
# Example post: the author reports reading a paper and asks to be corrected.
target_post_2 = """Just read David Deutsch's 1985 paper on the Physical Church-Turing Principle
carefully.

https://royalsocietypublishing.org/doi/abs/10.1098/rspa.1985.0070

I think I'm getting the following gist from the work (even though I can't
understand the quantum theory in the paper in detail) ...

Maybe someone can correct me if I'm wrong?"""

In [7]:
# Tag the post and display the resulting model.
multi_parser.process_text(target_post_2)

PostTagsDataModel(is_announce_tag=False, is_read_tag=True, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False, is_listening_tag=False, is_job_tag=False, is_quote_tag=False)

In [8]:
# Example post: first tweet of a thread (1/13) presenting the author's arXiv paper.
target_post_3 = """How is a high-rank signal deformed by noise?

This 1st postdoc work, a close collab with @meldefon, with @SuryaGanguli, pushed me deep into new territory of non-Hermitian random matrix theory, and I think it has some practical results!

Let's go!

https://arxiv.org/abs/2306.00340

1/13"""

In [9]:
# Tag the post and display the resulting model.
multi_parser.process_text(target_post_3)

PostTagsDataModel(is_announce_tag=True, is_read_tag=False, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False, is_listening_tag=False, is_job_tag=False, is_quote_tag=False)

In [14]:
# Example post: a postdoc job advertisement (the link is a textual placeholder).
target_post_4 = """Join my lab as a postdoc in the Cognitive Science of Values! PhDs in psychology, philosophy, cognitive science invited to apply to pursue collaborative research on the role of values in belief formation & revision, decision making, etc, deadline 1/15/24 <link to job listing>"""

In [16]:
# Tag the post and display the resulting model.
multi_parser.process_text(target_post_4)

PostTagsDataModel(is_announce_tag=True, is_read_tag=False, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False, is_listening_tag=False, is_job_tag=True, is_quote_tag=False)

In [3]:
# List the tag names exposed by PostTagTypes.
# NOTE(review): PostTagTypes is not imported in any visible cell - confirm its
# module and add the import to the first cell, or this fails on a fresh kernel.
PostTagTypes.tags()

['announce', 'read', 'event', 'review', 'recommendation']

In [None]:
# https://docs.streamlit.io/library/api-reference/widgets/st.multiselect

In [4]:
# Instantiate the parser with the new model.
parser = PydanticOutputParser(pydantic_object=PostTagTypes)

# Update the prompt to match the new query and desired format.
prompt = ChatPromptTemplate(
    messages=[
        HumanMessagePromptTemplate.from_template(
            "Tag the post as accurately as possible.\n{format_instructions}\n{question}"
        )
    ],
    input_variables=["question"],
    partial_variables={
        "format_instructions": parser.get_format_instructions(),
    },
)

In [5]:
# Show the format instructions that are injected into the prompt.
parser.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"is_announce_tag": {"title": "Is Announce Tag", "description": "Set to True if this post contains an announcement of new research. The announcement is likely made by the authors but may be a third party. The research should be a paper, dataset or other type of research output that is being announced publicly. False otherwise.", "type": "boolean"}, "is_read_tag": {"title": "Is Read Tag", "description": "Set to True if this post describes the reading status of the author in relation to a reference, such as a book or artic

In [6]:
# Post to tag.
target_post = """I just did the proofs for our "Epistemology of Democratic Citizen Science" paper for J Roy Soc OpenScience.

Here is the updated preprint link: https://osf.io/j62sb.

Will write a longer thread once the paper is out in print."""
# Alternative example post; uncomment to try it instead.
# target_post = "This paper is excellently written and its arguments are quite compelling. I'm very happy to have read it! Highly recommended."

# Put the post into the prompt's question slot and render the prompt value.
user_query = "Here is the target post: " + target_post
_input = prompt.format_prompt(question=user_query)

In [7]:



# Rebuild the config (same call as the earlier config cell) and take the chat
# model from a BaseParser instance.
# NOTE(review): BaseParser is not imported in any visible cell - confirm its
# module and add the import to the first cell, or this fails on a fresh kernel.
config = init_config(model_name="mistralai/mixtral-8x7b-instruct")
nano_parser = BaseParser(config=config)
chat_model = nano_parser.model


                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


In [8]:
# Display the resolved configuration.
config

{'model': {'model_name': 'mistralai/mixtral-8x7b-instruct',
  'temperature': 0.6},
 'prompt': {'template_path': 'desci_sense/prompting/templates/p4.txt'},
 'wandb': {'wand_entity': 'common-sense-makers',
  'project': 'st-demo',
  'wandb_db_name': 'test-DB'}}

In [9]:
# Send the rendered prompt to the chat model.
messages = _input.to_messages()
output = chat_model(messages)

# Clean the reply (the helper presumably repairs stray backslash escapes, per
# its name) and validate it against the schema.
fixed_content = fix_json_string_with_backslashes(output.content)
parsed = parser.parse(fixed_content)
print(parsed)

is_announce_tag=True is_read_tag=False is_event_tag=False is_review_tag=False is_recommendation_tag=False


In [10]:
# Display the validated tag model.
parsed

PostTagTypes(is_announce_tag=True, is_read_tag=False, is_event_tag=False, is_review_tag=False, is_recommendation_tag=False)

In [11]:
# Collect the names of the tags that were set (displays as a set, see output).
parsed.get_selected_tags()

{'announce'}

In [14]:
# Parse the raw model reply; if it does not validate against the schema, hand
# it to an OutputFixingParser, which asks the LLM to repair the reply.
# OutputFixingParser and OutputParserException are imported in the first cell.
try:
    parsed = parser.parse(output.content)
except OutputParserException:
    # Reuse the chat model built earlier (same config) rather than constructing
    # a second BaseParser just to read its `.model`.
    new_parser = OutputFixingParser.from_llm(
        parser=parser,
        llm=chat_model,
    )
    parsed = new_parser.parse(output.content)

                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


OutputParserException: Failed to parse TwitterUser from completion {
"name": "Ava Rodriguez",
"handle": "avarodriguez",
"age": 27,
"hobbies": ["traveling", "photography", "cooking", "reading"],
"email": "avarodriguez@email.com",
"bio": "A wanderlust-filled, food-loving, bookworm from Spain. Always up for new adventures and exploring different cultures!",
"location": "Barcelona, Spain",
"is\_blue\_badge": true,
"joined": "June 2010",
"gender": "Female",
"appearance": "Curly brown hair, dark eyes, freckles and a slim build, with a warm smile and a friendly demeanor.",
"avatar\_prompt": "A photorealistic portrait of Ava, captured in a serene beach setting, with a vibrant and colorful background that reflects her adventurous spirit and love for photography.",
"banner\_prompt": "A banner image that showcases Ava's passion for cooking and exploring different cuisines, with a beautiful kitchen setup and colorful ingredients in the background, surrounded by a cozy and inviting atmosphere."
}. Got: Invalid \escape: line 9 column 4 (char 325)

In [8]:
# Hand-written sample record: an empty 'post' plus an 'answer' holding the post
# text and a nested answer with 'final_answer' and 'reasoning'.
# NOTE(review): not referenced by any visible cell - confirm whether it is still needed.
cc = {'post':"", 'answer': {'text': 'I just did the proofs for our "Epistemology of Democratic Citizen Science"\npaper for J Roy Soc OpenScience.\n\nHere is the updated preprint link: https://osf.io/j62sb.\n\nWill write a longer thread once the paper is out in print.', 'answer': {'final_answer': 'announce', 'reasoning': ''}}}
