In [1]:
# load env with api keys https://stackoverflow.com/a/54028874
%load_ext dotenv
%dotenv ../etc/config.env

import sys
sys.path.append("../")

import os

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import ChatPromptTemplate
from langchain.schema import BaseOutputParser
from langchain.schema import (
    HumanMessage,
)



from twitter import scrape_tweet

In [16]:
# TEST_TWEET = "https://twitter.com/danwilliamsphil/status/1719436704602275858"
# TEST_TWEET = "https://twitter.com/pwang/status/1719720728184910195"
TEST_TWEET = "https://twitter.com/BlancheMinerva/status/1719714881081954409"
TEST_TWEET = "https://twitter.com/sucholutsky/status/1719725087681569189"

In [17]:
tweet = scrape_tweet(TEST_TWEET)
tweet

{'conversationID': '1719714878263349725',
 'date': 'Wed Nov 01 13:55:53 +0000 2023',
 'date_epoch': 1698846953,
 'hashtags': [],
 'likes': 2,
 'mediaURLs': [],
 'media_extended': [],
 'possibly_sensitive': False,
 'qrtURL': None,
 'replies': 1,
 'retweets': 0,
 'text': 'There are hundreds of researches around the world who are doing safety-critical research precisely because organizations like @AiEleuther @AIatMeta @TIIuae and @MosaicML release models for people to download. Every time I go to an AI conference I meet a dozen such people.',
 'tweetID': '1719714881081954409',
 'tweetURL': 'https://twitter.com/BlancheMinerva/status/1719714881081954409',
 'user_name': 'Stella Biderman',
 'user_screen_name': 'BlancheMinerva'}

In [18]:
# Create model

OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
# openai.api_key = os.environ["OPENROUTER_API_KEY"]

model_name = "mistralai/mistral-7b-instruct" # currently free on OpenRouter (https://openrouter.ai/docs#models)
# model_name = "openai/gpt-3.5-turbo"


chat = ChatOpenAI(
        model=model_name, 
        temperature=0.7,
        openai_api_key=os.environ["OPENROUTER_API_KEY"],
        openai_api_base=OPENROUTER_API_BASE,
        headers={"HTTP-Referer": os.environ["OPENROUTER_REFERRER"]}, # To identify your app. Can be set to e.g. http://localhost:3000 for testing
    )

                    headers was transferred to model_kwargs.
                    Please confirm that headers is what you intended.


In [30]:
# based on https://python.langchain.com/docs/get_started/quickstart#prompttemplate--llm--outputparser

class CommaSeparatedListOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""


    def parse(self, text: str):
        """Parse the output of an LLM call."""
        return text.strip().split(", ")
    
class StripOutputParser(BaseOutputParser):
    """Parse the output of an LLM call to a comma-separated list."""


    def parse(self, text: str):
        """Parse the output of an LLM call."""
        return text.strip()

template = """You are an expert annotator who tags social media posts related to academic research, according to a predefined set of tags. 
The available tag types are:
<announce>: this post contains an announcement of new research, likely by the authors. The research may be a paper, dataset or other type of research output.
<review>: this post contains a review of another reference, such as a book, article or movie. The review could be positive or negative.
<other>: use this if no other tag is suitable. If you tag a post with <Other>, no other tag should be assigned to the post.

A user will pass in a post, and you should think step by step, before returning a list of comma separated tags that best match the post.

Your final answer should be structured as follows:
# Reasoning steps: (your reasoning steps. For each tag you choose, explain why you chose it.)
# Final answer: (the final list of tags, based on the reasoning steps)

Remember:
The final answer should ONLY include tags from the list above, nothing more.
If the <other> tag is included in the answer, no other tag should be included!"""
human_template = "{text}"

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", human_template),
])
# chain = chat_prompt | chat | CommaSeparatedListOutputParser()
chain = chat_prompt | chat | StripOutputParser()
answer = chain.invoke({"text": tweet["text"]})
print(answer)

Reasoning steps:
1. The post mentions "hundreds of researches" which suggests a review of a research paper or article.
2. The post also mentions "organizations like @AiEleuther @AIatMeta @TIIuae and @MosaicML" which suggests a review of a set of organizations or their activities.
3. However, the post does not explicitly mention any particular reference, so the <review> tag may not be suitable.
4. The post mentions "safety-critical research" which could potentially be a review of a specific topic or field.
5. The post also mentions "people" which suggests a review of a human experience or perspective.

Final answer: <review>, <other>


# Create OpenAI model

This works, what about a free one?

In [6]:
# from langchain.chat_models import ChatOpenAI
# from langchain.schema import (
#     HumanMessage,
# )

# import openai
# import os

# OPENROUTER_API_BASE = "https://openrouter.ai/api/v1"
# openai.api_key = os.environ["OPENROUTER_API_KEY"]


# chat = ChatOpenAI(
#         temperature=0.7,
#         openai_api_key=os.environ["OPENROUTER_API_KEY"],
#         openai_api_base=OPENROUTER_API_BASE,
#         headers={"HTTP-Referer": os.environ["OPENROUTER_REFERRER"]}, 
#     )