In [1]:
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Export certifi's CA bundle path through SSL_CERT_FILE and REQUESTS_CA_BUNDLE.
# These assignments run after load_dotenv() and are unconditional, so they
# replace any value the two variables already had.
import certifi
os.environ['SSL_CERT_FILE'] = certifi.where()
os.environ['REQUESTS_CA_BUNDLE'] = certifi.where()

# NOTE(review): truststore.inject_into_ssl() presumably switches the ssl module
# to the operating system trust store; confirm how that interacts with the
# certifi paths exported just above.
import truststore
truststore.inject_into_ssl()

In [2]:
from langchain_openai import ChatOpenAI
import os

# Fail fast with a clear message when the DashScope key is missing or empty.
# Passing api_key=None would leave key resolution to the OpenAI client
# defaults (presumably the OPENAI_API_KEY variable), which risks sending an
# unrelated credential to the DashScope endpoint or failing with a confusing
# error much later.
if not os.getenv("DASHSCOPE_API_KEY"):
    raise RuntimeError(
        "DASHSCOPE_API_KEY is not set; add it to your .env file or environment."
    )

# Qwen model reached through DashScope's OpenAI-compatible endpoint.
tongyi_chat = ChatOpenAI(
    api_key=os.environ["DASHSCOPE_API_KEY"],
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    model="qwen-plus",
    # other params...
)

In [3]:
# Conversation as (role, content) tuples. The system prompt (in Chinese) says:
# "You are a professional translator who can translate the user's Chinese into
# English."; the human turn is the Chinese sentence for "I like programming."
messages = [
    ("system", "你是一名专业的翻译家，可以将用户的中文翻译为英文。"),
    ("human", "我喜欢编程。"),
]
# Bare last expression so the notebook displays the returned message object.
tongyi_chat.invoke(messages)


AIMessage(content='The English translation of "我喜欢编程" is:\n\n"I like programming."\n\nThis conveys the meaning that you enjoy or have an interest in programming. The word "programming" refers to the process of writing computer programs or software.', response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 30, 'total_tokens': 75, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'qwen-plus', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-14e00c94-4e05-4906-aa76-cb5f2cb87467-0')

In [4]:
# Stream the reply and print every chunk object as it arrives (the whole
# chunk, not only its text content).
for chunk in tongyi_chat.stream(messages):
    print(chunk)

content='' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content='The' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' English' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' translation' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' of "我喜欢编程' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content='" is:\n\n"I' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' like programming."\n\nThis' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' conveys the meaning' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' that you enjoy or' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' have a preference for' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content=' the activity of programming' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content='/writing code.' id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'
content='' response_metadata={'finish_reason': 'stop'} id='run-c9e5cfc4-738d-4420-be7a-17051be959a2'


In [5]:
# Asynchronous counterpart of invoke(). The top-level `await` works inside a
# notebook cell; in a plain script it would have to live in an async function.
await tongyi_chat.ainvoke(messages)

# Async streaming alternative (left disabled):
# async for chunk in tongyi_chat.astream(messages):
#    print(chunk)

# Async batch alternative (left disabled); takes a list of conversations:
# await tongyi_chat.abatch([messages])


AIMessage(content='I like programming.', response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 30, 'total_tokens': 34, 'completion_tokens_details': None, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'qwen-plus', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-398c8d45-be80-48be-a976-71f176cdcb99-0')

In [6]:
from langchain_core.pydantic_v1 import BaseModel, Field


# Tool schema with a single required string argument, `location`.
# NOTE(review): once this class is passed to bind_tools, the docstring and the
# Field description are presumably forwarded to the model as the tool
# description, so treat them as prompt text rather than ordinary comments.
class GetWeather(BaseModel):
    '''Get the current weather in a given location'''

    location: str = Field(
        ..., description="The city and state, e.g. San Francisco, CA"
    )


# Tool schema with a single required string argument, `location`.
# NOTE(review): once this class is passed to bind_tools, the docstring and the
# Field description are presumably forwarded to the model as the tool
# description, so treat them as prompt text rather than ordinary comments.
class GetPopulation(BaseModel):
    '''Get the current population in a given location'''

    location: str = Field(
        ..., description="The city and state, e.g. San Francisco, CA"
    )

# Bind both schemas as tools, ask one question that touches weather and
# population for two cities, then display whichever tool calls the model
# chose to emit.
chat_with_tools = tongyi_chat.bind_tools([GetWeather, GetPopulation])
ai_msg = chat_with_tools.invoke(
    "Which city is hotter today and which is bigger: LA or NY?"
)
ai_msg.tool_calls

[{'name': 'GetWeather',
  'args': {'location': 'Los Angeles, CA'},
  'id': 'call_3cef7dbd66aa41d8a716e6'}]

In [7]:
from typing import Optional

from langchain_core.pydantic_v1 import BaseModel, Field


# Structured-output schema for a joke: two required strings plus an optional
# integer rating.
# NOTE(review): the docstring and Field descriptions are presumably sent to the
# model as part of the schema, so they are kept verbatim.
class Joke(BaseModel):
    '''Joke to tell user.'''

    setup: str = Field(description="The setup of the joke")
    punchline: str = Field(description="The punchline to the joke")
    # Explicit default: without it the field is optional only through the
    # pydantic-v1 rule that Optional[...] implies a None default. Under
    # pydantic-v2 semantics the same declaration would be required, and a
    # reply that omits the rating would fail validation.
    rating: Optional[int] = Field(
        default=None, description="How funny the joke is, from 1 to 10"
    )


# Wrap the chat model so that invoke() yields a Joke instance, then display
# the result for a sample request.
structured_chat = tongyi_chat.with_structured_output(Joke)
structured_chat.invoke("Tell me a joke about cats")


Joke(setup='Why did the cat sit on the computer?', punchline='Because he wanted to keep an eye on the mouse.', rating=None)

In [8]:
# Re-run the translation request and display the metadata attached to the
# reply. Note: this rebinds `ai_msg`, which earlier held the tool-calling reply.
ai_msg = tongyi_chat.invoke(messages)
ai_msg.response_metadata

{'token_usage': {'completion_tokens': 32,
  'prompt_tokens': 30,
  'total_tokens': 62,
  'completion_tokens_details': None,
  'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}},
 'model_name': 'qwen-plus',
 'system_fingerprint': None,
 'finish_reason': 'stop',
 'logprobs': None}

In [10]:
from sentence_transformers import SentenceTransformer

# trust_remote_code=True allows Python code shipped in the model repository to
# run locally; enable it only for repositories you trust.
model = SentenceTransformer("Alibaba-NLP/gte-Qwen2-1.5B-instruct", trust_remote_code=True) #7.1G
# In case you want to reduce the maximum length:
model.max_seq_length = 8192

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# Queries are encoded with the prompt registered under the name "query";
# documents are encoded without naming a prompt.
query_embeddings = model.encode(queries, prompt_name="query")
document_embeddings = model.encode(documents)

# Dot product of every query against every document, scaled by 100.
# NOTE(review): this equals cosine similarity only if the model returns
# unit-length embeddings — confirm for this checkpoint.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:  43%|####3     | 3.81G/8.80G [00:00<?, ?B/s]

OSError: Consistency check failed: file should be of size 4994888704 but has size 8801219584 (model-00001-of-00002.safetensors).
This is usually due to network issues while downloading the file. Please retry with `force_download=True`.

In [None]:
from sentence_transformers import SentenceTransformer

# BGE-M3 loads with the stock transformers classes, so remote code execution
# is left disabled.
model = SentenceTransformer("BAAI/bge-m3")  #2.3G
# Only ever lower the limit; never raise it past what the checkpoint declares.
model.max_seq_length = min(model.max_seq_length, 8192)

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# encode() raises ValueError for a prompt name the model does not define, so
# request the "query" prompt only when it is actually configured.
query_encode_kwargs = {"prompt_name": "query"} if "query" in model.prompts else {}
# Unit-length embeddings make the dot product below a cosine similarity.
query_embeddings = model.encode(queries, normalize_embeddings=True, **query_encode_kwargs)
document_embeddings = model.encode(documents, normalize_embeddings=True)

# One row per query, one column per document, scaled by 100.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


ModuleNotFoundError: No module named 'sentence_transformers'

: 

: 

: 

: 

In [None]:
from sentence_transformers import SentenceTransformer

# This checkpoint loads with the stock transformers classes, so remote code
# execution is left disabled.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2") #500M
# Only ever lower the limit: assigning 8192 outright would raise it beyond what
# this small model supports and break on long inputs.
model.max_seq_length = min(model.max_seq_length, 8192)

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# encode() raises ValueError for a prompt name the model does not define, so
# request the "query" prompt only when it is actually configured.
query_encode_kwargs = {"prompt_name": "query"} if "query" in model.prompts else {}
# Unit-length embeddings make the dot product below a cosine similarity even
# if the model pipeline does not normalise its output itself.
query_embeddings = model.encode(queries, normalize_embeddings=True, **query_encode_kwargs)
document_embeddings = model.encode(documents, normalize_embeddings=True)

# One row per query, one column per document, scaled by 100.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


ModuleNotFoundError: No module named 'sentence_transformers'

: 

: 

: 

In [None]:
from sentence_transformers import SentenceTransformer

# This checkpoint loads with the stock transformers classes, so remote code
# execution is left disabled.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2") #100M
# Only ever lower the limit: assigning 8192 outright would raise it beyond what
# this small model supports and break on long inputs.
model.max_seq_length = min(model.max_seq_length, 8192)

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# encode() raises ValueError for a prompt name the model does not define, so
# request the "query" prompt only when it is actually configured.
query_encode_kwargs = {"prompt_name": "query"} if "query" in model.prompts else {}
# Unit-length embeddings make the dot product below a cosine similarity.
query_embeddings = model.encode(queries, normalize_embeddings=True, **query_encode_kwargs)
document_embeddings = model.encode(documents, normalize_embeddings=True)

# One row per query, one column per document, scaled by 100.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


ModuleNotFoundError: No module named 'sentence_transformers'

: 

: 

: 

In [None]:
from sentence_transformers import SentenceTransformer

# This repository ships its own model code, so trust_remote_code stays enabled;
# only do this for repositories you trust.
model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True) #550M
# Only ever lower the limit; never raise it past what the checkpoint declares.
model.max_seq_length = min(model.max_seq_length, 8192)

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# Nomic embedding models expect a task prefix on every input. Pass the prefixes
# explicitly instead of relying on a "query" prompt being configured, and give
# the documents their own prefix as well.
query_embeddings = model.encode(
    queries, prompt="search_query: ", normalize_embeddings=True
)
document_embeddings = model.encode(
    documents, prompt="search_document: ", normalize_embeddings=True
)

# Unit-length embeddings make this dot product a cosine similarity; one row
# per query, one column per document, scaled by 100.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


ModuleNotFoundError: No module named 'sentence_transformers'

: 

: 

: 

In [None]:
from sentence_transformers import SentenceTransformer

# This checkpoint loads with the stock transformers classes, so remote code
# execution is left disabled.
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1") #670M
# Only ever lower the limit: assigning 8192 outright would raise it beyond what
# this model supports and break on long inputs.
model.max_seq_length = min(model.max_seq_length, 8192)

# Two sample search queries and two sample passages to score against them.
queries = [
    "how much protein should a female eat",
    "summit define",
]
documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
]

# encode() raises ValueError for a prompt name the model does not define, so
# request the "query" prompt only when it is actually configured.
query_encode_kwargs = {"prompt_name": "query"} if "query" in model.prompts else {}
# Unit-length embeddings make the dot product below a cosine similarity even
# if the model pipeline does not normalise its output itself.
query_embeddings = model.encode(queries, normalize_embeddings=True, **query_encode_kwargs)
document_embeddings = model.encode(documents, normalize_embeddings=True)

# One row per query, one column per document, scaled by 100.
scores = (query_embeddings @ document_embeddings.T) * 100
print(scores.tolist())


ModuleNotFoundError: No module named 'sentence_transformers'

: 

: 

: 