In [1]:
import os
import openai
import sys
from dotenv import load_dotenv, find_dotenv, set_key

In [2]:
# Environment note: this notebook was written on Windows.
# Look up the active Conda environment; CONDA_PREFIX is unset outside Conda.
conda_env_path = os.environ.get('CONDA_PREFIX')

# Absolute path of the ".env" file.
# Prefer the copy stored in the Conda environment. Without Conda, fall back to
# searching upward from the working directory, because os.path.join(None, '.env')
# would raise a TypeError.
if conda_env_path:
    dotenv_path = os.path.join(conda_env_path, '.env')
else:
    dotenv_path = find_dotenv(usecwd=True)

# Load the ".env" file into the process environment.
_ = load_dotenv(dotenv_path, verbose=True)
# Raises KeyError when the key is missing, so a misconfiguration fails here
# rather than at the first API call.
openai.api_key = os.environ['OPENAI_API_KEY']

# Introduction to Chains

# Chains and Why They Are Used

Chains create an end-to-end pipeline for using language models. They combine the model, prompt, memory, output parsing, and debugging capability behind an easy-to-use interface. A chain 1) receives the user’s query as input, 2) processes the LLM’s response, and 3) returns the output to the user.

## LLMChain

In [3]:
from langchain import PromptTemplate, OpenAI, LLMChain

# Prompt with a single placeholder, {word}, that is filled in when the chain is called.
prompt_template = "What is a word to replace the following: {word}?"

# Set the "OPENAI_API_KEY" environment variable before running following line.
# NOTE(review): "text-davinci-003" appears to have been retired by OpenAI — confirm
# and switch to a currently supported completion model before re-running.
llm = OpenAI(model_name="text-davinci-003", temperature=0)

# Bundle the model and the prompt into one chain; the following cells call this object.
llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate.from_template(prompt_template)
)

In [4]:
# Calling the chain with a bare string fills the prompt's only variable; the result
# shown below is a dict holding both the input ('word') and the completion ('text').
llm_chain("artificial")

{'word': 'artificial', 'text': '\n\nSynthetic'}

In [5]:
# One dict per prompt to run; each supplies the template's {word} variable.
input_list = [
    {"word": "artificial"},
    {"word": "intelligence"},
    {"word": "robot"}
]

# apply() runs the chain once per entry; per the output below it returns a list of
# {'text': ...} dicts in the same order as the inputs.
llm_chain.apply(input_list)

[{'text': '\n\nSynthetic'}, {'text': '\n\nWisdom'}, {'text': '\n\nAutomaton'}]

In [6]:
# generate() takes the same list of inputs but returns an LLMResult which, as the
# output below shows, also carries finish reasons, token usage and the model name.
llm_chain.generate(input_list)

LLMResult(generations=[[Generation(text='\n\nSynthetic', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\nWisdom', generation_info={'finish_reason': 'stop', 'logprobs': None})], [Generation(text='\n\nAutomaton', generation_info={'finish_reason': 'stop', 'logprobs': None})]], llm_output={'token_usage': {'total_tokens': 46, 'completion_tokens': 13, 'prompt_tokens': 33}, 'model_name': 'text-davinci-003'}, run=RunInfo(run_id=UUID('6e8e83d4-5352-429f-b390-42bd0d139643')))

In [7]:
# Template with two placeholders: {context} disambiguates the meaning of {word}.
prompt_template = "Looking at the context of '{context}'. What is an appropriate word to replace the following: {word}?"

# Rebuild the chain around the new prompt, declaring both template variables.
llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate(template=prompt_template, input_variables=["word", "context"]))

# With several input variables, the values are passed as keyword arguments.
llm_chain.predict(word="fan", context="object")
# or llm_chain.run(word="fan", context="object")

'\n\nventilator'

In [8]:
# Same word, different context: the suggested replacement changes (see output below).
llm_chain.predict(word="fan", context="humans")
# or llm_chain.run(word="fan", context="humans")

'\n\nAdmirer'

## Parsers

In [9]:
from langchain.output_parsers import CommaSeparatedListOutputParser

# Parser for comma-separated replies; the output below shows the chain returning a
# Python list of strings once it is attached.
output_parser = CommaSeparatedListOutputParser()
# The prompt has no placeholders, hence the empty input_variables list below.
template = """List all possible words as substitute for 'artificial' as comma separated."""

# The parser is attached both to the prompt and to the chain itself.
llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate(template=template, output_parser=output_parser, input_variables=[]),
    output_parser=output_parser)

# No variables to supply, so predict() is called without arguments.
llm_chain.predict()

['Synthetic',
 'Manufactured',
 'Imitation',
 'Fabricated',
 'Fake',
 'Simulated',
 'Artificial Intelligence',
 'Automated',
 'Constructed',
 'Programmed',
 'Mechanical',
 'Processed',
 'Algorithmic',
 'Generated.']

## Conversational Chain (Memory)

In [10]:
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory

# NOTE(review): this parser is not passed to the chain built in this cell; the name is
# only referenced again by the prompt in the "Debug" section.
output_parser = CommaSeparatedListOutputParser()
# Conversation chain with a buffer memory attached; the follow-up cell relies on the
# earlier turn being remembered.
conversation = ConversationChain(
    llm=llm,
    memory=ConversationBufferMemory()
)

# First turn of the conversation; the reply comes back as a plain string (see output).
conversation.predict(input="List all possible words as substitute for 'artificial' as comma separated.")

' Synthetic, robotic, manufactured, simulated, computerized, programmed, man-made, fabricated, contrived, and artificial.'

In [11]:
# Follow-up question that only makes sense together with the previous turn, which the
# chain's memory is expected to supply.
conversation.predict(input="And the next 4?")

' Automated, cybernetic, mechanized, and engineered.'

## Sequential Chain

In [None]:
from langchain.chains import SimpleSequentialChain

# SimpleSequentialChain pipes the single output of each chain into the single input
# of the next, so every link must take exactly one variable.
# chain_one and chain_two were previously undefined, which made this cell raise a
# NameError on a fresh kernel.

# Step 1: ask for a replacement word.
chain_one = LLMChain(
    llm=llm,
    prompt=PromptTemplate.from_template("What is a word to replace the following: {word}?"),
)

# Step 2: explain the word produced by step 1.
chain_two = LLMChain(
    llm=llm,
    prompt=PromptTemplate.from_template("What is the meaning of the following word '{word}'?"),
)

overall_chain = SimpleSequentialChain(chains=[chain_one, chain_two])

## Debug

In [12]:
# Custom conversation prompt: a fixed instruction followed by the {history} and
# {input} slots declared as input variables below.
template = """List all possible words as substitute for 'artificial' as comma separated.

Current conversation:
{history}

{input}"""

# verbose=True makes the chain print the fully formatted prompt before the model is
# called, as seen in the output below.
conversation = ConversationChain(
    llm=llm,
    prompt=PromptTemplate(template=template, input_variables=["history", "input"], output_parser=output_parser),
    memory=ConversationBufferMemory(),
    verbose=True)

# The instruction already lives in the template, so the user input is left empty.
conversation.predict(input="")



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mList all possible words as substitute for 'artificial' as comma separated.

Current conversation:


[0m

[1m> Finished chain.[0m


'Synthetic, Imitation, Manufactured, Fabricated, Simulated, Fake, Artificial, Constructed, Computerized, Programmed'

## Custom Chain

In [13]:
from langchain.chains import LLMChain
from langchain.chains.base import Chain

from typing import Dict, List


class ConcatenateChain(Chain):
    """Chain that sends the same inputs to two LLM chains and joins their outputs."""

    # First wrapped chain; its output forms the beginning of the combined text.
    chain_1: LLMChain
    # Second wrapped chain; its output is appended to the first chain's output.
    chain_2: LLMChain

    @property
    def input_keys(self) -> List[str]:
        """Return every input key needed by either wrapped chain, without duplicates."""
        keys_of_first = set(self.chain_1.input_keys)
        keys_of_second = set(self.chain_2.input_keys)
        return list(keys_of_first | keys_of_second)

    @property
    def output_keys(self) -> List[str]:
        """Return the name of the single value this chain produces."""
        return ['concat_output']

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Run both wrapped chains on ``inputs`` and concatenate their results.

        Args:
            inputs: Mapping of input variable names to values, handed unchanged
                to each wrapped chain.

        Returns:
            A dict whose 'concat_output' entry is the first chain's output
            immediately followed by the second chain's output.
        """
        first_result = self.chain_1.run(inputs)
        second_result = self.chain_2.run(inputs)
        combined = first_result + second_result
        return {'concat_output': combined}

In [14]:
# First sub-chain: asks for the meaning of the word.
prompt_1 = PromptTemplate(
    input_variables=["word"],
    template="What is the meaning of the following word '{word}'?",
)
chain_1 = LLMChain(llm=llm, prompt=prompt_1)

# Second sub-chain: asks for a replacement for the same word.
prompt_2 = PromptTemplate(
    input_variables=["word"],
    template="What is a word to replace the following: {word}?",
)
chain_2 = LLMChain(llm=llm, prompt=prompt_2)

# Both prompts declare only "word", so the combined chain is run with a single
# bare string.
concat_chain = ConcatenateChain(chain_1=chain_1, chain_2=chain_2)
concat_output = concat_chain.run("artificial")
print(f"Concatenated output:\n{concat_output}")

Concatenated output:


Artificial means something that is not natural or made by humans, but rather created or produced by artificial means.

Synthetic


# Create a YouTube Video Summarizer Using Whisper and LangChain 

![YouTube video summarizer pipeline using Whisper and LangChain](pics/youtube_video_summarizer.webp)

## Installations:

In [17]:
# !pip install -q yt_dlp
# !pip install -q git+https://github.com/openai/whisper.git # This approach failed; the approach below works

In [3]:
# Route git's HTTP(S) traffic through a proxy listening on 127.0.0.1:7890.
# NOTE(review): --global writes to the user-wide git configuration, so the setting
# persists outside this notebook and assumes a local proxy is running — confirm
# before executing on another machine.
!git config --global http.proxy http://127.0.0.1:7890
!git config --global https.proxy http://127.0.0.1:7890

In [9]:
# Clone the Whisper sources into a local folder.
# NOTE(review): the destination is an absolute, machine-specific Windows path, while the
# install cell reads from ../Resource/whisper — confirm both refer to the same checkout.
!git clone https://github.com/openai/whisper.git E:\LocalWork\PyProjects\LangChain-Vector\whisper\

Cloning into 'E:\LocalWork\PyProjects\LangChain-Vector\whisper'...


In [5]:
!pip install ../Resource/whisper --user

Looking in indexes: https://pypi.org/simple/
Processing e:\localwork\pyprojects\resource\whisper
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml): started
  Building wheel for openai-whisper (pyproject.toml): finished with status 'done'
  Created wheel for openai-whisper: filename=openai_whisper-20230314-py3-none-any.whl size=807707 sha256=73fded196742882a7aab4a8accf7c8272f617c956af5fdd709345b686fb119f7
  Stored in directory: C:\Users\Administrator\AppData\Local\Temp\pip-ephem-wheel-cache-303hutak\wheels\66\e7\1d\2a7d9e2cebf9d7bacf6cd1b79087b5f7904b9c0b649f08c21b
Successfully built openai-whisper
Install



In [6]:
import yt_dlp

def download_mp4_from_youtube(url, filename='lecuninterview.mp4'):
    """Download a YouTube video to a local MP4 file using yt_dlp.

    Args:
        url: Address of the video to download.
        filename: Path the downloaded file is written to. Defaults to
            'lecuninterview.mp4', the name the transcription cells read.

    Returns:
        None. The video is saved to ``filename`` as a side effect.
    """
    # Set the options for the download
    ydl_opts = {
        # Prefer separate MP4 video and M4A audio streams; otherwise fall back to
        # the best single MP4 file.
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
        'outtmpl': filename,
        'quiet': True,
    }

    # Download the video file; the returned metadata is not needed here.
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.extract_info(url, download=True)

# Video to download; the file is written under the name the transcription cells read.
url = "https://www.youtube.com/watch?v=05w4fbYqbmU&ab_channel=EyeonAI"
download_mp4_from_youtube(url)

                                                                            

## Now it’s time for Whisper!

In [7]:
import whisper

# Load Whisper's "base" model and transcribe the downloaded video file.
# fp16=False disables half precision (presumably because this runs on CPU — confirm).
model = whisper.load_model("base")
result = model.transcribe("lecuninterview.mp4", fp16=False)
# The full transcript is stored under the 'text' key of the result.
print(result['text'])



 Hi, I'm Craig Smith and this is I on AI. This week I speak with Bratine Saaha, the head of Amazon's machine learning services. We talked about Amazon's growing dominance in model building and deploying AI about the company's SageMaker platform and whether anyone can compete with the behemoth. Before we begin, I want to thank our sponsor ClearML, the MLOP solution. You can check them out at clear.ml.com. In the meantime, I hope you find my conversation with Bratine as interesting as I did. Maybe start by describing your background when you came to Amazon and what you're doing at Amazon now. At Amazon, I now lead all of our AI and machine learning services, one of which is H. What we have the broadest and deepest set of capabilities. And then Brad, I was at NVIDIA, VP of Software Infrastructure, I did my PhD at Yale University and then also went to Harvard Business School. Been in drugs, been on the product side, been on the business side and now at Amazon leading all of the AI and mach

In [9]:
# NOTE(review): this cell repeats the previous transcription cell line for line
# (same model, same file, same options); consider removing one of them to avoid
# transcribing the video twice.
model = whisper.load_model("base")
result = model.transcribe("lecuninterview.mp4", fp16=False)
print(result['text'])

# Disabled experiment with the lower-level decode API.
# NOTE(review): whisper.decode appears to expect audio features rather than a file
# path — confirm against the Whisper documentation before re-enabling.
# options = whisper.DecodingOptions(language= 'en', fp16=False)
# result = whisper.decode(model, "lecuninterview.mp4", options)
# print(result.text)

 Hi, I'm Craig Smith and this is I on AI. This week I speak with Bratine Saaha, the head of Amazon's machine learning services. We talked about Amazon's growing dominance in model building and deploying AI about the company's SageMaker platform and whether anyone can compete with the behemoth. Before we begin, I want to thank our sponsor ClearML, the MLOP solution. You can check them out at clear.ml.com. In the meantime, I hope you find my conversation with Bratine as interesting as I did. Maybe start by describing your background when you came to Amazon and what you're doing at Amazon now. At Amazon, I now lead all of our AI and machine learning services, one of which is H. What we have the broadest and deepest set of capabilities. And then Brad, I was at NVIDIA, VP of Software Infrastructure, I did my PhD at Yale University and then also went to Harvard Business School. Been in drugs, been on the product side, been on the business side and now at Amazon leading all of the AI and mach

In [10]:
# Persist the transcript so the summarization cells can load it from disk.
with open('text.txt', 'w') as transcript_file:
    transcript_file.write(result['text'])

## Summarization with LangChain

In [11]:
from langchain import OpenAI, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain

# Model used by the summarization chains; same settings as the `llm` created in the
# chains section.
llm = OpenAI(model_name="text-davinci-003", temperature=0)



In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of up to 1000 characters with no overlap between
# consecutive chunks.
# NOTE(review): presumably the separators are tried in the listed order, so " " would
# be attempted before "," and "\n" — confirm this ordering is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"]
)

In [13]:
from langchain.docstore.document import Document

# Read back the transcript saved by the earlier cell.
with open('text.txt') as f:
    text = f.read()

# Split the transcript into chunks and wrap only the first four chunks as Documents,
# so the summaries below cover just the beginning of the interview.
texts = text_splitter.split_text(text)
docs = [Document(page_content=t) for t in texts[:4]]

In [14]:
from langchain.chains.summarize import load_summarize_chain
import textwrap

# Build a summarization chain of type "map_reduce" (presumably: summarize each
# document separately, then combine the partial summaries — confirm in the docs).
chain = load_summarize_chain(llm, chain_type="map_reduce")

output_summary = chain.run(docs)
# Re-flow the summary to lines of at most 100 characters for readable printing.
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)

  Craig Smith interviews Bratine Saaha, the head of Amazon's machine learning services, about
Amazon's dominance in model building and deploying AI, their SageMaker platform, and whether anyone
can compete with them. Amazon SageMaker provides pre-trained models and the ability to build models
from scratch or from open source. It is possible to do all machine learning on SageMaker, or to do
part of it on SageMaker and the rest elsewhere. It is important to be within the Amazon Cloud
ecosystem for interoperability.


The `textwrap` library in Python provides a convenient way to wrap and format plain text by adjusting line breaks in an input paragraph. It is particularly useful when displaying text within a limited width, such as in console outputs, emails, or other formatted text displays. The library includes convenience functions like `wrap`, `fill`, and `shorten`, as well as the `TextWrapper` class that handles most of the work. If you’re curious, I encourage you to consult the official `textwrap` documentation to find out more, as there are other functions in the library that can be useful depending on your needs.

In [15]:
# Show the prompt template the summarization chain uses for its LLM calls, so it can
# be compared with the custom prompt defined next.
print(chain.llm_chain.prompt.template)

Write a concise summary of the following:


"{text}"


CONCISE SUMMARY:


In [16]:
# Custom summarization prompt that asks for bullet points instead of a paragraph.
# {text} receives the content to summarize.
# The closing cue previously read "CONSCISE"; the spelling is corrected so the model
# receives the intended instruction.
prompt_template = """Write a concise bullet point summary of the following:


{text}


CONCISE SUMMARY IN BULLET POINTS:"""

BULLET_POINT_PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["text"],
)

In [17]:
# Build a "stuff" summarization chain that uses the custom bullet-point prompt.
chain = load_summarize_chain(llm, 
                             chain_type="stuff", 
                             prompt=BULLET_POINT_PROMPT)

output_summary = chain.run(docs)

# A very wide limit plus replace_whitespace=False keeps the model's own line breaks,
# so each bullet stays on its own line instead of being re-flowed into a paragraph.
wrapped_text = textwrap.fill(output_summary, 
                             width=1000,
                             break_long_words=False,
                             replace_whitespace=False)
print(wrapped_text)


- Craig Smith speaks with Bratine Saaha, head of Amazon's machine learning services
- Discussion focuses on Amazon's dominance in model building and deploying AI, and the SageMaker platform
- Bratine's background includes VP of Software Infrastructure at NVIDIA, PhD from Yale University, and Harvard Business School
- Amazon's services are divided into three tiers: customers building their own ML infrastructure and models, customers using Amazon's ML infrastructure to build models, and customers using pre-trained models
- Interoperability within Amazon's ecosystem is important, but customers can also use other tools like TensorFlow


In [18]:
# Build a "refine" summarization chain with its default prompts.
chain = load_summarize_chain(llm, chain_type="refine")

output_summary = chain.run(docs)
# Re-flow the summary to lines of at most 100 characters for readable printing.
# NOTE(review): the printed summary below stops mid-word ("computer visio"), which
# suggests the completion hit a length limit — confirm the model's max token setting.
wrapped_text = textwrap.fill(output_summary, width=100)
print(wrapped_text)

  Craig Smith interviews Bratine Saaha, the head of Amazon's machine learning services, about
Amazon's dominance in model building and deploying AI, their SageMaker platform, and whether anyone
can compete with them. Bratine has a background in software infrastructure, having done his PhD at
Yale University and Harvard Business School, and now leads all of Amazon's AI and machine learning
services. Amazon's machine learning services are platforms like SageMaker with various tools, which
are tailored to three customer personas. The first set of customers are given optimized
infrastructure to build and deploy their own machine learning models. The second set of customers
are given the ML infrastructure to focus on building the machine learning models, which is what
SageMaker provides. The third set of customers are given pre-trained machine learning models as well
as APIs to infuse intelligence into applications such as natural language processing, document
processing, and computer visio

The `'refine'` summarization chain in LangChain provides a flexible and iterative approach to generating summaries, allowing you to customize prompts and provide additional context for refining the output. This method can result in more accurate and context-aware summaries compared to other chain types like `'stuff'` and `'map_reduce'`.