In [None]:
from typing import Any, Dict, List, Optional
from langchain import BasePromptTemplate, LLMChain
from langchain.schema.language_model import BaseLanguageModel
from langchain.callbacks.manager import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
)
from pydantic import Extra
from langchain.schema import LLMResult
from langchain.chains.base import Chain
from langchain.prompts import PromptTemplate

In [None]:
class QuestionGeneratorChain(LLMChain):
    """LLM chain that turns a source text into simulated Human/AI dialogues.

    With the default prompt built by :meth:`from_llm`, the chain takes a single
    ``input`` variable (the source text) and asks the model to write at least
    ten Human/AI exchanges about fortune-telling topics, answered from that
    text in the persona set by the ``System`` section of the prompt.
    The raw completion is returned under ``output_key``.
    """

    prompt: BasePromptTemplate
    """Prompt object to use."""
    llm: BaseLanguageModel
    output_key: str = "text"  #: :meta private:

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @property
    def input_keys(self) -> List[str]:
        """Will be whatever keys the prompt expects.

        :meta private:
        """
        return self.prompt.input_variables

    @property
    def output_keys(self) -> List[str]:
        """Will always return text key.

        :meta private:
        """
        return [self.output_key]

    def generate(
        self,
        input_list: List[Dict[str, Any]],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> LLMResult:
        """Format one prompt per input dict and send them to the LLM.

        Args:
            input_list: One dict of prompt variables per generation request.
            run_manager: Optional callback manager; when given, a child
                manager is forwarded to the LLM call.

        Returns:
            The ``LLMResult`` produced by ``llm.generate_prompt``.
        """
        prompts, stop = self.prep_prompts(input_list, run_manager=run_manager)
        return self.llm.generate_prompt(
            prompts=prompts,
            stop=stop,
            callbacks=run_manager.get_child() if run_manager else None,
        )

    async def agenerate(
        self,
        input_list: List[Dict[str, Any]],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> LLMResult:
        """Async counterpart of :meth:`generate`.

        Args:
            input_list: One dict of prompt variables per generation request.
            run_manager: Optional async callback manager; when given, a child
                manager is forwarded to the LLM call.

        Returns:
            The ``LLMResult`` produced by ``llm.agenerate_prompt``.
        """
        prompts, stop = await self.aprep_prompts(input_list, run_manager=run_manager)
        return await self.llm.agenerate_prompt(
            prompts=prompts,
            stop=stop,
            callbacks=run_manager.get_child() if run_manager else None,
        )

    @property
    def _chain_type(self) -> str:
        # NOTE(review): "genrator" looks like a typo for "generator". The value
        # is left untouched because it is a runtime identifier that may already
        # be relied on elsewhere -- confirm before renaming.
        return "dataset_genrator_chain"

    @classmethod
    def from_llm(
        cls,
        llm: BaseLanguageModel,
        prompt: Optional[BasePromptTemplate] = None,
        **kwargs: Any,
    ) -> Chain:
        """Build the chain for ``llm``, using the built-in prompt by default.

        Args:
            llm: Language model that will generate the dialogues.
            prompt: Optional replacement prompt. When omitted, the built-in
                dialogue-generation template (single variable ``input``) is
                used. Previously passing ``prompt=`` raised ``TypeError``
                because the keyword was supplied twice.
            **kwargs: Extra keyword arguments forwarded to the constructor
                (for example ``verbose=True``).

        Returns:
            A configured ``QuestionGeneratorChain`` instance.
        """
        if prompt is None:
            # The template body is runtime text sent to the model; it is kept
            # flush-left so no indentation leaks into the prompt.
            template = """文本内容
---------------------
{input}

说明
-------------------
- AI是一个人工智能大语言模型，Human代表人类，System代表系统角色。
- AI将听从System的命令
- Human和AI进行对话。
- Human提问是关于运势和命理相关的问题
- AI从上面的文本内容中找到答案，根据System的设置命令，回答Human的问题。
- 请生成至少10个Human和AI的对话内容。

对话内容
-------------------
System:- 你是一个精通周易、中国生肖、算命、解梦、风水堪舆、西方塔罗牌和占星术的玄学大师。正在与人类进行对话。
- 你精通各种玄学和命理知识，能够帮助人类提供命理方面的解读和建议。
- 你本身具有很神秘的性格，外表看起来是一个睿智的老者。
- 你一直用占卜师带有神秘感的口吻与人类交谈。
"""
            prompt = PromptTemplate.from_template(template=template)
        return cls(
            llm=llm,
            prompt=prompt,
            **kwargs,
        )

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain import OpenAI
import os
from dotenv import load_dotenv

load_dotenv(dotenv_path="env")


gpt35_1 = OpenAI(temperature=0.1, max_tokens=2048, verbose=True)
gpt35_9 = OpenAI(temperature=0.9, max_tokens=2048, verbose=True)
chat_gpt35_1 = ChatOpenAI(temperature=0.1, verbose=True)
chat_gpt35_9 = ChatOpenAI(temperature=0.9, verbose=True)
gpt4 = ChatOpenAI(model_name="gpt-4", temperature=0.9, verbose=True)

import pandas as pd

filename = os.getenv("FILE_NAME")
df = pd.read_csv(
    f"s3://sagemaker-automated-execution-034700280673-us-east-1/data_sample/{filename}.csv"
)
gen_question_chain = QuestionGeneratorChain.from_llm(llm=gpt4, verbose=True)
col_q_data = []
for content in df["n_content"]:
    res = await gen_question_chain.arun(input=content)
    res_list = res.split("\n")
    quetions = [q[6:].strip() for q in res_list if q.startswith("Human:")]
    col_q_data.append("\n".join(quetions))
df["questions"] = pd.Series(col_q_data)
df.to_csv(
    f"s3://sagemaker-automated-execution-034700280673-us-east-1/data_sample/{filename}_q.csv",
    index=False,
)