In [None]:
import pandas as pd

# Remove pandas' display limits (column count, column width, row count) so
# frames shown in this notebook are never truncated.
for _display_option in (
    "display.max_columns",
    "display.max_colwidth",
    "display.max_rows",
):
    pd.set_option(_display_option, None)


# Imports used to compute the run date and create the output directory
import os
from datetime import datetime,timedelta

# Run date: yesterday (local time) rendered as YYYYMMDD.
ds = f"{datetime.now() - timedelta(days=1):%Y%m%d}"

# Every file generated for this run goes under a per-date directory, which is
# created up front; an already existing directory is left untouched.
output_dir = "output_" + ds
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Import the base64 encoding library.
import base64, os, time
import logging

# Root logger setup: DEBUG level, lines formatted as
# "<timestamp> - <LEVEL> - <message>" with a second-resolution timestamp.
logging.basicConfig(
    **{
        "level": logging.DEBUG,
        "format": "%(asctime)s - %(levelname)s - %(message)s",
        "datefmt": "%Y-%m-%d %H:%M:%S",
    }
)

# Proxy mapping pointing both schemes at a local proxy on port 8001.
# NOTE(review): not referenced by any of the visible cells - confirm it is still needed.
proxy_object = dict.fromkeys(("http", "https"), "http://127.0.0.1:8001")


from openai import AzureOpenAI

def _build_azure_client(endpoint, api_key_env_var):
    """Create an AzureOpenAI client for `endpoint`.

    The API key is read from the environment variable `api_key_env_var`;
    an unset variable yields an empty key rather than an error here.
    """
    return AzureOpenAI(
        api_version="2024-03-01-preview",
        azure_endpoint=endpoint,
        api_key=os.getenv(api_key_env_var, ""),
    )


# One client per Azure resource: the two models are served from different endpoints.
client_gpt4o = _build_azure_client(
    "https://xm-ai-us2.openai.azure.com", "AZURE_GPT4O_API_KEY"
)

client_gpt4o_mini = _build_azure_client(
    "https://xm-ai-us.openai.azure.com", "AZURE_GPT4O_MINI_API_KEY"
)


def call_azure_openai(
    messages=None, retrying=1, is_gpt4o=False, json=True, max_tokens=16384
) -> (str, bool):
    """Send one chat-completion request to Azure OpenAI, retrying on failure.

    Args:
        messages: Chat messages in the OpenAI chat format. Defaults to an
            empty list.
        retrying: Number of retries still allowed after this attempt. A
            negative value returns the "retries exhausted" result immediately.
        is_gpt4o: When true, use model "gpt-4o" through `client_gpt4o`;
            otherwise use "gpt-4o-mini" through `client_gpt4o_mini`.
        json: When true, request `response_format` "json_object"; otherwise
            "text". (This parameter shadows the `json` module inside the
            function.)
        max_tokens: Upper bound on generated tokens, forwarded to the API.

    Returns:
        A `(text, ok)` tuple. On success `text` is the model reply and `ok`
        is True. On failure `ok` is False and `text` describes the problem
        (retries exhausted, empty/filtered response, or the exception text).
    """
    if messages is None:
        messages = []
    if retrying < 0:
        return "超过了最大重试次数", False
    completion = None
    ## gpt3.5:      gpt-35-turbo-16k
    ## gpt4o:       gpt-4o
    ## gpt4o-mini:  gpt-4o-mini
    model = "gpt-4o-mini"
    client_to_use = client_gpt4o_mini
    if is_gpt4o:
        logging.info(f"using GPT-4o...:{messages}")
        model = "gpt-4o"
        client_to_use = client_gpt4o

    # A retry must issue the same request: forward the output mode and the
    # token limit along with the other arguments.
    retry_kwargs = dict(
        messages=messages,
        retrying=retrying - 1,
        is_gpt4o=is_gpt4o,
        json=json,
        max_tokens=max_tokens,
    )
    try:
        completion = client_to_use.chat.completions.create(
            model=model,
            temperature=0.1,
            max_tokens=max_tokens,
            messages=messages,
            response_format={"type": "json_object"} if json else {"type": "text"},
        )
        # Check for an empty choice list before indexing into it.
        if not completion.choices:
            return f"azure返回了空的choices:{completion.to_dict()}", False
        first_choice = completion.choices[0]
        if f"{first_choice.finish_reason}" == "content_filter":
            return f"azure过滤了本次请求:{first_choice.to_dict()}", False
        response = first_choice.message.content
        if response is None:
            logging.info(f"azure API返回了异常:{completion.to_dict()}")
            # Back off before retrying a reply that carried no content.
            time.sleep(10)
            return call_azure_openai(**retry_kwargs)
        logging.info(f"total usage:{completion.usage}")
        return response, True
    except Exception as e:
        logging.info(
            f"请求azure接口报错了:{e}\n messages:{messages}, completion:{completion}"
        )
        # An "Error code: 400" means the request itself was rejected, so
        # repeating it unchanged is pointless.
        if retrying <= 0 or "Error code: 400" in f"{e}":
            return f"{e}", False
        logging.info(f"重试中...{retrying}, messages:{messages}")
        return call_azure_openai(**retry_kwargs)


def call_ai_api_to_get_extract_visit_info(visit_text=""):
    """Ask the model to extract structured answers from one visit note.

    Args:
        visit_text: Free-text visit record written by a salesperson. None or
            a float NaN (a missing cell in a DataFrame column) is treated as
            an empty note; any other non-string value is converted with str().

    Returns:
        A JSON object encoded as a string. When the API call fails, or the
        reply is not a JSON object, the empty object "{}" is returned so that
        callers can always pass the result to json.loads.
    """
    import json  # local import: this cell does not import json at module level

    if not isinstance(visit_text, str):
        # NaN is the only float that is not equal to itself.
        is_missing = visit_text is None or (
            isinstance(visit_text, float) and visit_text != visit_text
        )
        visit_text = "" if is_missing else str(visit_text)

    json_text, is_ok = call_azure_openai(
        is_gpt4o=False,
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": """
用户会给发给你一系列销售员对客户的拜访记录，请你用JSON回答以下几个问题：
- 客户是否长时间未下单？
- 是否见到核心KP？
- 客户长时间未下单的原因？
- 本次拜访销售做了什么准备？
- 销售向客户推荐了哪些具体的活动？
- 销售向客户推荐了哪些具体的商品？
- 客户的主要采买渠道是？
- 客户不愿意当场下单的原因？
- 拜访记录完整性打分？（0-100分，100分表示非常完整，0分表示非常不完整）

**请注意，‘安佳’，‘铁塔’一般来说是商品名字，而不太可能是活动名字，活动名字一般带有‘专享’、‘清仓’、‘特价’、‘活动’等字样**
**请你完全基于销售员的拜访记录内容来回答以上问题，如果拜访内容中找不到问题的答案，请回答‘无’**
**请你用问题的标题做JSON的key，答案做value，比如：{"客户是否长时间未下单": "是"}**
""",
                    }
                ],
            },
            {
                "role": "user",
                "content": [{"type": "text", "text": visit_text}],
            },
        ],
    )

    logging.info(f"json_text:{json_text}, visit_text:{visit_text}")

    if not is_ok:
        # On failure json_text holds an error description, not JSON.
        logging.warning(f"visit info extraction failed: {json_text}")
        return "{}"

    try:
        parsed = json.loads(json_text)
    except (TypeError, ValueError):
        parsed = None
    if not isinstance(parsed, dict):
        logging.warning(f"visit info extraction returned a non-JSON-object reply: {json_text}")
        return "{}"
    return json_text


# top2_df = all_df.head(2).copy()
from datetime import datetime

# Today's local date as YYYY-MM-DD.
date_of_now = f"{datetime.now():%Y-%m-%d}"

In [None]:
import sys,os
# Expand the `~` to the full path of the local aliyun-devops checkout and make
# it importable.
full_path = os.path.expanduser('~/Documents/github/aliyun-devops')
# Guard against duplicates: re-running this cell must not append the same
# entry to `sys.path` again.
if full_path not in sys.path:
    sys.path.append(full_path)

from odps_client import get_odps_sql_result_as_df
from datetime import datetime, timedelta

# Query text for the follow-up (visit) records of partition `ds`.
# - Source rows come from ods_follow_up_record_di (alias a), filtered to a.ds = ds.
# - INNER JOIN dim_bd_df (alias b) on admin_id = bd_id, restricted to that
#   table's latest partition via MAX_PT, supplies the M1/M2/M3 owner names and
#   the sales zone.
# - LEFT JOIN ods_merchant_df (alias c), also on its latest partition, supplies
#   the merchant name and last order time.
# - The numeric codes in `status`, `visit_objective` and `visit_type` are
#   decoded into labels by the CASE expressions; unlisted codes fall into the
#   ELSE label.
# - The last column is datediff(getdate(), last_order_time, 'dd').
# NOTE(review): later cells select "m1负责人" (lower-case m) while the alias here
# is "M1负责人" - presumably the engine returns lower-cased column names; confirm.
sql = f"""
SELECT  a.m_id 商户ID
        ,c.mname as 商户名
        ,a.admin_id
        ,a.admin_name AS 拜访人
        ,b.m1_name as M1负责人
        ,b.m2_name as M2负责人
        ,b.m3_name as M3负责人
        ,b.zone_name as 销售区域
        ,follow_up_way as 拜访方式
        ,condition as 拜访内容
        ,CASE   WHEN `status` = 0 THEN '未跟进'
                WHEN `status` = 1 THEN '已跟进'
                WHEN `status` = 2 THEN '已跟进且下单'
                WHEN `status` = 3 THEN '联系不上'
                WHEN `status` = 4 THEN '放弃跟进'
                WHEN `status` = 9 THEN '重置'
                ELSE '未知状态'
        END AS 拜访状态
        ,add_time as 拜访时间
        ,CASE   WHEN visit_objective = 0 THEN '拉新'
                WHEN visit_objective = 1 THEN '催月活'
                WHEN visit_objective = 2 THEN '客户维护'
                WHEN visit_objective = 3 THEN '拓品'
                WHEN visit_objective = 4 THEN '售后处理'
                WHEN visit_objective = 5 THEN '催省心送'
                ELSE '未知目的'
        END AS 拜访目的
        ,CASE   WHEN visit_type = 0 THEN '普通拜访'
                WHEN visit_type = 1 THEN '陪访'
                ELSE '未知类型'
        END AS 拜访类型
        ,'https://azure.summerfarm.net/'|| a.follow_up_pic as 拜访图片
        ,a.ds
        ,c.last_order_time
        ,datediff(getdate(),c.last_order_time,'dd') as 距离上次下单天数
FROM    summerfarm_tech.ods_follow_up_record_di a
INNER JOIN summerfarm_tech.dim_bd_df b
ON      b.ds = MAX_PT("summerfarm_tech.dim_bd_df")
AND     a.admin_id = b.bd_id
LEFT JOIN summerfarm_tech.ods_merchant_df c
ON c.m_id=a.m_id AND c.ds=MAX_PT('summerfarm_tech.ods_merchant_df')
WHERE   a.ds = '{ds}'
;
"""

# Run the query and keep the whole result in memory for the later cells.
bd_follow_up_record_df = get_odps_sql_result_as_df(sql=sql)

# Dump a fixed subset of columns for every record into one CSV in the working
# directory; the file name carries the run date and the record count.
_all_records_columns = [
    "拜访人",
    "拜访内容",
    "拜访状态",
    "拜访目的",
    "拜访类型",
    "拜访时间",
    "销售区域",
    "m1负责人",
    "m2负责人",
    "m3负责人",
    "拜访图片",
]
_all_records_csv = f"./{ds}_BD拜访记录_全部_{len(bd_follow_up_record_df)}条.csv"
bd_follow_up_record_df[_all_records_columns].to_csv(_all_records_csv, index=False)

# Preview the first two rows as the cell output.
bd_follow_up_record_df.head(2)

In [None]:
# System message shared by every report request: it sets the model's persona
# to a senior sales supervisor who analyses salespeople's visit records.
system_prompt = dict(
    role="system",
    content=[
        dict(
            type="text",
            text="你是一个资深销售主管，擅长分析销售员的客户拜访记录",
        )
    ],
)


def call_ai_api_to_get_insigns(city, csv_string=""):
    """Generate the three-part Markdown report for one sales team and save it.

    Three questions are sent to the model in order (general observations,
    competitor pricing mentions, reasons for long order gaps), each together
    with the shared `system_prompt` and the CSV text. The answers are joined
    under one heading each and written to
    `<output_dir>/<city>_销售团队拜访记录分析结果_<ds>.md`.

    Args:
        city: Team/region label used in the first heading and in the file
            name; the caller is expected to pass a file-name-safe value.
        csv_string: CSV text of the visit records to analyse.

    Returns:
        The path of the written Markdown file, or "" when the first request
        fails (nothing is written in that case). When a later request fails,
        its section contains a short failure notice instead of the raw error
        text, and the error is logged.
    """
    # (heading, prompt) per section, in report order. The prompt texts are sent
    # to the model verbatim, including their in-string indentation.
    sections = [
        (
            f"## {city}团队销售拜访记录AI分析",
            f"""以下是你管理的团队销售员的客户拜访记录，作为销售主管，从中你发现了哪些值得注意的现象？
    将你发现的每一种现象按照重要程度倒序排列。请你列举数据以阐述其值得你关注的原因。
    **请你完全基于CSV的数据做分析，如果用户没有具体的反馈内容，请不要推测。这对公司来说非常重要，我们需要使用真实的客户反馈去调整经营策略**
    以下是CSV内容：\n\n{csv_string}""",
        ),
        (
            "## 竞争对手情况分析",
            f"""作为销售主管，请你分析在这些拜访记录中，有哪些具体的客户提到竞争对手的品比我们的价格低的？
                    请你列举出具体的客户名和商品名，以及竞争对手的名称和价格（如果有的话）。
                    **请你完全基于CSV的数据做分析，如果用户没有具体的反馈内容，请不要推测。这对公司来说非常重要，我们需要使用真实的客户反馈去调整经营策略**
                    以下是拜访记录CSV内容：\n\n{csv_string}""",
        ),
        (
            "## 长时间不下单原因分析",
            f"""作为销售主管，请你分析客户长时间未下单的原因中，有哪些值得注意的现象？请你列举每种原因的占比，凸显出每个原因的重要程度。
                    **请你完全基于CSV的数据做分析，如果用户没有反馈具体的原因，请不要推测。这对公司来说非常重要，我们需要使用真实的客户反馈去调整经营策略**
                    以下是拜访记录CSV内容：\n\n{csv_string}""",
        ),
    ]

    rendered_sections = []
    for position, (heading, prompt) in enumerate(sections):
        text, is_ok = call_azure_openai(
            is_gpt4o=False,
            json=False,
            messages=[
                system_prompt,
                {
                    "role": "user",
                    "content": [{"type": "text", "text": prompt}],
                },
            ],
        )
        if not is_ok:
            logging.info(f"call_ai_api_to_get_insigns failed: {text}")
            if position == 0:
                # Without the main analysis there is no report worth saving.
                return ""
            # On failure `text` is an error description; keep it out of the report.
            text = "（本部分AI分析失败，详见日志）"
        rendered_sections.append(f"{heading}\n\n{text}\n\n")

    merged_markdown_result = "".join(rendered_sections)

    filename = f"{city}_销售团队拜访记录分析结果_{ds}.md"

    # Full path for the output file
    output_path = os.path.join(output_dir, filename)

    # Write the merged report to the file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(merged_markdown_result)

    print(f"Markdown file saved: {output_path}")
    return output_path

In [None]:
import json

# Column names for the expanded AI answers; empty until the loop below fills it
# from a parsed model reply.
keys = list()


def extract_ai_result(ai_result, key):
    """Return the value stored under `key` in a JSON-object string.

    Args:
        ai_result: JSON text expected to encode an object.
        key: Name of the field to read.

    Returns:
        The field's value, or "未知" when the key is absent, the text is not
        valid JSON (including None or other non-string input), or the JSON
        value is not an object.
    """
    try:
        parsed = json.loads(ai_result)
    except (TypeError, ValueError):
        # TypeError: non-string input; ValueError: malformed JSON.
        return "未知"
    if not isinstance(parsed, dict):
        return "未知"
    return parsed.get(key, "未知")


# Per-region pipeline: export the raw visit records, run the per-visit AI
# extraction, expand the JSON answers into columns, then request the
# team-level Markdown report.
# Rows without a region are skipped: a NaN label cannot be matched by `==` and
# cannot be turned into a file name.
for city in bd_follow_up_record_df["销售区域"].dropna().unique():
    logging.info(f"开始处理:{city}")
    # Work on an independent copy so that adding the "AI分析" column below does
    # not write into a slice of the full frame.
    city_df = bd_follow_up_record_df[
        bd_follow_up_record_df["销售区域"] == city
    ].copy()

    # Create a valid filename by replacing any characters that might be problematic in filenames
    safe_city_name = "".join(
        c if c.isalnum() or c in ("-", "_") else "_" for c in str(city)
    )

    # Save the city's records to a CSV file
    filename = f"./{output_dir}/{safe_city_name}_{ds}_拜访记录.csv"
    city_df[
        ["拜访人", "商户名", "距离上次下单天数", "拜访目的", "拜访类型", "拜访内容"]
    ].to_csv(filename, index=False, encoding="utf-8-sig")

    print(f"Saved {len(city_df)} records for {city} to {filename}")

    # One model call per visit record; the raw JSON text is kept in "AI分析".
    city_df["AI分析"] = city_df["拜访内容"].apply(call_ai_api_to_get_extract_visit_info)
    city_df[
        ["销售区域", "拜访人", "m1负责人", "商户名", "距离上次下单天数", "AI分析"]
    ].to_csv(f"./{output_dir}/{safe_city_name}_{ds}_拜访记录_AI分析.csv", index=False)

    ai_analysis_df = city_df[
        ["销售区域", "拜访人", "m1负责人", "商户名", "距离上次下单天数", "AI分析"]
    ].copy()

    # The answer keys are discovered once (they persist across regions) from
    # the first reply that parses to a non-empty JSON object; unparsable
    # replies are skipped instead of aborting the run.
    if not keys:
        for raw_ai_result in ai_analysis_df["AI分析"]:
            try:
                parsed_ai_result = json.loads(raw_ai_result)
            except (TypeError, ValueError):
                continue
            if isinstance(parsed_ai_result, dict) and parsed_ai_result:
                keys = list(parsed_ai_result.keys())
                logging.info(f"keys: {keys}")
                break

    # Expand each answer key into its own column.
    for key in keys:
        ai_analysis_df[key] = ai_analysis_df["AI分析"].apply(
            lambda x, key=key: extract_ai_result(x, key)
        )

    display_keys = ["销售区域", "拜访人", "m1负责人", "商户名", "距离上次下单天数"]
    display_keys.extend(keys)
    ai_analysis_df[display_keys].to_csv(
        f"./{output_dir}/{safe_city_name}_{ds}_拜访记录_AI分析_展开.csv", index=False
    )

    # The expanded table, as CSV text, is the input of the team-level report.
    csv_string = ai_analysis_df[display_keys].to_csv(index=False)

    print(f"{city}, \ncsv_string:{csv_string}")
    call_ai_api_to_get_insigns(csv_string=csv_string, city=safe_city_name)