## 文件头，准备各类API

In [None]:
import os
import json
from typing import List
import shutil

from alibabacloud_sls20201230.client import Client as Sls20201230Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_sls20201230 import models as sls_20201230_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
import os
import pandas as pd
from odps import ODPS, DataFrame
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor
import concurrent.futures
import argparse
import logging

# Configure root logging once for the whole script: DEBUG level, timestamped lines.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Credentials and switches come from the environment; the credentials can also be
# overridden on the command line (see the parser below).
ALIBABA_CLOUD_ACCESS_KEY_ID = os.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")
ALIBABA_CLOUD_ACCESS_KEY_SECRET = os.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")
FEISHU_XGPT_APP_SECRET = os.getenv("FEISHU_XGPT_APP_SECRET")
AZURE_GPT4O_API_KEY = os.getenv("AZURE_GPT4O_API_KEY")
AZURE_API_KEY = os.getenv("AZURE_API_KEY")
# Kept as a string; defaults to "true" when the variable is unset.
CALL_AI_SERVICE = os.getenv("CALL_AI_SERVICE", "true")

# Default date to process: yesterday (local time), formatted as YYYYMMDD.
ds_to_run = (datetime.now() - timedelta(days=1)).strftime("%Y%m%d")

parser = argparse.ArgumentParser()
parser.add_argument(
    "--ds_to_run",
    default=ds_to_run,
    help="指定跑哪一天的数据，格式: 20250520",
)
parser.add_argument(
    "--ACCESS_KEY_ID",
    default=ALIBABA_CLOUD_ACCESS_KEY_ID,
    help="ALIBABA_CLOUD_ACCESS_KEY_ID",
)
parser.add_argument(
    "--ACCESS_KEY_SECRET",
    default=ALIBABA_CLOUD_ACCESS_KEY_SECRET,
    help="ALIBABA_CLOUD_ACCESS_KEY_SECRET",
)
parser.add_argument(
    "--FEISHU_XGPT_APP_SECRET",
    default=FEISHU_XGPT_APP_SECRET,
    help="飞书的FEISHU_XGPT_APP_SECRET",
)
parser.add_argument(
    "--AZURE_API_KEY",
    default=AZURE_API_KEY,
    help="AZURE_API_KEY",
)
parser.add_argument(
    "--AZURE_GPT4O_API_KEY", default=AZURE_GPT4O_API_KEY, help="AZURE_GPT4O_API_KEY"
)

# parse_known_args() returns unrecognised arguments instead of exiting on them.
args, unknown = parser.parse_known_args()

# Argument names whose values must never be written to the log.
SECRET_ARG_NAMES = frozenset(
    {
        "ACCESS_KEY_SECRET",
        "FEISHU_XGPT_APP_SECRET",
        "AZURE_API_KEY",
        "AZURE_GPT4O_API_KEY",
    }
)


def redact_args(namespace):
    """Return the parsed arguments as a dict with secret values masked.

    Args:
        namespace: An ``argparse.Namespace`` (or any object usable with ``vars``).

    Returns:
        A new dict; every non-empty value whose name is in ``SECRET_ARG_NAMES``
        is replaced by ``"***"``. Unset (falsy) secrets are left as-is so the
        log still shows that they are missing.
    """
    return {
        name: ("***" if name in SECRET_ARG_NAMES and value else value)
        for name, value in vars(namespace).items()
    }


# Log the arguments once, with credentials masked, so keys do not leak into logs.
logging.info(f"Parsed args: {redact_args(args)}")
logging.info(f"Unknown args: {unknown}")
ds_to_run = args.ds_to_run

# Per-date working directory.
DATA_PATH = f"./data/{ds_to_run}"

logging.info(f"ds_to_run:{ds_to_run}")

# Read from SEGMENT_DURATION; falls back to 45 when unset.
default_segment_duration = int(os.getenv("SEGMENT_DURATION", "45"))

# ODPS entry point used by the SQL helper below, bound to the
# "summerfarm_ds_dev" project on the cn-hangzhou MaxCompute endpoint.
# NOTE(review): the endpoint uses plain http — confirm whether https is available.
odps = ODPS(
    args.ACCESS_KEY_ID,
    args.ACCESS_KEY_SECRET,
    project="summerfarm_ds_dev",
    endpoint="http://service.cn-hangzhou.maxcompute.aliyun.com/api",
)

# SLS client, authenticated with the same access key pair.
config = open_api_models.Config(
    access_key_id=args.ACCESS_KEY_ID,
    access_key_secret=args.ACCESS_KEY_SECRET,
)
# For the list of endpoints, see https://api.aliyun.com/product/Sls
config.endpoint = "cn-hangzhou.log.aliyuncs.com"
sls_client = Sls20201230Client(config)


def create_dir_if_not_exist(path):
    """Make ``path`` an empty directory, creating missing parents.

    NOTE: despite the name, a directory that already exists at ``path`` is
    deleted together with its contents before being recreated.

    Args:
        path: Directory path to (re)create.
    """
    if not os.path.exists(path):
        os.makedirs(path)
        return
    # Something is already there: remove the whole tree, then start fresh.
    shutil.rmtree(path)
    os.makedirs(path)


# Start from an empty per-date data directory; anything left at DATA_PATH from a
# previous run is deleted by create_dir_if_not_exist.
create_dir_if_not_exist(DATA_PATH)


def get_odps_sql_result_as_df(sql) -> pd.DataFrame:
    """Run ``sql`` through the module-level ``odps`` client and return the result.

    The statement is logged, executed with the Hive-compatible and ODPS 2.0
    type-system hints enabled, and its result is read through the tunnel
    reader into a pandas DataFrame.

    Args:
        sql: The SQL text to execute.

    Returns:
        The query result as a pandas DataFrame, or ``None`` if the reader
        yields no DataFrame.
    """
    logging.info(f"ODPS SQL:\n{sql}")
    sql_hints = {"odps.sql.hive.compatible": True, "odps.sql.type.system.odps2": True}
    job = odps.execute_sql(sql, hints=sql_hints)
    job.wait_for_success()

    with job.open_reader(tunnel=True) as reader:
        result = reader.to_pandas()

    if result is None:
        return None
    logging.info(f"columns:{result.columns}")
    return result

In [None]:
# Load after-sale orders for SKU Q001L01S001 whose add_time falls between
# 2024-01-01 and 2025-01-01, left-joined on after_sale_order_no with the
# after-sale proof table; both tables are read at their MAX_PT partition.
df=get_odps_sql_result_as_df("""
SELECT  a.id as after_sale_order_id
        ,a.sku,a.m_id,a.order_no,a.add_time,a.update_time
        ,b.*
FROM    summerfarm_tech.ods_after_sale_order_df a
LEFT JOIN summerfarm_tech.ods_after_sale_proof_df b
ON      b.ds = MAX_PT('summerfarm_tech.ods_after_sale_proof_df')
AND     b.after_sale_order_no = a.after_sale_order_no
WHERE   a.ds = MAX_PT('summerfarm_tech.ods_after_sale_order_df')
AND     a.sku = 'Q001L01S001'
AND     a.add_time BETWEEN '2024-01-01 00:00:00' AND '2025-01-01 00:00:00'
;
""")



In [None]:
def get_pic_link(proof_pic):
    """Turn a ';'-separated list of picture paths into full picture URLs.

    Args:
        proof_pic: A string of picture paths separated by ';', or a missing
            value (``None`` / NaN) when the row has no pictures.

    Returns:
        The URLs, each prefixed with ``https://azure.summerfarm.net/`` and
        joined by ``"\\t,\\t"``. Non-string values and strings of at most one
        character are returned unchanged. Empty segments (for example from a
        trailing ';') are skipped, so a value made only of separators yields
        an empty string.
    """
    # Missing values can be None or a float NaN; neither can be split, so pass
    # them through untouched instead of failing on len().
    if not isinstance(proof_pic, str) or len(proof_pic) <= 1:
        return proof_pic
    # Drop empty segments so they do not turn into bare base-URL links.
    links = [
        f"https://azure.summerfarm.net/{pic}" for pic in proof_pic.split(";") if pic
    ]
    return "\t,\t".join(links)


# Add a column holding the full picture URLs built from each row's proof_pic.
df["售后图片"] = df["proof_pic"].apply(get_pic_link)

# Write the result to a CSV in the working directory, without the index column.
df.to_csv("./Q001L01S001_2024售后数据.csv", index=False)