### Import data

In [1]:
import pandas as pd
# Load the qwen-max key-point predictions and list the available columns.
df = pd.read_csv(filepath_or_buffer='./data/qwen_1by1+predictions.csv')
print(df.columns)
# Row indices reported by the Chinese-character scan further down in this
# notebook (their key_point mixes Chinese characters into the English text):
# 1641, 2388, 2414, 2426, 2627, 2878, 3362

Index(['topic', 'stance', 'argument', 'key_point', 'embedding(0_1_2_3)',
       'embedding(4_7_8_15)', 'embedding(5_6_9_10)', 'embedding(11_12_13_14)',
       'embedding(16_17_18_19)', 'embedding(20_21_22_23)',
       'embedding(24_25_26_27)', 'avg_embedding'],
      dtype='object')


In [2]:
# Inspect one of the flagged rows: its key point contains Chinese characters.
print(df.loc[2388, 'key_point'])

Libertarianism promotes human potential by granting individuals自由 to pursue their expertise unhindered by financial constraints or irrational regulations.


## Predict via qwen-max

In [3]:
import random
from http import HTTPStatus
import dashscope
import yaml

In [4]:
# Read the DashScope API key from the YAML config file instead of hard-coding it.
with open("../conf/index.yaml") as conf_file:
    credentials = yaml.safe_load(conf_file)
qwen_api_key = credentials['environment_variables']['QWEN_DASHSCOPE_API_KEY']

In [5]:
import re
def contains_chinese(text):
    """Return True if ``text`` contains at least one Chinese character.

    "Chinese character" here means a code point in the CJK Unified Ideographs
    block (U+4E00 to U+9FFF).

    Args:
        text: The value to inspect. Normally a ``str``.

    Returns:
        bool: ``True`` when at least one character in the range is present.
        Non-string input (``None``, or a float ``NaN`` such as an empty
        DataFrame cell) returns ``False`` instead of raising ``TypeError``,
        so a scan over a column with missing values does not abort.
    """
    if not isinstance(text, str):
        return False
    # One matching character is enough, so no quantifier is needed.
    return re.search('[\u4e00-\u9fff]', text) is not None

In [6]:
def generating(topic, argument, max_attempts=20, retry_delay=1.0):
    """Ask qwen-max for a one-sentence English key point for ``argument``.

    The model is instructed to answer as ``Key Point: ...``. A reply is
    rejected, and the request repeated, when the HTTP status is not OK, when
    the call raises, when the ``Key Point:`` marker is missing, when the
    extracted key point is empty, or when it still contains Chinese
    characters (checked with ``contains_chinese``).

    Retries run in a bounded loop rather than by unbounded self-recursion, so
    a persistent failure (bad API key, outage, a model that keeps answering
    in Chinese) ends with an explicit error instead of exhausting the stack.

    Args:
        topic: Debate topic interpolated into the system prompt.
        argument: The argument text, sent as the user message.
        max_attempts: Maximum number of requests before giving up.
        retry_delay: Seconds to wait after a failed request (non-OK status
            or exception) before the next attempt. No wait is applied when
            the reply was received but merely rejected.

    Returns:
        str: The key point with the ``Key Point:`` prefix and surrounding
        whitespace removed.

    Raises:
        RuntimeError: If no acceptable key point was obtained within
            ``max_attempts`` attempts.
    """
    import time  # local import: only needed for the pause between failed requests

    messages = [
        {
            'role': 'system',
            'content': f"""
            You need to do key point analysis on the query of user and
        generate a key point in one sentence from it based on the topic "{topic}" and the key point should be English . You should only return the key point. The return format is as followed: Key Point: ...
            """
        },
        {
            'role': 'user',
            'content': argument
        }
    ]
    dashscope.api_key = qwen_api_key
    # Matched without the trailing space so "Key Point:text" is accepted too.
    marker = "Key Point:"
    last_failure = "no attempt was made"

    for attempt in range(1, max_attempts + 1):
        request_failed = False
        output = None
        try:
            response = dashscope.Generation.call(model="qwen-max",
                                                 messages=messages,
                                                 # Use a fresh random seed per request so a retry can
                                                 # produce a different answer (original note: when no
                                                 # seed is given, the seed defaults to 1234).
                                                 seed=random.randint(1, 10000),
                                                 # Return the output in "message" format.
                                                 result_format='message')
            if response.status_code == HTTPStatus.OK:
                output = response['output']["choices"][0]["message"]["content"]
            else:
                last_failure = 'Request id: %s, Status code: %s, error code: %s, error message: %s' % (
                    response.request_id, response.status_code,
                    response.code, response.message
                )
                print(last_failure)
                request_failed = True
        except Exception as e:
            last_failure = f"An error occurred: {e}"
            print(f"{last_failure}. Retrying...")
            request_failed = True

        if request_failed:
            # Pause before retrying, but not after the final attempt.
            if attempt < max_attempts and retry_delay > 0:
                time.sleep(retry_delay)
            continue

        print(output)
        if marker not in output:
            last_failure = f"reply did not contain {marker!r}"
            print(f"An error occurred: {last_failure}. Retrying...")
            continue

        key_point = output.split(marker, 1)[1].strip()
        print(key_point)
        if not key_point:
            last_failure = "reply contained an empty key point"
            print(f"An error occurred: {last_failure}. Retrying...")
            continue
        if contains_chinese(key_point):
            # The model slipped Chinese characters into the sentence; ask again.
            print("-----------------------------------------")
            last_failure = "key point still contained Chinese characters"
            continue

        print("New: "+key_point)
        return key_point

    raise RuntimeError(
        f"Could not generate an English key point after {max_attempts} attempt(s); "
        f"last failure: {last_failure}"
    )

In [None]:
import ast
# Regenerate the key point for a single row: the positional slice 1641:1642
# selects exactly one row, and the new key point overwrites the old one.
for index, row in df.iloc[1641:1642].iterrows():
    print(f"{index}: ")
    df.at[index, 'key_point'] = generating(row['topic'], row['argument'])

1641: 
Key Point: Abolishing intellectual property rights could jeopardize the livelihoods of creative individuals, potentially leading to a loss of their creative output as they转向 alternative paid occupations.
Abolishing intellectual property rights could jeopardize the livelihoods of creative individuals, potentially leading to a loss of their creative output as they转向 alternative paid occupations.
-----------------------------------------
Key Point: Abolishing intellectual property rights could jeopardize the livelihoods of creative individuals, potentially leading to a loss of their artistic contributions as they转向 other means of income.
Abolishing intellectual property rights could jeopardize the livelihoods of creative individuals, potentially leading to a loss of their artistic contributions as they转向 other means of income.
-----------------------------------------
Key Point: The absence of intellectual property rights could result in creative individuals being unable to financi

In [196]:
# Write the updated predictions to disk without the index column.
# NOTE(review): this path differs from the file read at the top of the
# notebook ('qwen_1by1+predictions.csv') — confirm that is intended.
df.to_csv(path_or_buf='./data/1by1+predictions.csv', index=False)
print("Add new data successfully!!!!!")

Add new data successfully!!!!!


### Post-processing: key points that contain Chinese characters

In [244]:
# Report the index of every row among the first 7236 whose key point still
# contains Chinese characters. This cell only reports; to repair a flagged
# row, call generating(topic, argument) with that row's values and store the
# result in df.at[index, 'key_point'].
for index in df.iloc[:7236].index:
    if contains_chinese(df.at[index, 'key_point']):
        print(f"{index}: ")

1641: 
2388: 
2414: 
2426: 
2627: 
2878: 
3362: 


In [242]:
# Save the DataFrame again after the post-processing pass (index column omitted).
df.to_csv(path_or_buf='./data/1by1+predictions.csv', index=False)
print("Add new data successfully!!!!!")

Add new data successfully!!!!!


In [7]:
# Same check as the earlier scan, but over the whole DataFrame: print the
# index of each row whose key point contains Chinese characters. Nothing is
# modified here; flagged rows can be repaired by calling
# generating(topic, argument) and writing the result to df.at[index, 'key_point'].
for index in df.index:
    if contains_chinese(df.at[index, 'key_point']):
        print(f"{index}: ")

1641: 
2388: 
2414: 
2426: 
2627: 
2878: 
3362: 
