In [1]:
import numpy as np
import pandas as pd


In [2]:
def compute_deviation(pred, actual, offset):
    """Relative absolute error with a smoothing offset in the denominator.

    deviation = |pred - actual| / (actual + offset)

    Parameters
    ----------
    pred : predicted value(s)
    actual : observed value(s)
    offset : constant added to ``actual`` in the denominator

    Returns
    -------
    The deviation, computed with ``np.abs`` and plain arithmetic operators.
    """
    gap = np.abs(pred - actual)
    scale = actual + offset
    return gap / scale


In [3]:
def compute_precision_row(row):
    """Compute the precision and the three deviations for one record.

    Deviations (see compute_deviation):
        forward: |pred_forward_count - forward_count| / (forward_count + 5)
        comment: |pred_comment_count - comment_count| / (comment_count + 3)
        like:    |pred_like_count    - like_count|    / (like_count + 3)

    Precision:
        1 - 0.5 * forward - 0.25 * comment - 0.25 * like

    Parameters
    ----------
    row : object supporting ``row['column']`` lookup for the six count
        columns named above.

    Returns
    -------
    tuple
        (precision, deviation_forward, deviation_comment, deviation_like)
    """
    # Look up predicted / actual pairs first, in the same order they are used.
    fwd_pred, fwd_true = row['pred_forward_count'], row['forward_count']
    cmt_pred, cmt_true = row['pred_comment_count'], row['comment_count']
    like_pred, like_true = row['pred_like_count'], row['like_count']

    # Forwards use an offset of 5; comments and likes use 3.
    fwd_dev = compute_deviation(fwd_pred, fwd_true, 5)
    cmt_dev = compute_deviation(cmt_pred, cmt_true, 3)
    like_dev = compute_deviation(like_pred, like_true, 3)

    # Forward deviation carries half of the penalty, the other two a quarter each.
    score = 1 - 0.5 * fwd_dev - 0.25 * cmt_dev - 0.25 * like_dev

    return score, fwd_dev, cmt_dev, like_dev


In [4]:
def evaluate_model(df):
    """Score a table of predicted interaction counts against the actual counts.

    Per-record precision and deviations come from compute_precision_row.
    The headline score is a weighted hit rate: a record is a hit when its
    precision is strictly greater than 0.8, and its weight is
    ``min(forward_count + comment_count + like_count, 100) + 1``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns forward_count, comment_count, like_count,
        pred_forward_count, pred_comment_count and pred_like_count.
        The frame passed in is not modified.

    Returns
    -------
    dict
        'weighted_precision_score' : sum(weight * hit) / sum(weight); NaN when
            the total weight is zero (e.g. an empty frame).
        'mean_precision' : unweighted mean of the per-record precision.
        'df_with_metrics' : copy of ``df`` with the added columns precision,
            dev_forward, dev_comment and dev_like.
    """
    # Work on a copy so the caller's frame keeps its original columns and the
    # evaluation can be re-run from the same input.
    scored = df.copy()

    # compute_precision_row only does label lookups and element-wise
    # arithmetic, so passing the whole frame scores every record in a single
    # vectorised pass instead of one Python call per row (and also works for
    # a frame with zero rows).
    precision, dev_forward, dev_comment, dev_like = compute_precision_row(scored)
    scored['precision'] = precision
    scored['dev_forward'] = dev_forward
    scored['dev_comment'] = dev_comment
    scored['dev_like'] = dev_like

    # interaction_i = forward + comment + like, capped at 100; the +1 keeps
    # records without any interaction from getting zero weight.
    interaction = scored['forward_count'] + scored['comment_count'] + scored['like_count']
    weight = interaction.clip(upper=100) + 1

    # sgn(precision_i - 0.8): 1 when strictly positive, otherwise 0.
    hit = (scored['precision'] - 0.8 > 0).astype(int)

    # score = sum(weight * hit) / sum(weight), guarded against a zero total.
    total_weight = np.sum(weight)
    if total_weight == 0:
        weighted_score = float('nan')
    else:
        weighted_score = np.sum(weight * hit) / total_weight

    return {
        'weighted_precision_score': weighted_score,
        'mean_precision': scored['precision'].mean(),
        'df_with_metrics': scored
    }


In [5]:
"""
input: df(uid, mid, forward_count, comment_count, like_count, pred_forward_count, pred_comment_count, pred_like_count)
output: {'weighted_precision_score': float,
        'mean_precision': float,
        'df_with_metrics': df(df + precision, dev_forward, dev_comment, dev_like)}
"""


"\ninput: df(uid, mid, forward_count, comment_count, like_count, pred_forward_count, pred_comment_count, pred_like_count)\noutput: {'weighted_precision_score': float,\n        'mean_precision': float,\n        'df_with_metrics': df(df + precision, dev_forward, dev_comment, dev_like)}\n"

In [31]:
# Relative path of the prediction CSV that is going to be evaluated.
rst_path = './save/predictions_20250508_221653.csv'
# Read it into `df`, the frame that is passed to evaluate_model below.
df = pd.read_csv(rst_path)


In [32]:
# Score the loaded predictions; the result dict holds two scalar scores and
# the frame with the per-record metric columns.
message = evaluate_model(df)
# Print the weighted score followed by the unweighted mean precision.
print(message['weighted_precision_score'], message['mean_precision'])
# Save the per-record metrics as CSV.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed first column — confirm that is intended.
# NOTE(review): the output name repeats the timestamp of rst_path by hand;
# keep the two in sync when evaluating a different prediction file.
message['df_with_metrics'].to_csv('./save/predictions_20250508_221653_lgb_eval.csv')

0.3694254454730708 0.9168242881648407
