In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.spatial import ConvexHull

def load_and_prepare_data(csv_path):
    """Load the speed-dating CSV and derive the three core metrics.

    The file is decoded by trying a list of encodings in order and keeping the
    first one that parses. Three columns are then derived:

    * ``pre_attraction``: the pre-event attractiveness rating ``attr1_1``.
    * ``interaction_score``: the row-wise mean (NaNs skipped) of whichever of
      the post-conversation ratings ``attr``, ``sinc``, ``intel``, ``fun``,
      ``amb``, ``shar`` and ``like`` are present in the file.
    * ``decision``: the ``dec`` column as a number (0 = No, 1 = Yes).

    Args:
        csv_path: Path to the CSV file.

    Returns:
        A DataFrame holding only the three derived columns, restricted to the
        rows where none of them is missing.

    Raises:
        FileNotFoundError: If ``csv_path`` does not exist.
        ValueError: If the file cannot be read with any candidate encoding.
        KeyError: If ``attr1_1`` or ``dec`` is missing, or if none of the
            interaction rating columns is present.
    """
    import os

    # 1. Load the data.
    print(f"Attempting to load data from: {csv_path}")

    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"File not found: {csv_path}")

    encodings = ['utf-8', 'latin-1', 'iso-8859-1', 'cp1252', 'windows-1252']
    df = None
    for encoding in encodings:
        try:
            df = pd.read_csv(csv_path, encoding=encoding, low_memory=False)
        except Exception as e:
            # Deliberately broad: any read failure moves on to the next
            # candidate encoding, and total failure is reported below.
            print(f"Failed with encoding {encoding}: {str(e)}")
            continue
        print(f"Data loaded successfully with encoding: {encoding}")
        break

    if df is None:
        raise ValueError("Could not load CSV file with any encoding")

    print(f"原始数据: {len(df)} 行, {len(df.columns)} 列")

    # Fail early, with a readable message, when a mandatory column is absent.
    missing_required = [col for col in ('attr1_1', 'dec') if col not in df.columns]
    if missing_required:
        raise KeyError(f"Required column(s) missing from data: {missing_required}")

    # 2. Build the three core metrics.
    # a. Initial attraction: the pre-event attractiveness rating.
    df['pre_attraction'] = pd.to_numeric(df['attr1_1'], errors='coerce')

    # b. Conversation quality: mean of the post-conversation ratings. Only the
    #    rating columns that actually exist are averaged, so a file lacking
    #    some of them still yields a score instead of a KeyError.
    interaction_cols = ['attr', 'sinc', 'intel', 'fun', 'amb', 'shar', 'like']
    available_cols = [col for col in interaction_cols if col in df.columns]
    if not available_cols:
        raise KeyError(
            f"None of the interaction columns are present: {interaction_cols}")
    if len(available_cols) < len(interaction_cols):
        skipped = [col for col in interaction_cols if col not in available_cols]
        print(f"Interaction columns missing from data, skipped: {skipped}")
    for col in available_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df['interaction_score'] = df[available_cols].mean(axis=1, skipna=True)

    # c. Final decision as a number (0 = No, 1 = Yes).
    df['decision'] = pd.to_numeric(df['dec'], errors='coerce')

    # 3. Cleaning: keep only rows where all three metrics are available.
    required_cols = ['pre_attraction', 'interaction_score', 'decision']
    plot_data = df[required_cols].dropna()

    yes_count = int((plot_data['decision'] == 1).sum())
    no_count = int((plot_data['decision'] == 0).sum())
    print(f"有效数据行数: {len(plot_data)}")
    print(f"决策分布 - Yes: {yes_count}, No: {no_count}")

    return plot_data

def _min_max_scale(values):
    """Scale *values* linearly onto [0, 1]; a constant column maps to 0.5."""
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        # Every non-missing entry equals ``lowest``, so ``values - lowest`` is
        # 0 (or NaN where the input was NaN). Adding 0.5 puts the column at
        # the neutral midpoint instead of producing 0/0 = NaN.
        return (values - lowest) + 0.5
    return (values - lowest) / span


def normalize_to_barycentric(plot_data):
    """Normalise the three metrics and convert them to barycentric weights.

    ``pre_attraction`` and ``interaction_score`` are min-max scaled to [0, 1]
    (``pre_norm`` / ``int_norm``); ``decision`` is used as it is
    (``dec_norm``). Each normalised value is then squeezed into [0.1, 0.9] so
    that points stay away from the triangle's edges, and the three weights
    are divided by their sum so that ``a + b + c == 1``.

    A metric that has the same value in every row carries no information; it
    is assigned the midpoint 0.5 rather than dividing by a zero range.

    Args:
        plot_data: DataFrame with ``pre_attraction``, ``interaction_score``
            and ``decision`` columns. It is not modified.

    Returns:
        A copy of ``plot_data`` with the added columns ``pre_norm``,
        ``int_norm``, ``dec_norm``, ``a`` (initial attraction weight), ``b``
        (conversation quality weight) and ``c`` (final decision weight).
    """
    plot_data = plot_data.copy()

    plot_data['pre_norm'] = _min_max_scale(plot_data['pre_attraction'])
    plot_data['int_norm'] = _min_max_scale(plot_data['interaction_score'])

    # The decision is already used on a 0-1 scale.
    plot_data['dec_norm'] = plot_data['decision']

    # Map each metric into the 0.1-0.9 band to avoid the triangle boundary.
    pre_weight = plot_data['pre_norm'] * 0.8 + 0.1
    int_weight = plot_data['int_norm'] * 0.8 + 0.1
    dec_weight = plot_data['dec_norm'] * 0.8 + 0.1

    # Divide by the sum so the three weights form barycentric coordinates.
    total = pre_weight + int_weight + dec_weight
    plot_data['a'] = pre_weight / total  # initial attraction weight
    plot_data['b'] = int_weight / total  # conversation quality weight
    plot_data['c'] = dec_weight / total  # final decision weight

    return plot_data

def barycentric_to_cartesian(a, b, c):
    """
    Project barycentric weights onto the plot's equilateral triangle.

    The triangle's vertices are:
    A (initial attraction):   (0, 0)
    B (conversation quality): (1, 0)
    C (final decision):       (0.5, sqrt(3)/2)

    For weights (a, b, c) with a + b + c = 1 the Cartesian position is
        x = b + c * 0.5
        y = c * (sqrt(3)/2)
    ``a`` does not appear in the formula because vertex A sits at the origin.

    Both results are clipped to the triangle's bounding box, i.e. x to
    [0, 1] and y to [0, sqrt(3)/2]. Scalars and NumPy arrays are accepted.

    Returns the pair ``(x, y)``.
    """
    apex_height = np.sqrt(3) / 2

    horizontal = b + c * 0.5
    vertical = c * apex_height

    # Keep the point inside the bounding box of the triangle.
    return np.clip(horizontal, 0, 1), np.clip(vertical, 0, apex_height)

def calculate_interaction_effect(a, b, c):
    """
    Score how unevenly the three weights are spread, on a 0-1 scale.

    The score is the population variance of (a, b, c) divided by the largest
    variance three values in [0, 1] can have. It is used as a proxy for how
    much the conversation may have shifted the decision: the more the three
    metrics disagree, the higher the score.

    The maximum variance is 2/9, reached when one value is 1 and the other
    two are 0 (a triangle vertex). Equal weights give 0.

    Args:
        a, b, c: Scalar weights, normally barycentric (summing to 1).

    Returns:
        The normalised variance, capped at 1.0 for inputs outside [0, 1].
    """
    max_variance = 2.0 / 9.0

    variance = np.var(np.array([a, b, c]))

    # Cap at 1.0 so out-of-range inputs cannot push the score above the scale.
    normalized_effect = min(variance / max_variance, 1.0)

    return normalized_effect

def _hover_labels(subset, decision_label):
    """Return one hover-text string per row of *subset*."""
    return [
        f'Pre-attraction: {pre:.1f}<br>'
        f'Interaction: {score:.1f}<br>'
        f'Decision: {decision_label}<br>'
        f'Effect: {effect:.3f}'
        for pre, score, effect in zip(
            subset['pre_attraction'],
            subset['interaction_score'],
            subset['interaction_effect'],
        )
    ]


def _decision_trace(subset, decision_label, colorscale, accent_color, colorbar_y):
    """Build the marker trace (with its own colour bar) for one decision group."""
    return go.Scatter(
        x=subset['x'],
        y=subset['y'],
        mode='markers',
        marker=dict(
            # Both marker size and colour encode the interaction effect.
            size=subset['interaction_effect'] * 20 + 8,
            color=subset['interaction_effect'],
            colorscale=colorscale,
            showscale=True,
            colorbar=dict(
                # ``title.font`` replaces the deprecated ``titlefont``
                # attribute, which current Plotly releases reject.
                title=dict(
                    text=f"Interaction Effect<br>(Decision={decision_label})",
                    font=dict(color=accent_color),
                ),
                x=1.02,
                y=colorbar_y,
                len=0.3,
            ),
            line=dict(color='white', width=1),
            opacity=0.8,
        ),
        name=f'Decision = {decision_label}',
        text=_hover_labels(subset, decision_label),
        hovertemplate='%{text}<extra></extra>',
    )


def create_triangle_plot_interactive(plot_data):
    """Create the interactive triangle plot.

    Every row is placed inside an equilateral triangle from its barycentric
    weights ``a``, ``b``, ``c``. Rows with ``decision == 1`` are drawn with a
    blue colour scale and rows with ``decision == 0`` with a pink one; marker
    size and colour both reflect the interaction effect of the row.

    Args:
        plot_data: DataFrame with the columns ``a``, ``b``, ``c``,
            ``pre_attraction``, ``interaction_score`` and ``decision``.
            It is not modified.

    Returns:
        A tuple ``(fig, plot_data)`` where ``fig`` is the Plotly figure and
        ``plot_data`` is a copy of the input with the added columns ``x``,
        ``y`` and ``interaction_effect``.
    """
    plot_data = plot_data.copy()

    # Cartesian coordinates, computed for all rows in one array call.
    x_coords, y_coords = barycentric_to_cartesian(
        plot_data['a'].to_numpy(),
        plot_data['b'].to_numpy(),
        plot_data['c'].to_numpy(),
    )
    plot_data['x'] = x_coords
    plot_data['y'] = y_coords

    # Interaction effect per row (the helper works on scalars).
    plot_data['interaction_effect'] = np.array(
        [
            calculate_interaction_effect(a, b, c)
            for a, b, c in zip(plot_data['a'], plot_data['b'], plot_data['c'])
        ],
        dtype=float,
    )

    fig = go.Figure()

    # Blue-pink colour scheme.
    yes_color = '#104e8b'    # deep blue, "Yes" decisions
    no_color = '#d89090'     # pink, "No" decisions
    frame_color = '#104e8b'  # deep blue, triangle frame

    # Outer triangle frame (first vertex repeated to close the path).
    triangle_vertices = np.array([[0, 0], [1, 0], [0.5, np.sqrt(3)/2], [0, 0]])
    fig.add_trace(go.Scatter(
        x=triangle_vertices[:, 0],
        y=triangle_vertices[:, 1],
        mode='lines',
        line=dict(color=frame_color, width=4),
        name='Decision Space Boundary',
        showlegend=True
    ))

    yes_data = plot_data[plot_data['decision'] == 1]
    no_data = plot_data[plot_data['decision'] == 0]

    # "Yes" points: light-to-deep blue gradient.
    blue_colorscale = [
        [0, '#afc3d8'],
        [0.5, '#5f89b1'],
        [1, '#104e8b']
    ]
    fig.add_trace(_decision_trace(yes_data, 'Yes', blue_colorscale, yes_color, 0.7))

    # "No" points: light-to-deep pink gradient.
    pink_colorscale = [
        [0, '#f2dada'],
        [0.5, '#e5b5b5'],
        [1, '#d89090']
    ]
    fig.add_trace(_decision_trace(no_data, 'No', pink_colorscale, no_color, 0.3))

    # Vertex labels, nudged just outside the triangle.
    fig.add_trace(go.Scatter(
        x=[0, 1, 0.5],
        y=[-0.05, -0.05, np.sqrt(3)/2 + 0.05],
        mode='text',
        text=['Pre-attraction', 'Interaction<br>Quality', 'Final<br>Decision'],
        textposition='middle center',
        textfont=dict(size=14, color='#104e8b'),
        showlegend=False,
        hoverinfo='skip'
    ))

    fig.update_layout(
        title=dict(
            text='Triangle Interaction Plot: Pre-attraction, Interaction Quality & Final Decision<br>'
                 '<sup>Relationship between Initial Attraction, Conversation Quality and Final Choice</sup>',
            x=0.5,
            font=dict(size=16, color='#104e8b')
        ),
        xaxis=dict(
            range=[-0.1, 1.1],
            autorange=False,
            showgrid=False,
            showticklabels=False,
            showline=False
        ),
        yaxis=dict(
            range=[-0.1, np.sqrt(3)/2 + 0.1],
            autorange=False,
            showgrid=False,
            showticklabels=False,
            showline=False,
            # Lock the aspect ratio so the triangle stays equilateral.
            scaleanchor="x",
            scaleratio=1
        ),
        width=1200,
        height=900,
        showlegend=True,
        legend=dict(
            x=1.02,
            y=0.5,
            bgcolor='rgba(242, 218, 218, 0.9)',  # light pink (#f2dada)
            bordercolor='rgba(16, 78, 139, 0.8)',  # deep blue (#104e8b)
            borderwidth=1.5,
            font=dict(color='#104e8b')
        ),
        plot_bgcolor='white',
        paper_bgcolor='white'
    )

    return fig, plot_data

def main(csv_path=r'/content/Speed Dating Data.csv',
         output_path='triangle_interaction_plot.html'):
    """Run the whole pipeline: load, normalise, plot, save and summarise.

    Args:
        csv_path: Location of the input CSV file.
        output_path: Where the interactive HTML plot is written.

    Any exception raised along the way is caught here, reported together
    with its traceback, and not re-raised.
    """
    try:
        # 1. Load and prepare the data.
        plot_data = load_and_prepare_data(csv_path)

        # 2. Normalise and convert to barycentric weights.
        plot_data = normalize_to_barycentric(plot_data)

        # 3. Build the visualisation.
        fig, processed_data = create_triangle_plot_interactive(plot_data)

        # 4. Save the interactive plot as HTML.
        fig.write_html(output_path)
        print(f"Interactive plot saved as '{output_path}'")
        print("Open the HTML file in your web browser to view the interactive triangle plot.")

        # Basic statistics.
        print("\nStatistics:")
        print(f"Total samples: {len(processed_data)}")
        print(f"Average pre-attraction: {processed_data['pre_attraction'].mean():.3f}")
        print(f"Average interaction score: {processed_data['interaction_score'].mean():.3f}")
        print(f"Average decision: {processed_data['decision'].mean():.3f}")

        # Mean interaction effect per decision group.
        yes_effect = processed_data[processed_data['decision'] == 1]['interaction_effect'].mean()
        no_effect = processed_data[processed_data['decision'] == 0]['interaction_effect'].mean()
        print(f"Average interaction effect (Decision=Yes): {yes_effect:.3f}")
        print(f"Average interaction effect (Decision=No): {no_effect:.3f}")

    except Exception as e:
        # Top-level boundary: report the failure with its traceback.
        print(f"错误: {e}")
        import traceback
        traceback.print_exc()

# Run the pipeline only when this file is executed as the main program.
if __name__ == "__main__":
    main()


Attempting to load data from: /content/Speed Dating Data.csv
Failed with encoding utf-8: 'utf-8' codec can't decode byte 0x8e in position 17: invalid start byte
Data loaded successfully with encoding: latin-1
原始数据: 8378 行, 195 列
有效数据行数: 8121
决策分布 - Yes: 3491, No: 4630
Interactive plot saved as 'triangle_interaction_plot.html'
Open the HTML file in your web browser to view the interactive triangle plot.

Statistics:
Total samples: 8121
Average pre-attraction: 22.477
Average interaction score: 6.519
Average decision: 0.430
Average interaction effect (Decision=Yes): 0.084
Average interaction effect (Decision=No): 0.182
