# 6. Conversation Management - Pol.is Math Python Implementation

The `Conversation` and `ConversationManager` classes are key components of the Pol.is system. They handle state management, vote processing, and recomputation of the derived results (PCA, clustering, representativeness). This notebook explores the conversation management implementation in the Python port of the Pol.is math code.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
from polismath.conversation import Conversation, ConversationManager
from polismath.math.named_matrix import NamedMatrix

## 6.1 Creating a Conversation

Let's start by creating a new conversation using the `Conversation` class.

In [None]:
# Derive a conversation ID from the current Unix time (whole seconds) and
# construct an empty conversation under that ID.
conversation_id = f"demo-conversation-{int(time.time())}"
conversation = Conversation(conversation_id)

# Report the ID, the object's string form, and what get_state() returns
# before any votes have been submitted.
print("Created new conversation with ID:", conversation_id)
print(f"Conversation object: {conversation}")
print(f"Initial state: {conversation.get_state()}")

## 6.2 Conversation Properties

Let's examine the properties of the conversation object.

In [None]:
# Walk over the conversation's basic attributes and print each one under
# its label. Attributes are read one at a time, in display order.
print("Conversation Properties:")
for label, attr in (
    ("ID", "conversation_id"),
    ("Last updated", "last_updated"),
    ("Matrix", "raw_rating_mat"),
    ("Participant count", "participant_count"),
    ("Comment count", "comment_count"),
):
    print(f"{label}: {getattr(conversation, attr)}")

# Same treatment for the computed results held on a conversation that has
# not received any votes yet.
print("\nInitial Computed Results:")
for label, attr in (
    ("PCA", "pca"),
    ("Group clusters", "group_clusters"),
    ("Repness", "repness"),
):
    print(f"{label}: {getattr(conversation, attr)}")

## 6.3 Adding Votes to the Conversation

Now let's add some votes to the conversation.

In [None]:
# Sample ballots as (participant, comment, vote) triples.
# Vote encoding used in this notebook: 1 = agree, -1 = disagree.
vote_rows = [
    ("participant1", "comment1", 1),
    ("participant1", "comment2", -1),
    ("participant1", "comment3", 1),
    ("participant2", "comment1", 1),
    ("participant2", "comment2", 1),
    ("participant2", "comment4", -1),
    ("participant3", "comment1", -1),
    ("participant3", "comment2", -1),
    ("participant3", "comment3", -1),
    ("participant4", "comment1", 1),
    ("participant4", "comment4", 1),
    ("participant5", "comment2", -1),
    ("participant5", "comment3", -1),
    ("participant5", "comment4", 1),
    ("participant5", "comment5", -1),
]

# Payload handed to update_votes(): one dict per ballot plus the time of
# the most recent vote, expressed in milliseconds since the Unix epoch.
votes = {
    "votes": [
        {"pid": ptpt, "tid": cmt, "vote": val}
        for ptpt, cmt, val in vote_rows
    ],
    "lastVoteTimestamp": int(time.time() * 1000),
}

# update_votes() returns a conversation object; the rest of the notebook
# works with that returned object rather than with `conversation`.
updated_conversation = conversation.update_votes(votes)

print(f"Updated conversation with {len(votes['votes'])} votes")
for label, attr in (
    ("Updated at", "last_updated"),
    ("Participant count", "participant_count"),
    ("Comment count", "comment_count"),
):
    print(f"{label}: {getattr(updated_conversation, attr)}")

## 6.4 Examining the Vote Matrix

In [None]:
# Inspect the raw vote matrix held by the updated conversation.
print("Vote Matrix After Adding Votes:")
matrix = updated_conversation.raw_rating_mat

print(f"Matrix shape: {matrix.matrix.shape}")
print(f"Row names (participants): {matrix.row_names}")
print(f"Column names (comments): {matrix.col_names}")
print("\nMatrix data:")
print(matrix.matrix)

# Visualize the vote matrix as a heatmap (rows = participants, columns = comments).
fig, ax = plt.subplots(figsize=(10, 8))

# Mask missing entries (NaN = no vote recorded) instead of filling them with 0.
# The colorbar labels 0 as "Pass", so a 0-fill would draw every comment a
# participant never voted on as if they had explicitly passed on it.
viz_data = np.ma.masked_invalid(np.asarray(matrix.matrix, dtype=float))

# Diverging colormap: red = disagree (-1), yellow = pass (0), green = agree (+1).
# `cmap` and `norm` are reused by the moderated-matrix heatmap later on.
cmap = plt.cm.RdYlGn
norm = plt.Normalize(-1.5, 1.5)

# imshow draws masked cells with the colormap's "bad" colour, which is fully
# transparent by default, so the axes background shows through for "no vote".
ax.set_facecolor("lightgray")
im = ax.imshow(viz_data, cmap=cmap, norm=norm)

# Label every row and column with its participant / comment ID.
ax.set_xticks(np.arange(len(matrix.col_names)))
ax.set_yticks(np.arange(len(matrix.row_names)))
ax.set_xticklabels(matrix.col_names)
ax.set_yticklabels(matrix.row_names)

# Rotate the x-axis labels so long comment IDs do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Colorbar with one tick per vote value.
cbar = fig.colorbar(im, ax=ax, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['Disagree', 'Pass', 'Agree'])

# Title and axis labels; the title documents what the gray cells mean.
ax.set_title("Vote Matrix Visualization (gray = no vote)")
ax.set_xlabel("Comments")
ax.set_ylabel("Participants")

# Adjust layout and display.
plt.tight_layout()
plt.show()

## 6.5 Examining the Computed Results

In [None]:
# Report which of the three computed results exist on the updated
# conversation, then print a short summary of each one that is present.
print("Computed Results After Adding Votes:")
pca_result = updated_conversation.pca
print(f"PCA computed: {pca_result is not None}")
clusters = updated_conversation.group_clusters
print(f"Clustering computed: {clusters is not None}")
repness_result = updated_conversation.repness
print(f"Representativeness computed: {repness_result is not None}")

# PCA: list the available keys and, when a projection is stored, its size
# and the first two (participant, coordinates) entries.
if pca_result is not None:
    print("\nPCA Results:")
    print(f"PCA keys: {list(pca_result.keys())}")

    if 'projection' in pca_result:
        projections = pca_result['projection']
        print(f"Number of participants projected: {len(projections)}")
        print(f"Sample projection: {list(projections.items())[:2]}")

# Clustering: one line with the size of each cluster, one with its contents.
if clusters is not None:
    print("\nClustering Results:")
    print(f"Number of clusters: {len(clusters)}")
    for cluster_idx, members in enumerate(clusters):
        print(f"Cluster {cluster_idx}: {len(members)} participants")
        print(f"  Members: {members}")

# Representativeness: for every group, show at most the first three entries
# in the order they are stored.
if repness_result is not None:
    print("\nRepresentativeness Results:")
    print(f"Repness keys: {list(repness_result.keys())}")

    if 'group_repness' in repness_result:
        group_repness = repness_result['group_repness']
        print(f"Groups with repness: {list(group_repness.keys())}")

        for group_id, comments in group_repness.items():
            print(f"\nTop Representative Comments for Group {group_id}:")
            for rank, entry in enumerate(comments[:3], start=1):
                print(f"  {rank}. Comment {entry['comment_id']} ({entry['repful']}): repness={entry['repness']:.3f}, z-score={entry['repness_z']:.3f}")

## 6.6 Visualizing PCA and Clustering Results

If PCA and clustering were computed, let's visualize the results.

In [None]:
# Scatter-plot the first two projection coordinates of every participant,
# coloured by the cluster the participant was assigned to.
pca_result = updated_conversation.pca
clusters = updated_conversation.group_clusters

if pca_result is not None and 'projection' in pca_result and clusters is not None:
    from matplotlib.lines import Line2D

    # Mapping participant ID -> projection coordinates.
    proj_dict = pca_result['projection']

    # Mapping participant ID -> index of the cluster that lists it.
    # If an ID appears in several clusters, the last one wins.
    id_to_cluster = {
        pid: cluster_idx
        for cluster_idx, cluster_members in enumerate(clusters)
        for pid in cluster_members
    }

    # Collect plot data for participants that have at least a 2-D projection.
    x_coords = []
    y_coords = []
    labels = []
    assigned_clusters = []
    for p_id, projection in proj_dict.items():
        if len(projection) >= 2:
            x_coords.append(projection[0])
            y_coords.append(projection[1])
            labels.append(p_id)
            assigned_clusters.append(id_to_cluster.get(p_id, -1))  # -1 = not in any cluster

    # The palette stays separate from the per-point colour list, so the legend
    # can look up a cluster's colour directly by index. Cluster indices beyond
    # the palette length wrap around.
    palette = ['red', 'blue', 'green', 'purple', 'orange', 'cyan']
    unclustered_color = 'gray'
    point_colors = [
        palette[c % len(palette)] if c >= 0 else unclustered_color
        for c in assigned_clusters
    ]

    fig, ax = plt.subplots(figsize=(12, 10))
    ax.scatter(x_coords, y_coords, c=point_colors, alpha=0.7, s=100)

    # Annotate every point with its participant ID, offset slightly from the marker.
    for label, x, y in zip(labels, x_coords, y_coords):
        ax.annotate(label, (x, y), fontsize=8,
                    xytext=(5, 5), textcoords='offset points')

    # One legend entry per cluster, always in that cluster's palette colour,
    # plus an entry for gray points when any participant is unclustered.
    legend_elements = [
        Line2D([0], [0], marker='o', color='w',
               markerfacecolor=palette[i % len(palette)],
               markersize=10, label=f'Cluster {i}')
        for i in range(len(clusters))
    ]
    if -1 in assigned_clusters:
        legend_elements.append(
            Line2D([0], [0], marker='o', color='w',
                   markerfacecolor=unclustered_color,
                   markersize=10, label='Unclustered'))
    ax.legend(handles=legend_elements)

    # Title, axis labels and grid.
    ax.set_title("PCA Projection with Cluster Assignments")
    ax.set_xlabel("Principal Component 1")
    ax.set_ylabel("Principal Component 2")
    ax.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.show()
else:
    print("PCA and clustering results not available for visualization. Need more votes to compute meaningful clusters.")

## 6.7 Creating a Conversation Manager

The `ConversationManager` class manages multiple conversations and provides methods for creating, updating, and retrieving conversations.

In [None]:
# Create a temporary directory for conversation data.
# TemporaryDirectory (rather than mkdtemp) is used so the directory and its
# contents are removed automatically once `_tmp_dir` is garbage-collected or
# the kernel exits; mkdtemp would leave the files behind on every run.
# Keep `_tmp_dir` referenced for as long as `data_dir` is in use.
import tempfile
_tmp_dir = tempfile.TemporaryDirectory()
data_dir = _tmp_dir.name
print(f"Created temporary data directory: {data_dir}")

# Create a conversation manager that uses that directory as its data directory.
manager = ConversationManager(data_dir=data_dir)
print(f"Created conversation manager with data directory: {data_dir}")
print(f"Initial conversations: {manager.get_conversations()}")

## 6.8 Creating and Managing Conversations

In [None]:
# Register a fresh conversation with the manager under a time-based ID.
conv_id = f"manager-conversation-{int(time.time())}"
manager.create_conversation(conv_id)
print(f"Created conversation {conv_id} through the manager")

# Fetch the conversation back from the manager by its ID.
conv = manager.get_conversation(conv_id)
print(f"Retrieved conversation: {conv}")

# Submit the sample votes defined earlier through the manager and report
# the counters of the conversation object it returns.
print("\nProcessing votes through the manager...")
updated_conv = manager.process_votes(conv_id, votes)
print(f"Updated conversation at: {updated_conv.last_updated}")
for label, attr in (
    ("Participant count", "participant_count"),
    ("Comment count", "comment_count"),
):
    print(f"{label}: {getattr(updated_conv, attr)}")

# Look for a "<conv_id>.json" file inside the manager's data directory.
conv_file = os.path.join(data_dir, conv_id + ".json")
print(f"\nChecking if conversation was saved to disk at {conv_file}...")
if not os.path.exists(conv_file):
    print("Conversation was not saved to disk.")
else:
    print(f"Conversation was saved to disk. File size: {os.path.getsize(conv_file)} bytes")

## 6.9 Moderation Settings

Pol.is supports moderation of comments and participants. Let's apply some moderation settings.

In [None]:
# Moderation payload: comments to exclude, comments to feature, meta
# comments (none here), and participants to exclude.
moderation = dict(
    mod_out_tids=["comment2"],
    mod_in_tids=["comment1"],
    meta_tids=[],
    mod_out_ptpts=["participant3"],
)

# Hand the settings to the manager, which returns a conversation object.
print("Applying moderation settings...")
moderated_conv = manager.update_moderation(conv_id, moderation)

# Echo back the moderation lists stored on the returned conversation.
print("\nModeration settings applied:")
for label, attr in (
    ("Excluded comments", "mod_out_tids"),
    ("Featured comments", "mod_in_tids"),
    ("Excluded participants", "mod_out_ptpts"),
):
    print(f"{label}: {getattr(moderated_conv, attr)}")

## 6.10 Examining Effects of Moderation

Let's examine how the moderation settings affect the computations.

In [None]:
# Inspect the conversation's raw vote matrix after the moderation update.
# NOTE(review): this reads `raw_rating_mat`; whether moderation is expected to
# filter the raw matrix (as opposed to a derived one) is not visible here, so
# the "Warning" lines below may be the normal outcome -- confirm.
print("Vote Matrix After Moderation:")
moderated_matrix = moderated_conv.raw_rating_mat

print(f"Matrix shape: {moderated_matrix.matrix.shape}")
print(f"Row names (participants): {moderated_matrix.row_names}")
print(f"Column names (comments): {moderated_matrix.col_names}")

# Check if participant3 is excluded from the matrix
excluded_participant = "participant3"
if excluded_participant in moderated_matrix.row_names:
    print(f"\nWarning: Excluded participant {excluded_participant} is still in the matrix.")
else:
    print(f"\nExcluded participant {excluded_participant} is removed from the matrix as expected.")

# Check if comment2 is excluded from the matrix
excluded_comment = "comment2"
if excluded_comment in moderated_matrix.col_names:
    print(f"Warning: Excluded comment {excluded_comment} is still in the matrix.")
else:
    print(f"Excluded comment {excluded_comment} is removed from the matrix as expected.")

# Visualize the moderated vote matrix (rows = participants, columns = comments).
fig, ax = plt.subplots(figsize=(10, 8))

# Mask missing entries (NaN = no vote recorded) instead of filling them with 0.
# The colorbar labels 0 as "Pass", so a 0-fill would draw unvoted cells as
# explicit passes.
viz_data_mod = np.ma.masked_invalid(np.asarray(moderated_matrix.matrix, dtype=float))

# Masked cells are drawn transparent by default, so the gray axes background
# shows through for "no vote". `cmap` and `norm` come from the first heatmap cell.
ax.set_facecolor("lightgray")
im = ax.imshow(viz_data_mod, cmap=cmap, norm=norm)

# Label every row and column with its participant / comment ID.
ax.set_xticks(np.arange(len(moderated_matrix.col_names)))
ax.set_yticks(np.arange(len(moderated_matrix.row_names)))
ax.set_xticklabels(moderated_matrix.col_names)
ax.set_yticklabels(moderated_matrix.row_names)

# Rotate the x-axis labels so long comment IDs do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Colorbar with one tick per vote value.
cbar = fig.colorbar(im, ax=ax, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['Disagree', 'Pass', 'Agree'])

# Title and axis labels; the title documents what the gray cells mean.
ax.set_title("Moderated Vote Matrix Visualization (gray = no vote)")
ax.set_xlabel("Comments")
ax.set_ylabel("Participants")

# Adjust layout and display.
plt.tight_layout()
plt.show()

## 6.11 Forcing Recomputation

Sometimes we may want to force a recomputation of the clustering, PCA, and representativeness.

In [None]:
# Ask the manager to recompute the conversation; it returns a conversation object.
print("Forcing recomputation...")
recomputed_conv = manager.recompute(conv_id)

# Report the update timestamp and which computed results are present.
print("\nRecomputation complete:")
print(f"Updated at: {recomputed_conv.last_updated}")
for label, attr in (
    ("PCA", "pca"),
    ("Clustering", "group_clusters"),
    ("Representativeness", "repness"),
):
    print(f"{label} computed: {getattr(recomputed_conv, attr) is not None}")

## 6.12 Exporting and Importing Conversations

The `ConversationManager` supports exporting conversations to files and importing them from files.

In [None]:
# Write the conversation to "<conv_id>_export.json" in the data directory.
export_file = os.path.join(data_dir, conv_id + "_export.json")
print(f"Exporting conversation to {export_file}...")
manager.export_conversation(conv_id, export_file)

# Confirm the export by checking that the file now exists.
if not os.path.exists(export_file):
    print("Failed to export conversation.")
else:
    print(f"Conversation was exported successfully. File size: {os.path.getsize(export_file)} bytes")

# Load the exported file back in under a different conversation ID.
import_id = conv_id + "_imported"
print(f"\nImporting conversation as {import_id}...")
imported_id = manager.import_conversation(export_file, new_id=import_id)

# Retrieve the imported conversation using the ID the manager returned.
print(f"Imported conversation with ID: {imported_id}")
imported_conv = manager.get_conversation(imported_id)
print(f"Imported conversation: {imported_conv}")
for label, attr in (
    ("Participant count", "participant_count"),
    ("Comment count", "comment_count"),
):
    print(f"{label}: {getattr(imported_conv, attr)}")

## 6.13 Creating a Demo Conversation with Synthetic Data

Let's create a more realistic conversation with synthetic votes from multiple participants.

In [None]:
# Create a demo conversation with synthetic data
demo_id = "synthetic-demo-" + str(int(time.time()))
manager.create_conversation(demo_id)
print(f"Created synthetic demo conversation {demo_id}")

# Generate synthetic votes
import random
random.seed(42)  # For reproducibility

num_participants = 100
num_comments = 20
participant_ids = [f"p{i}" for i in range(num_participants)]
comment_ids = [f"c{i}" for i in range(num_comments)]

# Split points are derived from the sizes above, so changing
# num_participants / num_comments still yields two halves
# (50 and 10 for the default sizes).
group_split = num_participants // 2
comment_split = num_comments // 2

# Probability of an "agree" vote on comments a group favours / does not favour,
# and the width of the "disagree" band that follows the agree band.
agree_prob_favoured = 0.8
agree_prob_other = 0.2
disagree_band = 0.15

# Create two distinct opinion groups
synthetic_votes = {"votes": []}

for p_idx, pid in enumerate(participant_ids):
    # First group favours the first half of the comments, second group the second half.
    group = 0 if p_idx < group_split else 1

    for c_idx, cid in enumerate(comment_ids):
        favoured = (group == 0 and c_idx < comment_split) or (group == 1 and c_idx >= comment_split)
        agree_prob = agree_prob_favoured if favoured else agree_prob_other

        # One uniform draw per (participant, comment) pair decides the outcome:
        #   r < agree_prob                   -> agree (1)
        #   r < agree_prob + disagree_band   -> disagree (-1)
        #   otherwise                        -> no vote is recorded at all
        # NOTE(review): for non-favoured comments this means 20% agree,
        # 15% disagree and 65% no vote -- confirm that sparsity is intended.
        r = random.random()
        if r < agree_prob:
            vote = 1
        elif r < agree_prob + disagree_band:
            vote = -1
        else:
            continue  # no vote recorded for this pair

        synthetic_votes["votes"].append({
            "pid": pid,
            "tid": cid,
            "vote": vote
        })

print(f"Generated {len(synthetic_votes['votes'])} synthetic votes")

# Process synthetic votes
print("Processing synthetic votes...")
demo_conv = manager.process_votes(demo_id, synthetic_votes)

print(f"\nSynthetic demo conversation processed:")
print(f"Participant count: {demo_conv.participant_count}")
print(f"Comment count: {demo_conv.comment_count}")
# Non-NaN cells of the raw matrix are the recorded votes.
print(f"Vote count: {np.sum(~np.isnan(demo_conv.raw_rating_mat.matrix))}")

## 6.14 Visualizing the Synthetic Demo Conversation

In [None]:
# Check if PCA and clustering were computed
print("Computed Results for Synthetic Demo:")
print(f"PCA computed: {demo_conv.pca is not None}")
print(f"Clustering computed: {demo_conv.group_clusters is not None}")
print(f"Representativeness computed: {demo_conv.repness is not None}")

# If PCA and clustering were computed, plot the projection twice: once coloured
# by the group each participant was generated in, once by the detected cluster.
if demo_conv.pca is not None and 'projection' in demo_conv.pca and demo_conv.group_clusters is not None:
    from matplotlib.lines import Line2D

    # Mapping participant ID -> projection coordinates.
    proj_dict = demo_conv.pca['projection']

    # Mapping participant ID -> index of the cluster that lists it.
    id_to_cluster = {
        pid: cluster_idx
        for cluster_idx, cluster_members in enumerate(demo_conv.group_clusters)
        for pid in cluster_members
    }

    # Same split point the generator cell uses for its two groups.
    group_split = num_participants // 2

    # Collect plot data for participants that have at least a 2-D projection.
    x_coords = []
    y_coords = []
    assigned_clusters = []
    true_groups = []
    for p_id, projection in proj_dict.items():
        if len(projection) >= 2:
            x_coords.append(projection[0])
            y_coords.append(projection[1])
            assigned_clusters.append(id_to_cluster.get(p_id, -1))  # -1 = not in any cluster

            # Synthetic IDs have the form "p{idx}"; the index determines the true group.
            p_idx = int(p_id[1:])
            true_groups.append(0 if p_idx < group_split else 1)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 8))

    # Left panel: coloured by the true (generated) group.
    colors_true = ["blue" if g == 0 else "red" for g in true_groups]
    ax1.scatter(x_coords, y_coords, c=colors_true, alpha=0.6, s=50)
    ax1.set_title("Participants Colored by True Groups")
    ax1.set_xlabel("Principal Component 1")
    ax1.set_ylabel("Principal Component 2")
    legend_elements1 = [
        Line2D([0], [0], marker='o', color='w', markerfacecolor='blue', markersize=10, label='True Group 1'),
        Line2D([0], [0], marker='o', color='w', markerfacecolor='red', markersize=10, label='True Group 2')
    ]
    ax1.legend(handles=legend_elements1)
    ax1.grid(True, linestyle="--", alpha=0.7)

    # Right panel: coloured by detected cluster.
    # `default=-1` keeps this working when nothing was plotted: a bare max()
    # raises ValueError on an empty list.
    num_clusters = max(assigned_clusters, default=-1) + 1
    cluster_colors = plt.cm.tab10(np.linspace(0, 1, num_clusters))
    colors_cluster = [cluster_colors[c] if c >= 0 else (0.7, 0.7, 0.7, 1.0) for c in assigned_clusters]
    ax2.scatter(x_coords, y_coords, c=colors_cluster, alpha=0.6, s=50)
    ax2.set_title("Participants Colored by Detected Clusters")
    ax2.set_xlabel("Principal Component 1")
    ax2.set_ylabel("Principal Component 2")
    legend_elements2 = [
        Line2D([0], [0], marker='o', color='w',
               markerfacecolor=cluster_colors[i],
               markersize=10, label=f'Cluster {i}')
        for i in range(num_clusters)
    ]
    ax2.legend(handles=legend_elements2)
    ax2.grid(True, linestyle="--", alpha=0.7)

    plt.tight_layout()
    plt.show()

    # For every group, list up to three 'agree' and three 'disagree' comments,
    # ranked by the absolute value of their repness z-score.
    if demo_conv.repness is not None and 'group_repness' in demo_conv.repness:
        group_repness = demo_conv.repness['group_repness']
        print("\nRepresentative Comments by Cluster:")
        for group_id, comments in group_repness.items():
            print(f"\nTop Representative Comments for Cluster {group_id}:")
            for heading, stance in (("Top 'Agree' comments:", 'agree'),
                                    ("Top 'Disagree' comments:", 'disagree')):
                ranked = sorted(
                    (c for c in comments if c['repful'] == stance),
                    key=lambda c: abs(c['repness_z']),
                    reverse=True,
                )
                print(heading)
                for rank, comment in enumerate(ranked[:3], start=1):
                    print(f"  {rank}. Comment {comment['comment_id']}: repness={comment['repness']:.3f}, z-score={comment['repness_z']:.3f}")
else:
    print("PCA and clustering results not available for visualization. Need more votes to compute meaningful clusters.")

## 6.15 Summary

The Pol.is conversation management system:

1. Uses the `Conversation` class to store and update state for each conversation
2. Uses the `ConversationManager` class to manage multiple conversations
3. Processes votes and updates computations (PCA, clustering, representativeness)
4. Supports moderation of comments and participants
5. Provides methods for importing and exporting conversations
6. Persists conversation state to disk

This system is the core of the Pol.is backend and manages all the state and computation for Pol.is conversations.