# 1. Named Matrix - Pol.is Math Python Implementation

The `NamedMatrix` is the core data structure in the Pol.is math system. It stores votes from participants (rows) on comments (columns) and provides various operations for manipulating this data.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import BoundaryNorm, ListedColormap

from polismath.math.named_matrix import NamedMatrix

## 1.1 Creating a Named Matrix

Let's create a simple `NamedMatrix` to represent votes from participants on comments:

In [None]:
# Toy example: 5 participants voting on 4 comments.
# Vote encoding used here: 1 = agree, -1 = disagree, NaN = pass
# (i.e. no agree/disagree recorded for that participant/comment pair).

# Row labels (participants p0..p4) and column labels (comments c0..c3)
participant_ids = ["p%d" % idx for idx in range(5)]
comment_ids = ["c%d" % idx for idx in range(4)]

# Raw vote grid: one row per participant, one column per comment.
# The NaN entries force a float dtype for the whole array.
votes_matrix = np.array(
    [
        [1, 1, -1, 1],        # p0
        [1, 1, np.nan, -1],   # p1
        [-1, -1, 1, 1],       # p2
        [-1, np.nan, 1, -1],  # p3
        [1, -1, -1, np.nan],  # p4
    ]
)

# Attach the names to the raw grid
vote_matrix = NamedMatrix(votes_matrix, participant_ids, comment_ids)

# Show the shape, both label sets, and finally the data itself
print("Vote matrix shape:", vote_matrix.matrix.shape)
print("Row names (participants):", vote_matrix.row_names)
print("Column names (comments):", vote_matrix.col_names)
print("\nMatrix data:", vote_matrix.matrix, sep="\n")

## 1.2 Accessing Data in the Named Matrix

In [None]:
# Whole-row lookup: everything participant "p0" voted
p0_votes = vote_matrix.get_row_by_name("p0")
print("p0's votes:", p0_votes)

# Whole-column lookup: every participant's vote on comment "c1"
c1_votes = vote_matrix.get_column_by_name("c1")
print("Votes on comment c1:", c1_votes)

# Single-cell lookup, addressed as (participant name, comment name)
cell_key = ("p2", "c3")
p2_c3_vote = vote_matrix.get_value(*cell_key)
print("p2's vote on comment c3:", p2_c3_vote)

## 1.3 Updating the Matrix with New Votes

In [None]:
# --- Overwrite an existing cell: p0's vote on c2 goes from -1 to 1 ---
# The result of update() is bound to a new name so it can be compared with
# vote_matrix below (presumably update() leaves the receiver untouched; the
# "Original value" line is what demonstrates that when the cell runs).
updated_matrix = vote_matrix.update("p0", "c2", 1)
print("Updated p0's vote on c2 from -1 to 1:")
print("Original value:", vote_matrix.get_value("p0", "c2"))
print("Updated value:", updated_matrix.get_value("p0", "c2"))

# --- Add a participant that is not in the matrix yet (p5) ---
# Each update() result feeds the next call, one comment at a time.
new_participant_votes = {"c0": 1, "c1": -1, "c2": 1, "c3": -1}
matrix_with_new_participant = vote_matrix
for comment in new_participant_votes:
    vote = new_participant_votes[comment]
    matrix_with_new_participant = matrix_with_new_participant.update("p5", comment, vote)

print("\nAdded a new participant p5:")
print("New matrix shape:", matrix_with_new_participant.matrix.shape)
print("Row names:", matrix_with_new_participant.row_names)

# --- Add a comment that is not in the matrix yet (c4) ---
# Two votes on the new comment, applied back to back starting from vote_matrix.
matrix_with_new_comment = vote_matrix.update("p0", "c4", 1).update("p1", "c4", -1)

print("\nAdded a new comment c4:")
print("New matrix shape:", matrix_with_new_comment.matrix.shape)
print("Column names:", matrix_with_new_comment.col_names)

## 1.4 Matrix Operations

In [None]:
# Keep only some participants (rows), selected by name
subset_rows = ["p0", "p2", "p4"]
row_subset = vote_matrix.rowname_subset(subset_rows)
print("Row subset shape:", row_subset.matrix.shape)
print("Row subset names:", row_subset.row_names)
print(row_subset.matrix)

# Keep only some comments (columns), selected by name
subset_cols = ["c0", "c3"]
col_subset = vote_matrix.colname_subset(subset_cols)
print("\nColumn subset shape:", col_subset.matrix.shape)
print("Column subset names:", col_subset.col_names)
print(col_subset.matrix)

# Summary statistics over the full matrix
print("\nMatrix statistics:")
print("Number of participants:", len(vote_matrix.row_names))
print("Number of comments:", len(vote_matrix.col_names))

# Count of non-NaN cells; it is the denominator of both percentages below,
# so it is computed once and reused.
num_cast = np.sum(~np.isnan(vote_matrix.matrix))
print("Number of votes cast:", num_cast)
print("Percentage of votes that are agrees:", np.sum(vote_matrix.matrix == 1) / num_cast * 100, "%")
print("Percentage of votes that are disagrees:", np.sum(vote_matrix.matrix == -1) / num_cast * 100, "%")

## 1.5 Visualizing the Vote Matrix

In [None]:
# Draw the vote matrix as a heatmap with one flat colour per vote value
fig, ax = plt.subplots(figsize=(10, 6))

# NaN cells are shown as 0 so they land in the middle ("Pass") colour bin.
# np.where builds a new array, so the NamedMatrix data is left untouched.
viz_data = np.where(np.isnan(vote_matrix.matrix), 0, vote_matrix.matrix)

# Discrete colormap: red for disagree, white for pass, green for agree.
# `bounds` are the bin edges around the three values -1, 0 and 1; BoundaryNorm
# maps each bin to exactly one of the three listed colours (no gradient).
cmap = ListedColormap(["tab:red", "white", "tab:green"])
bounds = [-1.5, -0.5, 0.5, 1.5]
norm = BoundaryNorm(bounds, cmap.N)

# Create the heatmap
im = ax.imshow(viz_data, cmap=cmap, norm=norm)

# One tick per comment (x) and per participant (y), labelled by name
ax.set_xticks(np.arange(len(comment_ids)))
ax.set_yticks(np.arange(len(participant_ids)))
ax.set_xticklabels(comment_ids)
ax.set_yticklabels(participant_ids)

# Rotate the x-axis labels so longer comment IDs would not overlap
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Colorbar with one labelled tick at the centre of each colour bin
cbar = ax.figure.colorbar(im, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['Disagree', 'Pass', 'Agree'])

# Title and axis labels so the figure stands alone
ax.set_title("Vote Matrix Visualization")
ax.set_xlabel("Comments")
ax.set_ylabel("Participants")

# Adjust layout and display
fig.tight_layout()
plt.show()

## 1.6 Handling Real-World Data

In a real Pol.is conversation, the vote matrix would be much larger and more sparse (many NaN values). The `NamedMatrix` class is designed to handle this efficiently.

In [None]:
# Create a larger, more realistic example
num_participants = 100
num_comments = 30
large_participant_ids = [f"p{i}" for i in range(num_participants)]
large_comment_ids = [f"c{i}" for i in range(num_comments)]

# Seeded generator: Restart Kernel -> Run All reproduces the same matrix,
# the same statistics and the same figure every time.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Start from an all-NaN matrix (participants only vote on some comments)
large_votes_matrix = np.full((num_participants, num_comments), np.nan)

# Fill exactly 30% of the cells. Sampling flat cell indices WITHOUT
# replacement guarantees every vote lands on a distinct cell; drawing
# (row, col) pairs independently would hit some cells twice and leave the
# real density noticeably below 30%.
num_votes = int(num_participants * num_comments * 0.3)
voted_cells = rng.choice(num_participants * num_comments, size=num_votes, replace=False)
p_idx, c_idx = np.unravel_index(voted_cells, large_votes_matrix.shape)

# Each vote is agree (1) with 70% probability, otherwise disagree (-1)
large_votes_matrix[p_idx, c_idx] = np.where(rng.random(num_votes) < 0.7, 1.0, -1.0)

# Create the named matrix
large_vote_matrix = NamedMatrix(large_votes_matrix, large_participant_ids, large_comment_ids)

# Print statistics (counts are computed once and reused in the messages)
num_cast_votes = np.sum(~np.isnan(large_votes_matrix))
num_agrees = np.sum(large_votes_matrix == 1)
num_disagrees = np.sum(large_votes_matrix == -1)
print(f"Created a matrix with {num_participants} participants and {num_comments} comments")
print(f"Total possible votes: {num_participants * num_comments}")
print(f"Actual votes cast: {num_cast_votes} ({num_cast_votes/(num_participants * num_comments)*100:.1f}%)")
print(f"Agrees: {num_agrees} ({num_agrees/num_cast_votes*100:.1f}%)")
print(f"Disagrees: {num_disagrees} ({num_disagrees/num_cast_votes*100:.1f}%)")

# Heatmap of the top-left corner of the large matrix:
# the first 20 participants against the first 15 comments.
fig, ax = plt.subplots(figsize=(12, 8))

shown_participants = large_participant_ids[:20]
shown_comments = large_comment_ids[:15]
subset_matrix = large_vote_matrix.rowname_subset(shown_participants).colname_subset(shown_comments)

# NaN cells are drawn as 0, i.e. in the "Pass" colour
subset_values = subset_matrix.matrix
viz_data_large = np.where(np.isnan(subset_values), 0, subset_values)

# `cmap` and `norm` are the ones defined in the earlier visualization cell
im = ax.imshow(viz_data_large, cmap=cmap, norm=norm)

# Name-labelled ticks, with the x labels rotated to avoid overlap
ax.set_xticks(np.arange(len(subset_matrix.col_names)))
ax.set_yticks(np.arange(len(subset_matrix.row_names)))
ax.set_xticklabels(subset_matrix.col_names)
ax.set_yticklabels(subset_matrix.row_names)
plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

# Colorbar legend for the three vote values
cbar = ax.figure.colorbar(im, ticks=[-1, 0, 1])
cbar.ax.set_yticklabels(['Disagree', 'Pass', 'Agree'])

ax.set(
    title="Large Vote Matrix Visualization (Subset)",
    xlabel="Comments",
    ylabel="Participants",
)
fig.tight_layout()
plt.show()

## 1.7 Summary

The `NamedMatrix` class is the foundation of the Pol.is math system. It provides:

1. A data structure for storing votes with named rows (participants) and columns (comments)
2. Methods for accessing and updating the data by name rather than index
3. Operations for creating subsets of the matrix
4. Efficient handling of sparse data (many participants only vote on a subset of comments)

In the next notebook, we'll explore how this matrix is used for dimensionality reduction using PCA.