# `bsky-net` demo: simulating the majority model


In [1]:
# Imports
import json
import random
import typing as t
from collections import Counter

import matplotlib.pyplot as plt

In [2]:
# Type definitions

Opinion = t.Literal["favor", "against", "none"]


class RecordInfo(t.TypedDict):
    opinion: Opinion
    createdAt: str


class UserActivity(t.TypedDict):
    seen: dict[str, RecordInfo]
    liked: dict[str, RecordInfo]


BskyNetGraph: t.TypeAlias = dict[str, dict[str, UserActivity]]

In [6]:
# Read in bsky-net graph

with open("../data/processed/bsky-net-daily.json", "r") as json_file:
    bsky_net: BskyNetGraph = json.load(json_file)

## Looking at the data


In [10]:
print("Activity for user @arunwadhwa.com on March 2, 2023:")
print(json.dumps(bsky_net["2023-03-02"]["did:plc:nxtgtzge53oft7cx6tqdsfcn"], indent=4))

Activity for user @arunwadhwa.com on March 2, 2023:
{
    "seen": {
        "at://did:plc:wc4tl2pcydmsfmx6jxxjwevu/app.bsky.feed.post/3jpwth5n4qc2c": {
            "opinion": "against",
            "createdAt": "2023-03-02T08:09:48.418Z"
        },
        "at://did:plc:nbyzym7vdf3czj7rn3tt5vtq/app.bsky.feed.post/3jpxk3ymmsc2p": {
            "opinion": "favor",
            "createdAt": "2023-03-02T14:55:09.871Z"
        },
        "at://did:plc:gd5p4vw3jjkhx4bao2s5qir2/app.bsky.feed.post/3jpy3cssol324": {
            "opinion": "none",
            "createdAt": "2023-03-02T20:03:12.414Z"
        },
        "at://did:plc:gd5p4vw3jjkhx4bao2s5qir2/app.bsky.feed.post/3jpy3gmxvcc22": {
            "opinion": "none",
            "createdAt": "2023-03-02T20:05:20.465Z"
        }
    },
    "liked": {
        "at://did:plc:wc4tl2pcydmsfmx6jxxjwevu/app.bsky.feed.post/3jpwth5n4qc2c": {
            "opinion": "against",
            "createdAt": "2023-03-02T08:12:17.095Z"
        }
    }
}


Above shows the activity of user `@arunwadhwa.com` on March 2, 2023. You can view his profile [here](https://bsky.social/profile/did:plc:nxtgtzge53oft7cx6tqdsfcn).

`seen` describes every on-topic post that `@arunwadhwa.com` saw\* on that day; each key represents the `uri` (post ID) of a post that was created by a user they followed as of that day; each value describes that its expressed opinion and the timestamp of its creation.

For example, the post with `uri` `at://did:plc:wc4tl2pcydmsfmx6jxxjwevu/app.bsky.feed.post/3jpwth5n4qc2c` (viewable [here](https://bsky.app/profile/did:plc:wc4tl2pcydmsfmx6jxxjwevu/post/3jpwth5n4qc2c)) was created by user `@travis.dads.lol` at `2023-03-02T08:09:48.418Z` and expresses the opinion `against`.

`liked` is a subset of `seen`; it describes every post from `seen` that `@arunwadhwa.com` liked.

\* _Note: "saw" is used very loosely here; in practice, `seen` contains every on-topic post created by a user `@arunwadhwa.com` followed on that day. Currently, this means that many posts included in `seen` weren't actually viewed by that user. The posts included in `seen` will be improved, using information like `@arunwadhwa.com`'s activity on that day._


## Iterating over bsky-net

[Explain]


In [15]:
for step, data in bsky_net.items():
    print(f"Day {step}:")
    for did, activity in data.items():
        posts_seen = activity["seen"]
        posts_liked = activity["liked"]

        print(f"  User {did}:")
        print(f"    # posts seen: {len(posts_seen)}")
        print(f"    # posts liked: {len(posts_liked)}")


Day 2022-11-17:
  User did:plc:ragtjsm2j2vknwkz3zp4oxrd:
    # posts seen: 1
    # posts liked: 0
  User did:plc:yk4dd2qkboz2yv6tpubpc6co:
    # posts seen: 1
    # posts liked: 0
  User did:plc:l3rouwludahu3ui3bt66mfvj:
    # posts seen: 1
    # posts liked: 0
  User did:plc:oky5czdrnfjpqslsw2a5iclo:
    # posts seen: 1
    # posts liked: 0
Day 2022-11-18:
Day 2022-11-19:
Day 2022-11-20:
Day 2022-11-21:
Day 2022-11-22:
Day 2022-11-23:
  User did:plc:ragtjsm2j2vknwkz3zp4oxrd:
    # posts seen: 1
    # posts liked: 1
  User did:plc:l3rouwludahu3ui3bt66mfvj:
    # posts seen: 1
    # posts liked: 0
  User did:plc:vpkhqolt662uhesyj6nxm7ys:
    # posts seen: 1
    # posts liked: 0
  User did:plc:oky5czdrnfjpqslsw2a5iclo:
    # posts seen: 1
    # posts liked: 1
  User did:plc:7axcqwj4roha6mqpdhpdwczx:
    # posts seen: 1
    # posts liked: 0
  User did:plc:vzmlifz3ghar4cu2hj3srga2:
    # posts seen: 1
    # posts liked: 0
  User did:plc:qjeavhlw222ppsre4rscd3n2:
    # posts seen: 1
    # p

## Simulating the majority model

[Explain]


In [22]:
def majority_vote(opinions: list[Opinion]) -> Opinion:
    counts = Counter(opinions)
    return max(counts, key=lambda x: counts[x])


n_steps = len(bsky_net)
opinion_history: dict[str, list[t.Optional[Opinion]]] = {}
curr_opinions: dict[str, Opinion] = {}

# Initialize opinion history
for data in bsky_net.values():
    for did in data.keys():
        if did not in opinion_history:
            opinion_history[did] = [None] * n_steps

for idx, data in enumerate(bsky_net.values()):
    unchanged_dids = set(opinion_history.keys())

    for did, activity in data.items():
        if not activity["seen"]:
            continue

        seen_opinions = []

        for record in activity["seen"].values():
            if not record["on_topic"]:
                continue

            seen_opinions.append(record["opinion"])

        pred_opinion = majority_vote(seen_opinions)

        opinion_history[did][idx] = pred_opinion
        curr_opinions[did] = pred_opinion

In [24]:
# TODO: Visualize opinions over time, etc.

In [17]:
# TODO: Actual validation analysis, calculate metrics, etc.

positive_count = sum(1 for opinion in opinions.values() if opinion > 0)
negative_count = sum(1 for opinion in opinions.values() if opinion < 0)
neutral_count = sum(1 for opinion in opinions.values() if opinion == 0)

print(f"Number of users: {len(opinions)}")
print(f"Positive opinions: {positive_count}")
print(f"Negative opinions: {negative_count}")
print(f"Neutral opinions: {neutral_count}")

Number of users: 191
Positive opinions: 191
Negative opinions: 0
Neutral opinions: 0
