In [1]:
import json


with open("../data/speeches_lemmatized.json") as f:
    speeches = json.load(f)

# Preprocessing the corpus

In [2]:
from collections import defaultdict


def group_speeches_by_quarter_century(speeches_list):
    """Group speeches into quarter centuries and create text files.

    Args:
        speeches_list: List of dictionaries containing speech data

    Returns:
        Dictionary with quarter century ranges as keys and filenames as values

    """
    # Group speeches by quarter century
    quarter_groups = defaultdict(list)

    for speech in speeches_list:
        year = int(speech["date"][:4])  # Get year from date
        # Calculate quarter century (1800-1824, 1825-1849, etc.)
        quarter_start = year - (year % 25)
        quarter_end = quarter_start + 24
        quarter_key = f"{quarter_start}-{quarter_end}"

        quarter_groups[quarter_key].append(speech)

    # Create text files for each quarter century
    file_paths = {}
    for quarter, speeches in quarter_groups.items():
        # Sort speeches by date
        speeches.sort(key=lambda x: x["date"])

        # Create lemmatized version
        lemma_text = "\n".join(s["lemmatized"] for s in speeches)
        lemma_filename = f"../data/speeches_{quarter}_lemmatized.txt"
        with open(lemma_filename, "w", encoding="utf-8") as f:
            f.write(lemma_text)

        # Create transcript version
        transcript_text = "\n".join(s["transcript"] for s in speeches)
        transcript_filename = f"../data/speeches_{quarter}_transcript.txt"
        with open(transcript_filename, "w", encoding="utf-8") as f:
            f.write(transcript_text)

        file_paths[quarter] = {
            "lemmatized": lemma_filename,
            "transcript": transcript_filename,
            "count": len(speeches),
        }

    # Print summary
    print("Quarter Century Statistics:")
    for quarter, info in file_paths.items():
        print(f"{quarter}: {info['count']} speeches")

    return file_paths


file_paths = group_speeches_by_quarter_century(speeches)

Quarter Century Statistics:
1800-1824: 59 speeches
1900-1924: 91 speeches
1975-1999: 150 speeches
1825-1849: 90 speeches
2000-2024: 164 speeches
1950-1974: 165 speeches
1925-1949: 103 speeches
1875-1899: 99 speeches
1775-1799: 28 speeches
1850-1874: 108 speeches
2025-2049: 2 speeches


# Building language models

In [None]:
from pathlib import Path

from chronowords.algebra.svd import SVDAlgebra


def create_embeddings_for_periods(file_paths, use_lemmatized=True):
    """Create word embeddings for each quarter-century period.

    Args:
        file_paths: Dictionary from group_speeches_by_quarter_century
        use_lemmatized: Whether to use lemmatized or transcript texts

    Returns:
        Dictionary of {period: SVDAlgebra model}

    """
    models = {}

    for period, info in file_paths.items():
        print(f"\nProcessing period {period}")

        # Choose which text version to use
        filename = info["lemmatized" if use_lemmatized else "transcript"]

        # Create generator for the corpus
        def read_corpus():
            with open(filename, encoding="utf-8") as f:
                for line in f:
                    if line.strip():  # Skip empty lines
                        yield line.strip().lower()  # Lowercase everything

        # Initialize and train model
        model = SVDAlgebra(
            n_components=100,  # Smaller dimension for historical texts
            window_size=5,
            min_word_length=3,
            cms_width=1_000_000,  # 1M width should be enough for this corpus
            cms_depth=5,
        )

        try:
            # Train model
            # print(f"Training model for {period}...")
            model.train(read_corpus())

            # Print some statistics
            # print(f"Vocabulary size: {len(model.vocabulary)}")
            # print(f"Sample words: {model.vocabulary[:10]}")

            models[period] = model
        except Exception as e:
            print(f"Error processing period {period}: {e!s}")

    return models


# Create models for both lemmatized and transcript versions
print("Creating models for lemmatized texts...")
lemma_models = create_embeddings_for_periods(file_paths, use_lemmatized=True)

print("\nCreating models for transcript texts...")
transcript_models = create_embeddings_for_periods(file_paths, use_lemmatized=False)

# Save models for later use
for period, model in lemma_models.items():
    save_path = Path(f"../models/lemmatized/{period}")
    save_path.mkdir(parents=True, exist_ok=True)
    model.save_model(save_path)

for period, model in transcript_models.items():
    save_path = Path(f"../models/transcript/{period}")
    save_path.mkdir(parents=True, exist_ok=True)
    model.save_model(save_path)

Creating models for lemmatized texts...

Processing period 1800-1824
Counting words and skipgrams...


# Semantic shift

In [3]:
from pathlib import Path


def load_and_sort_models(base_path="../models/lemmatized"):
    models = {}
    for period_path in Path(base_path).iterdir():
        if period_path.is_dir():
            period = period_path.name
            loaded_model = SVDAlgebra()
            loaded_model.load_model(period_path)
            models[period] = loaded_model

    # Sort by start year
    sorted_periods = sorted(models.keys(), key=lambda x: int(x.split("-")[0]))
    return {period: models[period] for period in sorted_periods}


models = load_and_sort_models()
print("Periods in chronological order:", list(models.keys()))

target_words = [
    "freedom",
    "democracy",
    "constitution",
    "justice",
    "government",
    "power",
    "law",
    "authority",
    "america",
    "union",
    "state",
    "nation",
]

Periods in chronological order: ['1775-1799', '1800-1824', '1825-1849', '1850-1874', '1875-1899', '1900-1924', '1925-1949', '1950-1974', '1975-1999', '2000-2024', '2025-2049']


In [4]:
from chronowords.alignment.procrustes import ProcustesAligner


def analyze_shifts(models, target_words=None):
    """Analyze semantic shifts between consecutive periods."""
    periods = sorted(models.keys())
    results = {}

    for i in range(len(periods) - 1):
        period1, period2 = periods[i], periods[i + 1]
        model1, model2 = models[period1], models[period2]

        # Align embeddings
        aligner = ProcustesAligner()
        metrics = aligner.fit(
            model1.embeddings,
            model2.embeddings,
            model1.vocabulary,
            model2.vocabulary,
        )

        print(f"\nAligned {period1} -> {period2}")
        print(f"Aligned words: {metrics.num_aligned_words}")
        print(f"Average similarity: {metrics.average_cosine_similarity:.3f}")

        # Analyze specific words
        if target_words:
            shifts = []
            for word in target_words:
                sim = aligner.get_word_similarity(
                    word, model1.embeddings, model2.embeddings
                )
                if sim is not None:
                    shifts.append((word, 1 - sim))  # Convert to distance

            # Sort by shift magnitude
            shifts.sort(key=lambda x: x[1], reverse=True)
            results[f"{period1}->{period2}"] = shifts

            print("\nTop shifted words:")
            for word, shift in shifts[:5]:
                print(f"{word}: {shift:.3f}")

    return results


# Analyze key political concepts
target_words = [
    "freedom",
    "democracy",
    "government",
    "power",
    "war",
    "peace",
    "america",
    "union",
    "state",
    "constitution",
    "rights",
    "justice",
    "law",
]

shifts = analyze_shifts(models, target_words)


Aligned 1775-1799 -> 1800-1824
Aligned words: 729
Average similarity: 0.315

Top shifted words:
freedom: 1.116
power: 0.828
state: 0.804
war: 0.760
law: 0.722

Aligned 1800-1824 -> 1825-1849
Aligned words: 795
Average similarity: 0.296

Top shifted words:
peace: 0.799
constitution: 0.764
power: 0.721
union: 0.715
state: 0.709

Aligned 1825-1849 -> 1850-1874
Aligned words: 822
Average similarity: 0.299

Top shifted words:
government: 0.876
constitution: 0.799
war: 0.772
state: 0.761
union: 0.740

Aligned 1850-1874 -> 1875-1899
Aligned words: 779
Average similarity: 0.305

Top shifted words:
freedom: 0.956
constitution: 0.859
state: 0.802
america: 0.757
government: 0.715

Aligned 1875-1899 -> 1900-1924
Aligned words: 780
Average similarity: 0.301

Top shifted words:
freedom: 1.115
government: 0.770
america: 0.751
state: 0.742
constitution: 0.723

Aligned 1900-1924 -> 1925-1949
Aligned words: 782
Average similarity: 0.304

Top shifted words:
democracy: 0.983
law: 0.898
government: 0.724


In [5]:
from pathlib import Path

import altair as alt
import pandas as pd


# Load preprocessed models
models = load_and_sort_models()
periods = sorted(models.keys(), key=lambda x: int(x.split("-")[0]))

shift_data = []
# For each period transition and its shifts
for i in range(len(periods) - 1):
    period1, period2 = periods[i], periods[i + 1]
    period_pair = f"{period1} → {period2}"

    # Get the shift data for this period transition
    model1, model2 = models[period1], models[period2]
    aligner = ProcustesAligner()
    metrics = aligner.fit(
        model1.embeddings,
        model2.embeddings,
        model1.vocabulary,
        model2.vocabulary,
    )

    # Calculate shifts for target words
    for word in target_words:
        sim = aligner.get_word_similarity(word, model1.embeddings, model2.embeddings)
        if sim is not None:
            shift_data.append(
                {
                    "period": period_pair,
                    "word": word,
                    "shift": 1 - sim,  # Convert similarity to distance
                    "year": int(period1.split("-")[0]),
                }
            )

# Create DataFrame
df = pd.DataFrame(shift_data)

timeline = (
    alt.Chart(df)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:Q", title="Year"),
        y=alt.Y("shift:Q", title="Semantic Change"),
        color="word:N",
        tooltip=["word", "year", "shift"],
    )
    .properties(width=800, height=400, title="Semantic Shifts Over Time")
    .interactive()
)

# Display the visualization
timeline

In [6]:
df

Unnamed: 0,period,word,shift,year
0,1775-1799 → 1800-1824,freedom,1.116279,1775
1,1775-1799 → 1800-1824,government,0.640784,1775
2,1775-1799 → 1800-1824,power,0.827967,1775
3,1775-1799 → 1800-1824,war,0.759982,1775
4,1775-1799 → 1800-1824,peace,0.671803,1775
...,...,...,...,...
109,2000-2024 → 2025-2049,america,0.522717,2000
110,2000-2024 → 2025-2049,state,0.769804,2000
111,2000-2024 → 2025-2049,constitution,0.707226,2000
112,2000-2024 → 2025-2049,justice,0.708987,2000


In [7]:
shift_data = []
# Use first period as reference
base_period = periods[0]
base_model = models[base_period]

for current_period in periods[1:]:
    current_model = models[current_period]
    aligner = ProcustesAligner()
    metrics = aligner.fit(
        base_model.embeddings,
        current_model.embeddings,
        base_model.vocabulary,
        current_model.vocabulary,
    )

    for word in target_words:
        sim = aligner.get_word_similarity(
            word, base_model.embeddings, current_model.embeddings
        )
        if sim is not None:
            shift_data.append(
                {"period": current_period, "word": word, "cumulative_shift": 1 - sim}
            )

df_cumulative = pd.DataFrame(shift_data)
df_cumulative["year"] = df_cumulative["period"].apply(lambda x: int(x.split("-")[0]))

cumulative_timeline = (
    alt.Chart(df_cumulative)
    .mark_line(point=True)
    .encode(
        x=alt.X("year:Q", title="Year"),
        y=alt.Y("cumulative_shift:Q", title="Cumulative Semantic Change from 1775"),
        color="word:N",
        tooltip=["word", "year", "cumulative_shift"],
    )
    .properties(
        width=800, height=400, title="Cumulative Semantic Shifts from First Period"
    )
    .interactive()
)

cumulative_timeline

In [8]:
df_cumulative

Unnamed: 0,period,word,cumulative_shift,year
0,1800-1824,freedom,1.116279,1800
1,1800-1824,government,0.640784,1800
2,1800-1824,power,0.827967,1800
3,1800-1824,war,0.759982,1800
4,1800-1824,peace,0.671803,1800
...,...,...,...,...
104,2025-2049,america,0.634986,2025
105,2025-2049,state,0.632132,2025
106,2025-2049,constitution,0.696486,2025
107,2025-2049,justice,0.588670,2025


# Topic modeling with NMF

In [9]:
from chronowords.topics.nmf import TopicModel


def analyze_topics_by_period(models, n_topics=10):
    """Create topic models for each time period using existing PPMI matrices.

    Args:
        models: Dictionary of period -> model data from load_and_sort_models()
        n_topics: Number of topics to extract

    """
    topic_models = {}

    for period, model_data in models.items():
        print(f"\nAnalyzing topics for {period}")

        # Get embeddings and vocabulary
        embeddings = model_data.M_dense
        embeddings[embeddings < 0.0] = 0.0  # quick fix for negative values
        vocabulary = model_data.vocabulary

        # Create topic model on existing embeddings
        topic_model = TopicModel(n_topics=n_topics)
        topic_model.fit(embeddings, vocabulary)

        # Print topics
        print("\nTop topics:")
        topic_model.print_topics()

        topic_models[period] = topic_model

    return topic_models


# Create topic models using existing data
topic_models = analyze_topics_by_period(models)


Analyzing topics for 1775-1799





Top topics:

Topic 0:
  service: 0.0034
  can: 0.0032
  violence: 0.0031
  london: 0.0029
  try: 0.0026
  acknowledgment: 0.0026
  upon: 0.0026
  interrupt: 0.0025
  from: 0.0025
  competent: 0.0025

Topic 1:
  never: 0.0035
  lead: 0.0031
  proportion: 0.0030
  executive: 0.0029
  condemnation: 0.0028
  academy: 0.0028
  house: 0.0027
  moderation: 0.0027
  feel: 0.0027
  heretofore: 0.0026

Topic 2:
  invite: 0.0029
  within: 0.0027
  amity: 0.0027
  four: 0.0026
  unite: 0.0026
  preservation: 0.0025
  heretofore: 0.0024
  ill: 0.0024
  expedition: 0.0024
  anxiety: 0.0022

Topic 3:
  september: 0.0032
  forget: 0.0031
  accumulation: 0.0027
  senekas: 0.0027
  obedience: 0.0027
  contribute: 0.0026
  conducive: 0.0026
  fortitude: 0.0025
  passamaquoddy: 0.0025
  contract: 0.0024

Topic 4:
  throw: 0.0030
  eye: 0.0030
  candid: 0.0029
  advantage: 0.0029
  offender: 0.0028
  since: 0.0027
  addition: 0.0027
  local: 0.0027
  age: 0.0027
  process: 0.0027

Topic 5:
  intention: 0.




Top topics:

Topic 0:
  improvement: 0.0037
  demand: 0.0032
  omit: 0.0028
  aid: 0.0026
  hundred: 0.0026
  return: 0.0026
  store: 0.0025
  little: 0.0025
  annual: 0.0025
  persevere: 0.0025

Topic 1:
  execution: 0.0030
  safety: 0.0028
  appoint: 0.0028
  operation: 0.0026
  seat: 0.0026
  likewise: 0.0026
  prevail: 0.0026
  difficulty: 0.0025
  survey: 0.0025
  jurisdiction: 0.0025

Topic 2:
  high: 0.0029
  port: 0.0029
  otherwise: 0.0029
  may: 0.0028
  exertion: 0.0027
  together: 0.0027
  state: 0.0026
  like: 0.0026
  subsequent: 0.0025
  event: 0.0024

Topic 3:
  employ: 0.0035
  force: 0.0029
  suffer: 0.0029
  basis: 0.0029
  have: 0.0028
  duly: 0.0028
  mean: 0.0026
  necessary: 0.0026
  employment: 0.0025
  among: 0.0025

Topic 4:
  sustain: 0.0032
  interesting: 0.0028
  next: 0.0028
  title: 0.0027
  his: 0.0026
  toward: 0.0026
  local: 0.0025
  amount: 0.0024
  take: 0.0024
  town: 0.0024

Topic 5:
  difficult: 0.0035
  stock: 0.0033
  particular: 0.0033
  repo




Top topics:

Topic 0:
  manner: 0.0036
  consult: 0.0031
  relate: 0.0028
  note: 0.0026
  day: 0.0025
  exercise: 0.0025
  oppose: 0.0025
  honor: 0.0025
  more: 0.0025
  material: 0.0025

Topic 1:
  negotiate: 0.0035
  grant: 0.0028
  difficulty: 0.0028
  revolution: 0.0027
  equal: 0.0027
  two: 0.0027
  yet: 0.0026
  guaranty: 0.0026
  east: 0.0025
  inquiry: 0.0025

Topic 2:
  provision: 0.0030
  tendency: 0.0030
  distribution: 0.0028
  additional: 0.0027
  consist: 0.0027
  french: 0.0027
  keep: 0.0025
  consideration: 0.0025
  compromise: 0.0025
  settle: 0.0024

Topic 3:
  proceed: 0.0030
  offense: 0.0029
  inconvenience: 0.0026
  and: 0.0026
  any: 0.0026
  pursuit: 0.0026
  commander: 0.0026
  ordinary: 0.0025
  revenue: 0.0024
  constitution: 0.0024

Topic 4:
  advance: 0.0032
  capture: 0.0029
  exclude: 0.0028
  fully: 0.0027
  receipt: 0.0027
  close: 0.0027
  powerful: 0.0027
  proceeding: 0.0027
  friendly: 0.0026
  economy: 0.0026

Topic 5:
  claim: 0.0033
  occupy




Top topics:

Topic 0:
  senators: 0.0032
  domestic: 0.0032
  final: 0.0030
  river: 0.0030
  legislation: 0.0029
  deem: 0.0027
  answer: 0.0026
  claim: 0.0026
  begin: 0.0026
  patriotic: 0.0026

Topic 1:
  general: 0.0033
  invasion: 0.0030
  way: 0.0030
  doubt: 0.0028
  ratification: 0.0027
  distant: 0.0027
  suitable: 0.0026
  once: 0.0026
  connection: 0.0025
  france: 0.0025

Topic 2:
  offense: 0.0029
  consideration: 0.0029
  cent: 0.0028
  function: 0.0028
  support: 0.0028
  whose: 0.0027
  without: 0.0027
  complaint: 0.0026
  discharge: 0.0026
  present: 0.0025

Topic 3:
  essential: 0.0031
  nature: 0.0030
  examine: 0.0028
  permit: 0.0027
  ready: 0.0026
  hostility: 0.0025
  suggestion: 0.0024
  person: 0.0024
  yourself: 0.0024
  fourth: 0.0023

Topic 4:
  hold: 0.0033
  would: 0.0030
  thirty: 0.0029
  owner: 0.0028
  bill: 0.0028
  colony: 0.0026
  california: 0.0026
  legislature: 0.0024
  receipt: 0.0024
  those: 0.0023

Topic 5:
  violation: 0.0032
  see: 0.0




Top topics:

Topic 0:
  thereof: 0.0031
  taxis: 0.0026
  his: 0.0026
  allotment: 0.0026
  enact: 0.0026
  case: 0.0025
  practice: 0.0025
  await: 0.0024
  japan: 0.0024
  citizen: 0.0023

Topic 1:
  commerce: 0.0030
  declaration: 0.0028
  gratifying: 0.0027
  completion: 0.0027
  field: 0.0026
  selection: 0.0026
  possession: 0.0025
  offense: 0.0025
  character: 0.0025
  her: 0.0025

Topic 2:
  recommendation: 0.0026
  dispose: 0.0026
  entirely: 0.0026
  complete: 0.0026
  tribunal: 0.0026
  expense: 0.0026
  little: 0.0025
  deficiency: 0.0025
  date: 0.0025
  equal: 0.0025

Topic 3:
  date: 0.0029
  appoint: 0.0026
  saving: 0.0026
  limit: 0.0025
  jurisdiction: 0.0025
  show: 0.0024
  refer: 0.0024
  valuable: 0.0024
  can: 0.0024
  deposit: 0.0024

Topic 4:
  object: 0.0036
  legislative: 0.0031
  some: 0.0029
  exposition: 0.0028
  financial: 0.0027
  pursue: 0.0026
  justly: 0.0026
  branch: 0.0026
  import: 0.0026
  security: 0.0026

Topic 5:
  island: 0.0031
  york: 0.




Top topics:

Topic 0:
  child: 0.0032
  amendment: 0.0026
  actual: 0.0026
  loss: 0.0026
  expansion: 0.0025
  troop: 0.0025
  standard: 0.0025
  movement: 0.0025
  fashion: 0.0024
  confer: 0.0024

Topic 1:
  commander: 0.0029
  adjustment: 0.0027
  stock: 0.0026
  beyond: 0.0025
  british: 0.0025
  manufacturer: 0.0025
  report: 0.0025
  least: 0.0024
  farmer: 0.0024
  protection: 0.0023

Topic 2:
  field: 0.0029
  their: 0.0027
  seven: 0.0027
  traffic: 0.0025
  fix: 0.0025
  undertake: 0.0025
  season: 0.0025
  useful: 0.0024
  through: 0.0024
  import: 0.0024

Topic 3:
  intention: 0.0035
  industrial: 0.0032
  woman: 0.0029
  oppose: 0.0029
  remedy: 0.0029
  prosecution: 0.0028
  post: 0.0028
  timber: 0.0027
  recommend: 0.0026
  interfere: 0.0025

Topic 4:
  president: 0.0029
  free: 0.0028
  away: 0.0028
  center: 0.0027
  organize: 0.0026
  urge: 0.0025
  exceed: 0.0025
  reclamation: 0.0025
  short: 0.0024
  personal: 0.0024

Topic 5:
  distribute: 0.0032
  civilization




Top topics:

Topic 0:
  confidence: 0.0031
  obtain: 0.0030
  unity: 0.0030
  forth: 0.0028
  asia: 0.0028
  fourth: 0.0027
  thing: 0.0027
  far: 0.0027
  religion: 0.0027
  charge: 0.0026

Topic 1:
  south: 0.0029
  father: 0.0029
  capacity: 0.0028
  both: 0.0027
  obtain: 0.0025
  ally: 0.0024
  certainly: 0.0024
  task: 0.0024
  fighting: 0.0023
  bank: 0.0023

Topic 2:
  long: 0.0030
  cooperative: 0.0030
  course: 0.0030
  france: 0.0028
  congress: 0.0028
  self: 0.0028
  legislation: 0.0027
  afford: 0.0027
  problem: 0.0026
  portion: 0.0026

Topic 3:
  recent: 0.0036
  heavy: 0.0030
  struggle: 0.0029
  race: 0.0027
  immediately: 0.0027
  either: 0.0027
  certainly: 0.0026
  principal: 0.0026
  national: 0.0025
  daily: 0.0025

Topic 4:
  seven: 0.0039
  growth: 0.0031
  time: 0.0030
  careful: 0.0030
  coal: 0.0029
  effect: 0.0028
  possible: 0.0028
  secretary: 0.0027
  next: 0.0027
  allow: 0.0026

Topic 5:
  tax: 0.0032
  each: 0.0029
  study: 0.0028
  training: 0.002




Top topics:

Topic 0:
  doubt: 0.0030
  eight: 0.0028
  operation: 0.0027
  benefit: 0.0027
  raise: 0.0026
  immediate: 0.0026
  major: 0.0026
  study: 0.0026
  union: 0.0025
  long: 0.0025

Topic 1:
  room: 0.0038
  set: 0.0033
  march: 0.0032
  body: 0.0031
  hand: 0.0030
  enforcement: 0.0028
  alone: 0.0028
  presidency: 0.0027
  department: 0.0027
  different: 0.0027

Topic 2:
  political: 0.0030
  development: 0.0030
  quality: 0.0030
  dollar: 0.0029
  truly: 0.0029
  ought: 0.0029
  truman: 0.0029
  bad: 0.0028
  fight: 0.0028
  paris: 0.0027

Topic 3:
  mankind: 0.0032
  today: 0.0031
  threaten: 0.0031
  inflation: 0.0031
  during: 0.0030
  generally: 0.0029
  assume: 0.0028
  city: 0.0028
  hopeful: 0.0028
  these: 0.0028

Topic 4:
  matter: 0.0034
  relationship: 0.0032
  diplomatic: 0.0031
  southeast: 0.0029
  federal: 0.0029
  aid: 0.0028
  payment: 0.0027
  senate: 0.0027
  love: 0.0027
  accord: 0.0026

Topic 5:
  duty: 0.0044
  staff: 0.0032
  action: 0.0030
  befor




Top topics:

Topic 0:
  clearly: 0.0032
  employment: 0.0032
  how: 0.0031
  march: 0.0030
  total: 0.0029
  some: 0.0029
  economic: 0.0029
  fail: 0.0028
  dukakis: 0.0028
  pursue: 0.0028

Topic 1:
  benefit: 0.0040
  would: 0.0037
  200: 0.0036
  half: 0.0035
  waste: 0.0033
  search: 0.0032
  able: 0.0032
  gulf: 0.0032
  basic: 0.0031
  wide: 0.0031

Topic 2:
  put: 0.0037
  washington: 0.0032
  january: 0.0030
  private: 0.0029
  city: 0.0028
  down: 0.0027
  aid: 0.0027
  keep: 0.0027
  independence: 0.0027
  court: 0.0026

Topic 3:
  all: 0.0035
  step: 0.0031
  accomplish: 0.0031
  affirmative: 0.0028
  salt: 0.0028
  alliance: 0.0027
  close: 0.0027
  attempt: 0.0027
  town: 0.0026
  implement: 0.0026

Topic 4:
  where: 0.0038
  lead: 0.0038
  require: 0.0035
  destroy: 0.0034
  presidency: 0.0033
  enact: 0.0032
  modern: 0.0032
  foreign: 0.0031
  people: 0.0030
  balance: 0.0030

Topic 5:
  quote: 0.0037
  americans: 0.0036
  save: 0.0035
  belief: 0.0033
  ford: 0.0031





Top topics:

Topic 0:
  program: 0.0031
  race: 0.0031
  think: 0.0030
  believe: 0.0030
  uniform: 0.0030
  history: 0.0029
  institution: 0.0029
  response: 0.0028
  liberty: 0.0027
  her: 0.0026

Topic 1:
  his: 0.0042
  palestinian: 0.0038
  light: 0.0032
  white: 0.0031
  guarantee: 0.0031
  solution: 0.0031
  center: 0.0030
  line: 0.0030
  role: 0.0029
  afghanistan: 0.0028

Topic 2:
  reject: 0.0035
  mark: 0.0032
  audience: 0.0030
  enough: 0.0029
  judge: 0.0029
  life: 0.0029
  tear: 0.0028
  iraqis: 0.0027
  six: 0.0027
  dollar: 0.0027

Topic 3:
  usa: 0.0046
  just: 0.0038
  dr.: 0.0034
  reward: 0.0031
  approach: 0.0030
  cooperation: 0.0030
  secretary: 0.0029
  read: 0.0029
  date: 0.0029
  oppose: 0.0029

Topic 4:
  ahead: 0.0039
  brave: 0.0034
  food: 0.0034
  man: 0.0032
  authority: 0.0032
  study: 0.0030
  order: 0.0029
  whole: 0.0029
  along: 0.0028
  tell: 0.0028

Topic 5:
  hear: 0.0038
  organization: 0.0036
  innocent: 0.0035
  imagine: 0.0034
  the: 0.0



In [11]:
def process_topic_evolution(topic_models):
    """Process topic models into evolution chains for visualization.

    Args:
        topic_models: Dictionary mapping periods to TopicModel instances

    Returns:
        List of dicts containing evolution data for visualization

    """
    all_data = []
    # Sort periods chronologically
    periods = sorted(topic_models.keys(), key=lambda x: int(x.split("-")[0]))

    # Initialize chains from first period
    first_period = periods[0]
    first_model = topic_models[first_period]

    # Create initial topic chains
    chains = {}
    for idx, topic in enumerate(first_model.topics):
        chain_id = f"Chain_{idx}"
        top_words = [word for word, _ in topic.words[:3]]
        topic_label = ", ".join(top_words)

        chains[chain_id] = {
            "id": chain_id,
            "current_topic": topic,
            "label": topic_label,
        }

        # Add first period data point
        all_data.append(
            {
                "year": int(first_period.split("-")[0]),
                "period": first_period,
                "chain_id": chain_id,
                "topic_label": topic_label,
                "weight": topic.words[0][1],  # Use weight of top word
                "words": topic_label,
            }
        )

    # Follow alignments through subsequent periods
    for i in range(len(periods) - 1):
        period1, period2 = periods[i], periods[i + 1]
        model1 = topic_models[period1]
        model2 = topic_models[period2]

        # Get aligned topics
        aligned_topics = model1.align_with(model2)

        # Update chains based on alignments
        new_chains = {}
        used_target_topics = set()

        for aligned in aligned_topics:
            # Find chain containing source topic
            source_chain = None
            for chain_id, chain in chains.items():
                if chain["current_topic"].id == aligned.source_topic.id:
                    source_chain = chain
                    break

            if source_chain and aligned.target_topic.id not in used_target_topics:
                chain_id = source_chain["id"]
                top_words = [word for word, _ in aligned.target_topic.words[:3]]
                topic_label = ", ".join(top_words)

                # Add data point for this period
                all_data.append(
                    {
                        "year": int(period2.split("-")[0]),
                        "period": period2,
                        "chain_id": chain_id,
                        "topic_label": source_chain[
                            "label"
                        ],  # Keep original label for continuity
                        "weight": aligned.target_topic.words[0][1],
                        "words": topic_label,  # Current words for tooltip
                    }
                )

                # Update chain
                new_chains[chain_id] = {
                    "id": chain_id,
                    "current_topic": aligned.target_topic,
                    "label": source_chain["label"],
                }
                used_target_topics.add(aligned.target_topic.id)

        chains = new_chains

    return all_data


def plot_topic_trends(topic_models):
    """Create interactive visualization of topic evolution over time.

    Args:
        topic_models: Dictionary mapping periods to TopicModel instances

    Returns:
        altair.Chart: Interactive visualization

    """
    import altair as alt
    import pandas as pd

    # Process data
    all_data = process_topic_evolution(topic_models)
    df = pd.DataFrame(all_data)

    # Create base chart
    base = alt.Chart(df).encode(
        x=alt.X(
            "year:Q",
            title="Year",
            scale=alt.Scale(domain=[df.year.min(), df.year.max()]),
        ),
        color=alt.Color(
            "topic_label:N",
            legend=alt.Legend(title="Topics", orient="bottom", columns=2),
        ),
        tooltip=[
            alt.Tooltip("year:Q", title="Year"),
            alt.Tooltip("words:N", title="Top Words"),
            alt.Tooltip("weight:Q", title="Weight", format=".3f"),
        ],
    )

    # Create line chart with points
    lines = base.mark_line(size=2).encode(
        y=alt.Y("weight:Q", title="Topic Weight"), detail="chain_id:N"
    )

    points = base.mark_circle(size=60).encode(y=alt.Y("weight:Q", title="Topic Weight"))

    # Combine charts
    chart = (
        (lines + points)
        .properties(width=800, height=400, title="Topic Evolution Over Time")
        .interactive()
    )

    # Add selection
    topic_selection = alt.selection_point(fields=["topic_label"], bind="legend")

    chart = chart.add_params(topic_selection).encode(
        opacity=alt.condition(topic_selection, alt.value(1), alt.value(0.2))
    )

    return chart


# Example usage:
topic_models = analyze_topics_by_period(models)
chart = plot_topic_trends(topic_models)
chart
# chart.save('topic_evolution.html')  # Save interactive chart


Analyzing topics for 1775-1799





Top topics:

Topic 0:
  kind: 0.0029
  acknowledgment: 0.0028
  notify: 0.0027
  complaint: 0.0025
  from: 0.0025
  seek: 0.0025
  credit: 0.0025
  contain: 0.0024
  humane: 0.0023
  maxim: 0.0023

Topic 1:
  lead: 0.0030
  moderation: 0.0030
  never: 0.0028
  executive: 0.0028
  house: 0.0027
  proportion: 0.0026
  insure: 0.0026
  feel: 0.0026
  heretofore: 0.0025
  inform: 0.0025

Topic 2:
  repel: 0.0039
  neutral: 0.0034
  establish: 0.0028
  itself: 0.0027
  constitutional: 0.0027
  view: 0.0027
  source: 0.0027
  propose: 0.0026
  usual: 0.0026
  indeed: 0.0025

Topic 3:
  intend: 0.0029
  facilitate: 0.0029
  extinguishment: 0.0028
  7th: 0.0027
  mischief: 0.0026
  whose: 0.0025
  consolation: 0.0025
  judgment: 0.0024
  connection: 0.0024
  mutual: 0.0024

Topic 4:
  september: 0.0032
  anxiety: 0.0032
  senekas: 0.0028
  vest: 0.0026
  relate: 0.0024
  proceed: 0.0024
  recommendation: 0.0024
  passamaquoddy: 0.0024
  conducive: 0.0024
  enable: 0.0024

Topic 5:
  offender:




Top topics:

Topic 0:
  have: 0.0032
  duly: 0.0032
  basis: 0.0030
  recollect: 0.0029
  rapidly: 0.0029
  provinces: 0.0027
  inland: 0.0027
  necessary: 0.0026
  employ: 0.0025
  restrain: 0.0024

Topic 1:
  operation: 0.0029
  execution: 0.0028
  wish: 0.0028
  likewise: 0.0028
  appoint: 0.0028
  jurisdiction: 0.0027
  survey: 0.0026
  justice: 0.0025
  prevail: 0.0025
  safety: 0.0025

Topic 2:
  arise: 0.0028
  high: 0.0027
  with: 0.0027
  description: 0.0026
  probable: 0.0025
  port: 0.0025
  ensue: 0.0025
  disturb: 0.0025
  visit: 0.0025
  pressure: 0.0024

Topic 3:
  come: 0.0031
  bless: 0.0029
  seat: 0.0028
  ought: 0.0028
  what: 0.0027
  make: 0.0027
  remedy: 0.0026
  turn: 0.0026
  negotiation: 0.0025
  question: 0.0025

Topic 4:
  faithful: 0.0031
  wrong: 0.0030
  friendship: 0.0029
  condition: 0.0029
  difficult: 0.0028
  import: 0.0028
  confine: 0.0028
  resource: 0.0028
  militia: 0.0027
  deficiency: 0.0027

Topic 5:
  next: 0.0036
  florida: 0.0035
  inter




Top topics:

Topic 0:
  however: 0.0031
  yet: 0.0030
  early: 0.0027
  relate: 0.0027
  harbor: 0.0026
  paper: 0.0026
  june: 0.0026
  immediate: 0.0025
  greatly: 0.0025
  change: 0.0025

Topic 1:
  violation: 0.0030
  sign: 0.0029
  confederacy: 0.0029
  mind: 0.0028
  whole: 0.0028
  prosecution: 0.0027
  united: 0.0026
  not: 0.0025
  past: 0.0025
  appropriate: 0.0025

Topic 2:
  close: 0.0033
  difficulty: 0.0032
  advance: 0.0031
  postmaster: 0.0029
  negotiate: 0.0028
  european: 0.0027
  controversy: 0.0026
  self: 0.0026
  rely: 0.0025
  1846: 0.0025

Topic 3:
  indulge: 0.0033
  provision: 0.0032
  debt: 0.0028
  produce: 0.0027
  unjust: 0.0027
  navy: 0.0026
  frontier: 0.0026
  additional: 0.0026
  accordingly: 0.0026
  disturb: 0.0025

Topic 4:
  indians: 0.0035
  consist: 0.0033
  source: 0.0029
  expenditure: 0.0028
  attempt: 0.0028
  installment: 0.0028
  still: 0.0027
  june: 0.0027
  dispute: 0.0027
  arise: 0.0027

Topic 5:
  land: 0.0029
  form: 0.0028
  modi




Top topics:

Topic 0:
  favorable: 0.0028
  geographical: 0.0026
  controversy: 0.0026
  reference: 0.0026
  progress: 0.0025
  very: 0.0025
  1850: 0.0025
  execution: 0.0025
  their: 0.0025
  turn: 0.0025

Topic 1:
  side: 0.0030
  permit: 0.0028
  derive: 0.0028
  presence: 0.0027
  efficiency: 0.0027
  withdraw: 0.0027
  represent: 0.0026
  settle: 0.0026
  share: 0.0026
  custom: 0.0026

Topic 2:
  advantage: 0.0030
  confine: 0.0026
  age: 0.0025
  currency: 0.0025
  material: 0.0024
  three: 0.0024
  recognition: 0.0023
  supply: 0.0023
  admission: 0.0023
  obligation: 0.0023

Topic 3:
  hold: 0.0033
  california: 0.0030
  thirty: 0.0029
  would: 0.0028
  owner: 0.0026
  transportation: 0.0025
  bill: 0.0025
  receipt: 0.0025
  decision: 0.0024
  sectional: 0.0024

Topic 4:
  whereas: 0.0031
  commend: 0.0031
  old: 0.0030
  republic: 0.0029
  more: 0.0028
  favor: 0.0026
  aid: 0.0026
  application: 0.0026
  matter: 0.0026
  fear: 0.0024

Topic 5:
  nature: 0.0029
  ship: 0.0




Top topics:

Topic 0:
  thereof: 0.0032
  practice: 0.0031
  circumstance: 0.0027
  case: 0.0026
  his: 0.0026
  division: 0.0025
  speedy: 0.0025
  concede: 0.0025
  select: 0.0024
  town: 0.0024

Topic 1:
  possession: 0.0030
  influence: 0.0028
  canal: 0.0028
  safe: 0.0028
  british: 0.0027
  coin: 0.0026
  field: 0.0026
  exhibition: 0.0026
  extend: 0.0026
  opportunity: 0.0025

Topic 2:
  gratifying: 0.0031
  ballot: 0.0030
  connection: 0.0029
  little: 0.0028
  compel: 0.0028
  paris: 0.0028
  argument: 0.0028
  bear: 0.0027
  dispute: 0.0026
  date: 0.0025

Topic 3:
  limit: 0.0027
  detail: 0.0027
  another: 0.0026
  whom: 0.0026
  spain: 0.0025
  race: 0.0025
  violence: 0.0024
  germany: 0.0024
  date: 0.0024
  can: 0.0024

Topic 4:
  expense: 0.0037
  legislative: 0.0033
  pass: 0.0031
  assurance: 0.0030
  dispatch: 0.0029
  pound: 0.0028
  import: 0.0027
  proceed: 0.0025
  some: 0.0025
  loss: 0.0025

Topic 5:
  preservation: 0.0027
  accept: 0.0026
  which: 0.0026
 




Top topics:

Topic 0:
  sufficient: 0.0032
  easy: 0.0028
  recommend: 0.0026
  comprehensive: 0.0026
  freedom: 0.0025
  nature: 0.0025
  confer: 0.0025
  damage: 0.0025
  have: 0.0024
  administer: 0.0024

Topic 1:
  commander: 0.0031
  govern: 0.0028
  improvement: 0.0027
  force: 0.0026
  committee: 0.0025
  adjustment: 0.0025
  industry: 0.0025
  enact: 0.0024
  second: 0.0024
  resolution: 0.0024

Topic 2:
  troop: 0.0026
  passage: 0.0025
  child: 0.0025
  expansion: 0.0025
  period: 0.0024
  pende: 0.0024
  december: 0.0023
  foundation: 0.0022
  amendment: 0.0022
  undertake: 0.0022

Topic 3:
  recently: 0.0033
  maintain: 0.0029
  point: 0.0027
  joint: 0.0026
  bring: 0.0026
  establishment: 0.0026
  between: 0.0025
  fully: 0.0025
  render: 0.0025
  afford: 0.0025

Topic 4:
  stock: 0.0030
  else: 0.0030
  influence: 0.0029
  ownership: 0.0027
  americans: 0.0025
  final: 0.0025
  refer: 0.0025
  patriotic: 0.0024
  conviction: 0.0024
  region: 0.0024

Topic 5:
  center: 0




Top topics:

Topic 0:
  certainly: 0.0029
  freedom: 0.0028
  south: 0.0027
  expenditure: 0.0027
  science: 0.0027
  achieve: 0.0026
  gain: 0.0026
  allies: 0.0025
  everywhere: 0.0024
  refuse: 0.0024

Topic 1:
  seven: 0.0040
  careful: 0.0031
  secretary: 0.0030
  possible: 0.0030
  time: 0.0028
  wealth: 0.0028
  next: 0.0028
  make: 0.0028
  leadership: 0.0028
  growth: 0.0027

Topic 2:
  behind: 0.0032
  fourth: 0.0031
  confidence: 0.0031
  private: 0.0026
  legislative: 0.0026
  thing: 0.0026
  charge: 0.0025
  purchase: 0.0025
  create: 0.0024
  raw: 0.0024

Topic 3:
  prohibition: 0.0038
  heavy: 0.0028
  council: 0.0028
  recent: 0.0027
  refer: 0.0027
  good: 0.0026
  either: 0.0026
  strengthen: 0.0025
  generation: 0.0025
  primary: 0.0024

Topic 4:
  situation: 0.0032
  except: 0.0029
  problem: 0.0028
  world: 0.0028
  with: 0.0027
  committee: 0.0026
  render: 0.0026
  congress: 0.0026
  abroad: 0.0025
  yes: 0.0025

Topic 5:
  participate: 0.0037
  fair: 0.0027
  h




Top topics:

Topic 0:
  senate: 0.0031
  matter: 0.0029
  diplomatic: 0.0028
  member: 0.0027
  accord: 0.0027
  proper: 0.0027
  nuclear: 0.0027
  casualty: 0.0027
  training: 0.0026
  also: 0.0025

Topic 1:
  debt: 0.0036
  continent: 0.0034
  today: 0.0032
  these: 0.0032
  than: 0.0031
  win: 0.0029
  effectively: 0.0028
  laos: 0.0028
  secondly: 0.0028
  budget: 0.0028

Topic 2:
  hopeful: 0.0033
  eight: 0.0033
  rather: 0.0033
  conversation: 0.0032
  that: 0.0030
  during: 0.0028
  associate: 0.0028
  minute: 0.0027
  complete: 0.0027
  sign: 0.0027

Topic 3:
  assume: 0.0036
  completely: 0.0033
  would: 0.0032
  service: 0.0029
  set: 0.0028
  and: 0.0028
  equality: 0.0027
  influence: 0.0027
  march: 0.0027
  presidency: 0.0027

Topic 4:
  succeed: 0.0038
  troop: 0.0034
  nam: 0.0034
  vietnam: 0.0033
  word: 0.0032
  result: 0.0030
  salary: 0.0030
  direct: 0.0029
  race: 0.0029
  presidential: 0.0028

Topic 5:
  long: 0.0035
  quality: 0.0033
  american: 0.0033
  trul




Top topics:

Topic 0:
  duty: 0.0040
  prosperity: 0.0031
  class: 0.0031
  how: 0.0030
  being: 0.0029
  amendment: 0.0029
  when: 0.0028
  case: 0.0027
  simply: 0.0027
  gentleman: 0.0027

Topic 1:
  cover: 0.0032
  city: 0.0032
  reserve: 0.0032
  stop: 0.0031
  down: 0.0031
  east: 0.0031
  faith: 0.0031
  opportunity: 0.0030
  available: 0.0030
  gun: 0.0029

Topic 2:
  accomplish: 0.0034
  all: 0.0033
  show: 0.0031
  aggression: 0.0030
  environment: 0.0029
  close: 0.0029
  affirmative: 0.0029
  encourage: 0.0029
  too: 0.0029
  three: 0.0028

Topic 3:
  strengthen: 0.0031
  wall: 0.0030
  short: 0.0029
  example: 0.0029
  iraq: 0.0029
  minority: 0.0028
  establish: 0.0028
  everywhere: 0.0028
  trade: 0.0027
  tie: 0.0027

Topic 4:
  senate: 0.0036
  within: 0.0035
  action: 0.0033
  danger: 0.0032
  political: 0.0032
  foreign: 0.0031
  knowledge: 0.0030
  ahead: 0.0030
  king: 0.0029
  ability: 0.0028

Topic 5:
  representative: 0.0035
  need: 0.0034
  quote: 0.0032
  pol




Top topics:

Topic 0:
  solution: 0.0036
  africa: 0.0035
  laden: 0.0033
  process: 0.0032
  governor: 0.0031
  training: 0.0030
  discussion: 0.0029
  treat: 0.0029
  debt: 0.0029
  testing: 0.0028

Topic 1:
  try: 0.0040
  authority: 0.0038
  center: 0.0034
  brave: 0.0034
  man: 0.0033
  qaeda: 0.0032
  deserve: 0.0032
  special: 0.0032
  fine: 0.0031
  five: 0.0031

Topic 2:
  reform: 0.0042
  anywhere: 0.0032
  remember: 0.0031
  usa: 0.0031
  extend: 0.0030
  why: 0.0030
  court: 0.0029
  hard: 0.0029
  will: 0.0029
  nation: 0.0028

Topic 3:
  open: 0.0033
  pressure: 0.0032
  toward: 0.0030
  may: 0.0028
  strongly: 0.0028
  never: 0.0027
  fuel: 0.0026
  factory: 0.0026
  draw: 0.0026
  problem: 0.0026

Topic 4:
  keep: 0.0041
  entrepreneur: 0.0033
  ship: 0.0033
  announce: 0.0031
  wrong: 0.0030
  suppose: 0.0029
  lose: 0.0029
  impose: 0.0029
  senate: 0.0029
  stake: 0.0028

Topic 5:
  dollar: 0.0040
  audience: 0.0035
  trade: 0.0032
  wonder: 0.0032
  civilian: 0.003




Top topics:

Topic 0:
  vice: 0.0034
  earn: 0.0034
  disposal: 0.0033
  drug: 0.0033
  pull: 0.0031
  obligation: 0.0031
  punish: 0.0031
  oligarchy: 0.0030
  now: 0.0029
  optimistic: 0.0029

Topic 1:
  threat: 0.0037
  unlimited: 0.0032
  purpose: 0.0032
  harness: 0.0031
  into: 0.0031
  about: 0.0030
  completely: 0.0029
  totally: 0.0029
  whom: 0.0029
  should: 0.0029

Topic 2:
  network: 0.0031
  tech: 0.0031
  immediately: 0.0030
  vehicle: 0.0030
  waste: 0.0030
  female: 0.0029
  back: 0.0029
  check: 0.0029
  stay: 0.0029
  far: 0.0028

Topic 3:
  sway: 0.0039
  future: 0.0038
  raise: 0.0033
  reclaim: 0.0033
  security: 0.0031
  object: 0.0031
  external: 0.0031
  transition: 0.0030
  walk: 0.0029
  office: 0.0029

Topic 4:
  life: 0.0034
  beautiful: 0.0033
  earn: 0.0033
  day: 0.0032
  meanwhile: 0.0031
  provide: 0.0030
  chief: 0.0029
  honest: 0.0028
  great: 0.0027
  gold: 0.0027

Topic 5:
  expel: 0.0041
  harbor: 0.0037
  unjustly: 0.0031
  consequential: 0.003