In [None]:
import pandas as pd

# Read the tab-separated US speeches file into a DataFrame.
# NOTE(review): the corpus-building cell further down reads
# "../data/discours_US.csv" while this cell reads "data/discours_US.csv" —
# confirm which relative path matches the notebook's working directory.
speeches_csv = "data/discours_US.csv"
df = pd.read_csv(speeches_csv, sep="\t", engine="python")

# Sanity check: print the dimensions, then preview the first rows.
print("Dataset shape:", df.shape)
df.head()

In [None]:
# Count rows per speaker (value_counts sorts in descending order), then
# keep only the ten most frequent speakers for display.
speeches_per_speaker = df["speaker"].value_counts()
speeches_per_speaker.head(10)

In [None]:
from dataset_builders import build_corpus_from_discours_us

# Build the corpus from the speeches CSV through the project helper.
# NOTE(review): the first cell loads "data/discours_US.csv" (no "../") —
# confirm which relative path is the right one for this notebook.
corpus_source = "../data/discours_US.csv"
corpus = build_corpus_from_discours_us(
    corpus_source,
    corpus_name="Discours US (sentences)",
)

# Report the corpus counters exposed as ndoc / naut.
print("Nombre de documents (phrases) :", corpus.ndoc)
print("Nombre d'auteurs :", corpus.naut)

In [None]:
from IPython.display import display

# TD6 - simple search
# Jupyter only echoes the last expression of a cell, so the first three
# snippets are rendered explicitly; a bare `snippets[:3]` here would be
# evaluated and silently dropped.
snippets = corpus.search("america")
display(snippets[:3])

# TD6 - concordancer
# Whole-word, regex-based lookup; this is the final expression of the cell,
# so it is displayed automatically.
corpus.concorde(r"\bamerica\b", context=30).head()

In [None]:
from SearchEngine import SearchEngine

# Instantiate the project's SearchEngine on the corpus built in an earlier cell.
engine = SearchEngine(corpus)
# Printed once the constructor has returned.
print("Search engine ready.")

In [None]:
# Example query with use_tfidf and show_progress switched on, limited to
# top_n=5; the result is the cell's final expression so it gets displayed.
sample_hits = engine.search("america freedom", top_n=5, use_tfidf=True, show_progress=True)
sample_hits

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output
# Heading shown above the search controls.
title = widgets.Label(value="🔎 Moteur de recherche – Discours US (TD8)")

# Free-text field for the query keywords.
query_box = widgets.Text(
    description="Mots-clés :",
    placeholder="ex: america freedom"
)

# Number of results to request: integer slider from 1 to 20, starting at 5.
top_slider = widgets.IntSlider(
    value=5,
    min=1,
    max=20,
    step=1,
    description="Top N :"
)

# Trigger button and the output area that receives the search results.
search_btn = widgets.Button(description="Rechercher")
output = widgets.Output()

def on_search_clicked(_):
    """Run a search with the current widget values and render the result.

    Intended as a button click callback: the single positional argument
    (the clicked widget) is ignored. All output is produced inside the
    ``output`` widget, which is cleared first so each click replaces the
    previous content.

    Reads the module-level ``query_box``, ``top_slider``, ``engine`` and
    ``output`` objects. Returns ``None``.
    """
    with output:
        clear_output()
        # Strip once up front: a whitespace-only query is rejected, and the
        # engine never receives leading/trailing blanks.
        query = query_box.value.strip()
        if not query:
            print("Veuillez entrer des mots-clés.")
            return
        results = engine.search(
            query,
            top_n=top_slider.value,
            use_tfidf=True,
            show_progress=False
        )
        display(results)

# Register the callback on the button.
search_btn.on_click(on_search_clicked)

# Layout: heading on top, the three controls side by side, results underneath.
controls_row = widgets.HBox([query_box, top_slider, search_btn])
ui = widgets.VBox([title, controls_row, output])

display(ui)


In [None]:
from explorer import Explorer

explorer = Explorer(corpus)

# This cell ends with plt.show(), and Jupyter only echoes a cell's final
# expression, so intermediate results have to be displayed explicitly.
# NOTE(review): both type arguments are "Document" — confirm that comparing
# a type with itself is intended.
comparison = explorer.compare_by_type("Document", "Document", top_n=10)
if comparison is not None:
    # Guarded in case the method renders by itself and returns nothing.
    display(comparison)

trend = explorer.temporal_trend("america", freq="Y")
display(trend)

import matplotlib.pyplot as plt

# Line chart of trend["rel_freq"] against trend["period"], drawn on an
# explicit Axes rather than through the pyplot state machine.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(trend["period"], trend["rel_freq"], marker="o")
ax.set_title("Évolution temporelle du mot 'america'")
ax.set_xlabel("Année")
ax.set_ylabel("Fréquence relative")
ax.grid(True)
plt.show()
