In [None]:
from impresso import AND, OR, DateRange, connect

# Create the API client used by every cell below. The URL points at the
# "dev" host of the public API.
# NOTE(review): persisted_token=True presumably reuses a locally stored
# access token instead of asking for a new one — confirm in the client docs.
impresso = connect(
    "https://dev.impresso-project.ch/public-api",
    persisted_token=True,
)

# Text reuse clusters

## Find clusters mentioning text

In [None]:
# Look up clusters matching the text "banana", ordered by "passages-count",
# requesting the page described by offset=2 and limit=5.
result = impresso.text_reuse.clusters.find(
    text="banana", order_by="passages-count", offset=2, limit=5
)
result.df  # last expression: rendered as the cell output

In [None]:
# Summarise the paging metadata of the `result` produced by the previous cell.
# Adjacent f-string literals are concatenated into a single string.
(
    f"The result above contains {result.total} clusters in total. "
    f"The offset is {result.offset} and the limit is {result.limit}."
)

## Cluster size

In [None]:
# Same text query, additionally filtered with cluster_size=(50, 100).
# NOTE(review): the tuple is presumably a (min, max) range — confirm whether
# the bounds are inclusive.
result = impresso.text_reuse.clusters.find(text="banana", cluster_size=(50, 100))
result.df

## Mentioning text in title

In [None]:
# Filter on the title instead of the text: AND(...) combines the two terms
# "luxembourg" and "suisse". Cluster size is limited with (100, 200).
result = impresso.text_reuse.clusters.find(
    cluster_size=(100, 200), title=AND("luxembourg", "suisse")
)
result.df

## Lexical overlap

In [None]:
# Text query narrowed with lexical_overlap=(50, 51).
# NOTE(review): presumably a (min, max) range in percent — confirm the unit.
result = impresso.text_reuse.clusters.find(text="banana", lexical_overlap=(50, 51))
result.df

## Day delta

`day_delta` is the number of days between the first and the last mention of the text in the cluster.

In [None]:
# Text query narrowed with day_delta=(50, 100).
# NOTE(review): presumably a (min, max) range of days — confirm inclusivity.
result = impresso.text_reuse.clusters.find(text="banana", day_delta=(50, 100))
result.df

## Date range

In [None]:
# Text query restricted to a date range built from two ISO-formatted date
# strings ("1921-05-21" and "2001-01-02").
# NOTE(review): presumably (start, end) — confirm whether the end date is
# inclusive.
result = impresso.text_reuse.clusters.find(
    text="banana",
    date_range=DateRange("1921-05-21", "2001-01-02"),
)
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Newspaper

In [None]:
# Text query restricted by newspaper: OR(...) combines the IDs "EXP" and "GDL".
result = impresso.text_reuse.clusters.find(text="banana", newspaper_id=OR("EXP", "GDL"))
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Collection

In [None]:
# Filter by collection only (no text query).
# NOTE(review): "12312312" looks like a placeholder ID — replace it with the
# ID of an existing collection to get results.
result = impresso.text_reuse.clusters.find(
    collection_id="12312312",
)
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Front page

In [None]:
# Text query combined with the front_page=True flag.
# NOTE(review): presumably keeps clusters with content on a front page —
# confirm the exact semantics in the client docs.
result = impresso.text_reuse.clusters.find(text="banana", front_page=True)
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Topic

In [None]:
# Text query restricted by topic: OR(...) combines two topic IDs.
result = impresso.text_reuse.clusters.find(
    text="banana",
    topic_id=OR(
        "tm-fr-all-v2.0_tp07_fr",
        "tm-fr-all-v2.0_tp48_fr",
    ),
)
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Language

In [None]:
# Text query for "luxembourg" restricted by language: OR(...) combines the
# codes "it" and "en".
result = impresso.text_reuse.clusters.find(text="luxembourg", language=OR("it", "en"))
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Country

In [None]:
# Text query for "schengen" restricted by country: OR(...) combines the
# codes "FR" and "CH".
result = impresso.text_reuse.clusters.find(text="schengen", country=OR("FR", "CH"))
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Entity mention

In [None]:
# Text query for "schengen" restricted by entity mention: OR(...) combines
# the mention strings "Belval" and "Lausanne".
result = impresso.text_reuse.clusters.find(text="schengen", mention=OR("Belval", "Lausanne"))
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result

## Entity by ID

In [None]:
# Text query restricted to a single entity, referenced by its ID rather than
# by a mention string.
result = impresso.text_reuse.clusters.find(
    text="banana", entity_id="aida-0001-54-Switzerland"
)
print(f"{result.total} items found.")
result.df.head(1)  # show only the first row of the result