In [None]:
from impresso import connect, AND, OR, DateRange

# Create the API client used by every cell below. It targets the dev
# deployment; persisted_token=True presumably reuses a locally stored
# token — confirm against the impresso-py docs.
impresso = connect(
    "https://dev.impresso-project.ch/public-api",
    persisted_token=True,
)

# Text reuse clusters

## Find clusters mentioning text

In [None]:
# Search clusters for "banana" with explicit ordering and pagination
# arguments; the following cell reports the paging metadata of this result.
result = impresso.text_reuse.clusters.find(
    text="banana",
    order_by="passages-count",
    offset=2,
    limit=5,
)
result.df

In [None]:
# Report the paging metadata (total, offset, limit) read from the `result`
# object assigned in the cell above.
f"The result above contains {result.total} clusters in total. The offset is {result.offset} and the limit is {result.limit}."

## Cluster size

In [None]:
# cluster_size takes a two-element tuple — presumably (min, max) bounds;
# confirm in the API docs.
result = impresso.text_reuse.clusters.find(text="banana", cluster_size=(50, 100))
result.df

## Mentioning text in title

In [None]:
# Filter on `title` (no `text` argument here), combining two terms with AND,
# together with a cluster_size tuple.
result = impresso.text_reuse.clusters.find(
    cluster_size=(100, 200),
    title=AND("luxembourg", "suisse"),
)
result.df

## Lexical overlap

In [None]:
# lexical_overlap takes a two-element tuple — presumably a (min, max)
# range; confirm units and inclusivity in the API docs.
result = impresso.text_reuse.clusters.find(text="banana", lexical_overlap=(50, 51))
result.df

## Day delta

Day delta is the number of days between the first and last mention of the text in the cluster.

In [None]:
# day_delta takes a two-element tuple of day counts — presumably (min, max).
result = impresso.text_reuse.clusters.find(text="banana", day_delta=(50, 100))
result.df

## Date range

In [None]:
# Search clusters for "banana" with a date_range filter built from two
# ISO-formatted date strings, then show the total and the first row.
result = impresso.text_reuse.clusters.find(
    text="banana",
    date_range=DateRange("1921-05-21", "2001-01-02"),
)
print(f"{result.total} items found.")
result.df.head(1)

## Newspaper

In [None]:
# Filter by newspaper ID; OR(...) supplies two alternative IDs.
result = impresso.text_reuse.clusters.find(text="banana", newspaper_id=OR("EXP", "GDL"))
print(f"{result.total} items found.")
result.df.head(1)

## Collection

In [None]:
# Filter by collection ID only (no text query). The ID below looks like a
# placeholder — replace it with a real collection ID to get results.
result = impresso.text_reuse.clusters.find(
    collection_id="12312312",
)
print(f"{result.total} items found.")
result.df.head(1)

## Front page

In [None]:
# Pass front_page=True alongside the text query.
result = impresso.text_reuse.clusters.find(text="banana", front_page=True)
print(f"{result.total} items found.")
result.df.head(1)

## Topic

In [None]:
# Filter by topic ID; OR(...) supplies two alternative topic IDs.
result = impresso.text_reuse.clusters.find(
    text="banana",
    topic_id=OR(
        "tm-fr-all-v2.0_tp07_fr",
        "tm-fr-all-v2.0_tp48_fr",
    ),
)
print(f"{result.total} items found.")
result.df.head(1)

## Language

In [None]:
# Filter by language code; OR(...) supplies two alternative codes.
result = impresso.text_reuse.clusters.find(text="luxembourg", language=OR("it", "en"))
print(f"{result.total} items found.")
result.df.head(1)

## Country

In [None]:
# Filter by country code; OR(...) supplies two alternative codes.
result = impresso.text_reuse.clusters.find(text="schengen", country=OR("FR", "CH"))
print(f"{result.total} items found.")
result.df.head(1)

## Entity mention

In [None]:
# Filter by entity mention, given as plain names combined with OR(...).
result = impresso.text_reuse.clusters.find(
    mention=OR("Belval", "Lausanne"),
    text="schengen",
)
print(f"{result.total} items found.")
result.df.head(1)

## Entity by ID

In [None]:
# Filter by a single entity ID (as opposed to the name-based `mention`
# filter used in the previous section).
result = impresso.text_reuse.clusters.find(
    entity_id="aida-0001-54-Switzerland",
    text="banana",
)
print(f"{result.total} items found.")
result.df.head(1)

# Text reuse clusters facets

## Date range

In [None]:
# Request the "daterange" facet for clusters, with a lexical_overlap=(1, 2)
# filter, and show the last three rows of the returned page.
result = impresso.text_reuse.clusters.facet(
    "daterange",
    lexical_overlap=(1, 2),
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.tail(3)

## Cluster size

In [None]:
# Request the "textReuseClusterSize" facet for clusters, with a
# lexical_overlap=(1, 2) filter.
result = impresso.text_reuse.clusters.facet(
    "textReuseClusterSize",
    lexical_overlap=(1, 2),
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

## Lexical overlap

In [None]:
# Request the "textReuseClusterLexicalOverlap" facet for clusters, with a
# lexical_overlap=(1, 2) filter.
result = impresso.text_reuse.clusters.facet(
    "textReuseClusterLexicalOverlap",
    lexical_overlap=(1, 2),
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

## Day delta

In [None]:
# Request the "textReuseClusterDayDelta" facet for clusters, with a
# lexical_overlap=(1, 2) filter.
result = impresso.text_reuse.clusters.facet(
    "textReuseClusterDayDelta",
    lexical_overlap=(1, 2),
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

## Newspaper

In [None]:
# Request the "newspaper" facet for clusters, with a lexical_overlap=(1, 2)
# filter.
result = impresso.text_reuse.clusters.facet(
    "newspaper",
    lexical_overlap=(1, 2),
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

# Text reuse passages

## Find passages mentioning text

In [None]:
# Search passages for "belval" with explicit ordering and pagination
# arguments; the following cell reports the paging metadata of this result.
result = impresso.text_reuse.passages.find(
    text="belval",
    order_by="clusterSize",
    offset=2,
    limit=5,
)
result.df

In [None]:
# Report the paging metadata (total, offset, limit) read from the `result`
# object assigned in the cell above.
f"The result above contains {result.total} passages in total. The offset is {result.offset} and the limit is {result.limit}."

## Cluster size

In [None]:
# cluster_size takes a two-element tuple — presumably (min, max) bounds;
# confirm in the API docs.
result = impresso.text_reuse.passages.find(text="banana", cluster_size=(50, 100))
result.df

## Mentioning text in title

In [None]:
# Filter on `title` (no `text` argument here), combining two terms with AND,
# together with a cluster_size tuple.
result = impresso.text_reuse.passages.find(
    cluster_size=(100, 200),
    title=AND("luxembourg", "suisse"),
)
result.df.head(1)

## Lexical overlap

In [None]:
# lexical_overlap takes a two-element tuple — presumably a (min, max)
# range; confirm units and inclusivity in the API docs.
result = impresso.text_reuse.passages.find(text="banana", lexical_overlap=(50, 51))
result.df.head(1)

## Day delta

Day delta is the number of days between the first and last mention of the text in the cluster.

In [None]:
# day_delta takes a two-element tuple of day counts — presumably (min, max).
result = impresso.text_reuse.passages.find(text="banana", day_delta=(50, 100))
result.df.head(1)

## Date range

In [None]:
# Search passages for "banana" with a date_range filter built from two
# ISO-formatted date strings, then show the total and the first row.
result = impresso.text_reuse.passages.find(
    text="banana",
    date_range=DateRange("1921-05-21", "2001-01-02"),
)
print(f"{result.total} items found.")
result.df.head(1)

## Newspaper

In [None]:
# Filter by newspaper ID; OR(...) supplies two alternative IDs.
result = impresso.text_reuse.passages.find(text="banana", newspaper_id=OR("EXP", "GDL"))
print(f"{result.total} items found.")
result.df.head(1)

## Collection

In [None]:
# Filter by collection ID only (no text query). The ID below looks like a
# placeholder — replace it with a real collection ID to get results.
result = impresso.text_reuse.passages.find(
    collection_id="12312312",
)
print(f"{result.total} items found.")
result.df.head(1)

## Front page

In [None]:
# Pass front_page=True alongside the text query.
result = impresso.text_reuse.passages.find(text="banana", front_page=True)
print(f"{result.total} items found.")
result.df.head(1)

## Topic

In [None]:
# Filter by topic ID; OR(...) supplies two alternative topic IDs.
result = impresso.text_reuse.passages.find(
    text="banana",
    topic_id=OR(
        "tm-fr-all-v2.0_tp07_fr",
        "tm-fr-all-v2.0_tp48_fr",
    ),
)
print(f"{result.total} items found.")
result.df.head(1)

## Language

In [None]:
# Filter by language code; OR(...) supplies two alternative codes.
result = impresso.text_reuse.passages.find(text="luxembourg", language=OR("it", "en"))
print(f"{result.total} items found.")
result.df.head(1)

## Country

In [None]:
# Filter by country code; OR(...) supplies two alternative codes.
result = impresso.text_reuse.passages.find(text="schengen", country=OR("FR", "CH"))
print(f"{result.total} items found.")
result.df.head(1)

## Entity mention

In [None]:
# Filter by entity mention, given as plain names combined with OR(...).
result = impresso.text_reuse.passages.find(
    mention=OR("Belval", "Lausanne"),
    text="schengen",
)
print(f"{result.total} items found.")
result.df.head(1)

## Entity by ID

In [None]:
# Filter by a single entity ID (as opposed to the name-based `mention`
# filter used in the previous section).
result = impresso.text_reuse.passages.find(
    entity_id="aida-0001-54-Switzerland",
    text="banana",
)
print(f"{result.total} items found.")
result.df.head(1)

# Text reuse passages facets

## newspaper

In [None]:
# Request the "newspaper" facet for passages. This section documents the
# passages facets, so the call goes through `passages.facet` like every
# other cell in the section (it previously called `clusters.facet`).
result = impresso.text_reuse.passages.facet("newspaper")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## daterange

In [None]:
# Request the "daterange" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "daterange",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## yearmonth

In [None]:
# Request the "yearmonth" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "yearmonth",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## year

In [None]:
# Request the "year" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "year",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## connectedClusters

In [None]:
# Request the "connectedClusters" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "connectedClusters",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## textReuseClusterSize

In [None]:
# Request the "textReuseClusterSize" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "textReuseClusterSize",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## textReuseClusterLexicalOverlap

In [None]:
# Request the "textReuseClusterLexicalOverlap" facet for passages matching
# the text query.
result = impresso.text_reuse.passages.facet(
    "textReuseClusterLexicalOverlap",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## textReuseClusterDayDelta

In [None]:
# Request the "textReuseClusterDayDelta" facet for passages matching the
# text query.
result = impresso.text_reuse.passages.facet(
    "textReuseClusterDayDelta",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## textReuseCluster

In [None]:
# Request the "textReuseCluster" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "textReuseCluster",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## collection

In [None]:
# Request the "collection" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "collection",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## topic

In [None]:
# Request the "topic" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "topic",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## person

In [None]:
# Request the "person" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "person",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## location

In [None]:
# Request the "location" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "location",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## nag

In [None]:
# Request the "nag" facet for passages matching the text query.
# NOTE(review): "nag" presumably stands for news agency — confirm in the API docs.
result = impresso.text_reuse.passages.facet(
    "nag",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## language

In [None]:
# Request the "language" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "language",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

## country

In [None]:
# Request the "country" facet for passages matching the text query.
result = impresso.text_reuse.passages.facet(
    "country",
    text="banana",
)
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)