In [1]:
from impresso import connect, AND, OR

impresso = connect()

🎉 You are now connected to the Impresso API!  🎉
🔗 Using API: https://dev.impresso-project.ch/public-api


# Text reuse clusters

## Find clusters mentioning text

In [2]:
result = impresso.text_reuse.clusters.find(
    text="banana",
    offset=2,
    limit=5,
    order_by="passages-count",
)
result.df

Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c137438978332,Un rapport géné-\nral au roi Léopold sur la si...,59.183673,2,1891-07-20,1891-07-21
tr-nobp-all-v01-c60129939020,"Wie Kinder der Mission ihm erzählt haben, gibt...",51.0,2,1896-02-06,1896-02-21
tr-nobp-all-v01-c42949726081,"renversé\nLÉOPOLDVILLE, 9 mars A la veille de ...",55.405405,2,1961-03-10,1961-03-10
tr-nobp-all-v01-c146028959252,veau\nPommes fondantes Gratin dauphinois\nLait...,48.0,2,1971-04-10,1971-05-28
tr-nobp-all-v01-c298240,La recette de la semaine\nBanana-Split\nCouper...,95.833333,2,1971-03-20,1971-03-20


In [3]:
f"The result above contains {result.total} clusters in total. The offset is {result.offset} and the limit is {result.limit}."

'The result above contains 190 clusters in total. The offset is 2 and the limit is 5.'

## Cluster size

In [4]:
result = impresso.text_reuse.clusters.find(
    text="banana",
    cluster_size=(50, 100),
)
result.df

Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c200886,PRIX CRAQUANTS\nPROFITEZ-EN MAINTENANT ! OFFRE...,1.010101,63,2007-05-15,2011-12-29
tr-nobp-all-v01-c117172,"La Chaux-de-Fonds,\nLe Sphynx samedi 22 mars 2...",38.571429,50,2008-01-14,2008-12-22


## Mentioning text in title

In [5]:
result = impresso.text_reuse.clusters.find(
    title=AND("luxembourg", "suisse"),
    cluster_size=(100, 200),
)
result.df

Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c433978,Trafic-marchandises Luxembourg — Suisse.\nAvec...,0.581395,139,1931-02-16,1958-01-17
tr-nobp-all-v01-c325226,JOURNAL de GENEVE\net GAZETTE de LAUSANNE\nEdi...,36.0,143,1991-12-13,1993-11-30


## Lexical overlap

In [6]:
result = impresso.text_reuse.clusters.find(
    text="banana",
    lexical_overlap=(50, 51),
)
result.df

Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c154619006960,Fusillade entre soldats de\nl’ONU et soldats c...,50.980392,4,1961-03-04,1961-03-04
tr-nobp-all-v01-c60129939020,"Wie Kinder der Mission ihm erzählt haben, gibt...",51.0,2,1896-02-06,1896-02-21
tr-nobp-all-v01-c240518594491,Encore avantageux plus\nValable du 28.8 au 3.9...,50.0,2,2007-08-28,2008-09-02


## Day delta

Number of days between the first and last mention of the text in the cluster.

In [7]:
result = impresso.text_reuse.clusters.find(
    text="banana",
    day_delta=(50, 100),
)
result.df

Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c34359907704,Les Sociétés anonymes suisses\npendant la cris...,78.536585,2,1937-05-08,1937-07-10


## Date range

In [8]:
from impresso import DateRange

result = impresso.text_reuse.clusters.find(
    text="banana", 
    date_range=DateRange("1921-05-21", "2001-01-02")
)
print(f"{result.total} items found.")
result.df.head(1)

95 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c171798985973,veut récupérer\nMatadi et Banana\nLéopoldville...,66.956522,2,1961-03-07,1961-03-07


## Newspaper

In [13]:
result = impresso.text_reuse.clusters.find(
    text="banana", 
    newspaper_id=OR("EXP", "GDL"),
)
print(f"{result.total} items found.")
result.df.head(1)

139 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c51539934627,"Banana, Feindouno et\nFeltscher à Lausanne\nFO...",100.0,2,2013-08-31,2013-08-31


## Collection

In [14]:
result = impresso.text_reuse.clusters.find(collection_id="12312312")
print(f"{result.total} items found.")
result.df.head(1)

0 items found.


## Front page

In [15]:
result = impresso.text_reuse.clusters.find(
    text="banana", 
    front_page=True,
)
print(f"{result.total} items found.")
result.df.head(1)

16 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c231928631691,"La « Panther » coulée\nMilan , 28 teptembre .\...",60.606061,2,1914-09-29,1914-09-29


## Topic

In [16]:
result = impresso.text_reuse.clusters.find(
    text="banana", 
    topic_id=OR("tm-fr-all-v2.0_tp07_fr", "tm-fr-all-v2.0_tp48_fr")
)
print(f"{result.total} items found.")
result.df.head(1)

16 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c117193,Deschansonsàenfairetrémousserplusd’un\nFocus\n...,100.0,2,2008-07-10,2008-07-10


## Language

In [17]:
result = impresso.text_reuse.clusters.find(
    text="luxembourg", 
    language=OR("it", "en")
)
print(f"{result.total} items found.")
result.df.head(1)

3 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c8590352366,"INTERNATIONAL\nPORTFOLIO SICAV\n1, rue Schille...",18.84058,30,1975-07-11,1997-03-26


## Country

In [18]:
result = impresso.text_reuse.clusters.find(
    text="schengen", 
    country=OR("FR", "CH")
)
print(f"{result.total} items found.")
result.df.head(1)

1792 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c249108133968,En route vers\nSchengen\nSUISSE _.\nEn dépit d...,97.297297,2,2001-03-27,2001-03-27


## Entity mention

In [19]:
result = impresso.text_reuse.clusters.find(
    text="schengen", 
    mention=OR("Belval", "Lausanne")
)
print(f"{result.total} items found.")
result.df.head(1)

43 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c60129931875,Die Verwaltung der öf-\nfentlichen Gebäude hat...,46.153846,2,1946-07-31,1946-09-20


## Entity by ID

In [20]:
result = impresso.text_reuse.clusters.find(
    text="banana",
    entity_id="aida-0001-54-Switzerland"
)
print(f"{result.total} items found.")
result.df.head(1)

21 items found.


Unnamed: 0_level_0,textSample,cluster.lexicalOverlap,cluster.clusterSize,cluster.timeCoverage.from,cluster.timeCoverage.to
cluster.id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tr-nobp-all-v01-c128849280553,"Schreckliche Seefahrt . Antwerpen ,\n(>; v . O...",71.527778,2,1892-02-19,1892-02-20


# Text reuse clusters facets

## Date range

In [21]:
result = impresso.text_reuse.clusters.facet("daterange", lexical_overlap=(1, 2))
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.tail(3)

** SearchFacet(type='daterange', num_buckets=321, buckets=[SearchFacetBucket(count=0, val='1700-01-01T00:00:00Z', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='1701-01-01T00:00:00Z', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='1702-01-01T00:00:00Z', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='1703-01-01T00:00:00Z', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='1704-01-01T00:00:00Z', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='1705-01-01T00:

Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
2018-01-01T00:00:00Z,6
2019-01-01T00:00:00Z,0
2020-01-01T00:00:00Z,0


## Cluster size

In [23]:
result = impresso.text_reuse.clusters.facet("textReuseClusterSize", lexical_overlap=(1, 2))
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

** SearchFacet(type='textReuseClusterSize', num_buckets=10, buckets=[SearchFacetBucket(count=1687, val='0', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='10000', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='20000', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='30000', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='40000', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='50000', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impre

Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
0,1687
10000,0
20000,0


## Lexical overlap

In [24]:
result = impresso.text_reuse.clusters.facet("textReuseClusterLexicalOverlap", lexical_overlap=(1, 2))
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

** SearchFacet(type='textReuseClusterLexicalOverlap', num_buckets=10, buckets=[SearchFacetBucket(count=1687, val='0', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='10', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='20', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='30', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='40', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=0, val='50', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.a

Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
0,1687
10,0
20,0


## Day delta

In [26]:
result = impresso.text_reuse.clusters.facet("textReuseClusterDayDelta", lexical_overlap=(1, 2))
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

** SearchFacet(type='textReuseClusterDayDelta', num_buckets=10, buckets=[SearchFacetBucket(count=219, val='0', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=28, val='10', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=5, val='20', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=4, val='30', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=13, val='40', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_client.types.Unset object at 0x11f566390>), SearchFacetBucket(count=10, val='50', uid=<impresso.api_client.types.Unset object at 0x11f566390>, item=<impresso.api_c

Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
0,219
10,28
20,5


## Newspaper

In [28]:
result = impresso.text_reuse.clusters.facet("newspaper", lexical_overlap=(1, 2))
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(3)

** SearchFacet(type='newspaper', num_buckets=54, buckets=[SearchFacetBucket(count=1112, val='EXP', uid='EXP', item=Year(uid='EXP', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=1071, val='IMP', uid='IMP', item=Year(uid='IMP', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=1018, val='LLE', uid='LLE', item=Year(uid='LLE', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=834, val='JDG', uid='JDG', item=Year(uid='JDG', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=812, val='GDL', uid='GDL', item=Year(uid='GDL', values=<impresso.api_client.types.Unset object at 0x11f566390>, ref

Unnamed: 0_level_0,count,uid,item.uid,item.acronym,item.labels,item.languages,item.included,item.name,item.endYear,item.startYear,item.countArticles,item.countIssues,item.countPages,item.deltaYear,item.properties,item.fetched
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
EXP,1112,EXP,EXP,,,,,,,,,,,,,
IMP,1071,IMP,IMP,,,,,,,,,,,,,
LLE,1018,LLE,LLE,,,,,,,,,,,,,


# Text reuse passages

## Find passages mentioning text

In [30]:
result = impresso.text_reuse.passages.find(
    text="belval",
    offset=2,
    limit=5,
    order_by="clusterSize",
)
result.df

Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c103079291010-IMP-2008-04-28-a-i0300@0:2689,0,2689,Tu n’aurais pas dû partir\nJean-Louis Grosmair...,Tu n’aurais pas dû partir,[35],[],[{'id': 'tr-nobp-all-v01-c103079291010'}],False,2689,2008-04-28T00:00:00+00:00,"[57,1275,1366,489]",IMP-2008-04-28-a-i0300,tr-nobp-all-v01-c103079291010,2,0,100.0,IMP,IMP-2008-04-28-a
c103079305149-tageblatt-1941-02-10-a-i0068@0:3569,0,3569,Gerichtszeitung\nAm 10. Mai gegen 9 Uhr morgen...,Gerichtszeitung,[10],[local-duma-mxB6kMKc],"[{'id': 'tr-nobp-all-v01-c103079305149'}, {'id...",False,3569,1941-02-10T00:00:00+00:00,"[100,1200,1810,1527]",tageblatt-1941-02-10-a-i0068,tr-nobp-all-v01-c103079305149,2,0,68.181818,tageblatt,tageblatt-1941-02-10-a
c103079397256-luxwort-1941-04-12-a-i0078@0:803,0,803,Oeffentliche Verdinggabe\nAuf die Lieferung vo...,Oeffentliche Verdinggabe,[5],[local-duma-mxB6kMKc],[{'id': 'tr-nobp-all-v01-c103079397256'}],False,803,1941-04-12T00:00:00+00:00,"[3634,1462,852,873]",luxwort-1941-04-12-a-i0078,tr-nobp-all-v01-c103079397256,2,3,62.337662,luxwort,luxwort-1941-04-12-a
c103079400257-tageblatt-1920-02-26-a-i0016@57:1129,57,1129,"Fahrplan ander ungen. Ab näch--\nsten Montag, ...",Lokalneuigkeiten.,[2],[],"[{'id': 'tr-nobp-all-v01-c103079400257'}, {'id...",False,1072,1920-02-26T00:00:00+00:00,"[1955,4490,902,1227]",tageblatt-1920-02-26-a-i0016,tr-nobp-all-v01-c103079400257,2,2,30.864198,tageblatt,tageblatt-1920-02-26-a
c103079422056-obermosel-1939-08-01-a-i0047@1533:4218,1533,4218,Am Abend des 20. Juni war der aus Ech-\nlernac...,Obergerichtshof.,[8],"[local-eb-aRm1PT_a, local-duma-mxB6kMKc]","[{'id': 'tr-nobp-all-v01-c103079422056'}, {'id...",False,2685,1939-08-01T00:00:00+00:00,"[1864,3321,835,2597]",obermosel-1939-08-01-a-i0047,tr-nobp-all-v01-c103079422056,2,1,56.561086,obermosel,obermosel-1939-08-01-a


In [33]:
f"The result above contains {result.total} passages in total. The offset is {result.offset} and the limit is {result.limit}."

'The result above contains 247 passages in total. The offset is 2 and the limit is 5.'

## Cluster size

In [36]:
result = impresso.text_reuse.passages.find(
    text="banana",
    cluster_size=(50, 100),
)
result.df

Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c200886-EXP-2009-07-29-a-i0041@0:522,0,522,PRIX CRAQUANTS\nPROFITEZ-EN MAINTENANT ! OFFRE...,,[4],[],"[{'id': 'tr-nobp-all-v01-c163801'}, {'id': 'tr...",False,522,2009-07-29T00:00:00+00:00,"[91,130,1487,1655]",EXP-2009-07-29-a-i0041,tr-nobp-all-v01-c200886,63,1689,1.010101,EXP,EXP-2009-07-29-a
c117172-IMP-2008-03-25-a-i0111@0:642,0,642,"La Chaux-de-Fonds,\nLe Sphynx samedi 22 mars 2...",,[12],[],[{'id': 'tr-nobp-all-v01-c117172'}],False,642,2008-03-25T00:00:00+00:00,"[148,315,1530,2220]",IMP-2008-03-25-a-i0111,tr-nobp-all-v01-c117172,50,343,38.571429,IMP,IMP-2008-03-25-a
c117172-EXP-2008-03-25-a-i0105@0:642,0,642,"La Chaux-de-Fonds,\nLe Sphynx samedi 22 mars 2...",,[12],[],[{'id': 'tr-nobp-all-v01-c117172'}],False,642,2008-03-25T00:00:00+00:00,"[148,315,1530,2220]",EXP-2008-03-25-a-i0105,tr-nobp-all-v01-c117172,50,343,38.571429,EXP,EXP-2008-03-25-a


## Mentioning text in title

In [38]:
result = impresso.text_reuse.passages.find(
    title=AND("luxembourg", "suisse"),
    cluster_size=(100, 200),
)
result.df.head(1)

Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c433978-obermosel-1946-08-14-a-i0035@0:474,0,474,CHEMINS DE FER LUXEMBOURGEOIS. — Avis.\nTarif ...,CHEMINS DE FER LUXEMBOURGEOIS. — Avis. Tarif i...,[7],[],[{'id': 'tr-nobp-all-v01-c433978'}],False,474,1946-08-14T00:00:00+00:00,"[2640,3768,840,392]",obermosel-1946-08-14-a-i0035,tr-nobp-all-v01-c433978,139,9832,0.581395,obermosel,obermosel-1946-08-14-a


## Lexical overlap

In [40]:
result = impresso.text_reuse.passages.find(
    text="banana",
    lexical_overlap=(50, 51),
)
result.df.head(1)

Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c154619006960-JDG-1961-03-04-a-i0097@6739:7176,6739,7176,"Léopoldville, 3. — (Reuter). — Une fusillade a...",Le représentant de M. «H» dénonce /'anarchie m...,[12],"[local-IIV_6APb-X0x8SwxS, local-IIV_6APb-4hURA...","[{'id': 'tr-nobp-all-v01-c154618904093'}, {'id...",False,437,1961-03-04T00:00:00+00:00,"[1452,1001,607,211]",JDG-1961-03-04-a-i0097,tr-nobp-all-v01-c154619006960,4,0,50.980392,JDG,JDG-1961-03-04-a


## Day delta

Number of days between the first and last mention of the text in the cluster.

In [42]:
result = impresso.text_reuse.passages.find(
    text="banana",
    day_delta=(50, 100),
)
result.df.head(1)

Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c34359907704-LLS-1937-05-08-a-i0013@0:2710,0,2710,Les Sociétés anonymes suisses\npendant la cris...,Les Sociétés anonymes suisses pendant la crise,[4],[local-FQTNIpkU-bvKkvb9s],"[{'id': 'tr-nobp-all-v01-c34359907704'}, {'id'...",False,2710,1937-05-08T00:00:00+00:00,"[140,2361,736,2146]",LLS-1937-05-08-a-i0013,tr-nobp-all-v01-c34359907704,2,63,78.536585,LLS,LLS-1937-05-08-a


## Date range

In [44]:
from impresso import DateRange

result = impresso.text_reuse.passages.find(
    text="banana", 
    date_range=DateRange("1921-05-21", "2001-01-02")
)
print(f"{result.total} items found.")
result.df.head(1)

194 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c77309774782-IMP-1967-06-23-a-i0023@1879:1976,1879,1976,"colonies de vacances », « Juanita\nBanana », «...","Des chansons dans l'air, une fanfare pour troi...",[4],"[local-la-cVcTVCC4-nnaiNiEv, local-An-RPAyTHnn...","[{'id': 'tr-nobp-all-v01-c299324'}, {'id': 'tr...",False,97,1967-06-23T00:00:00+00:00,"[721,1570,313,55]",IMP-1967-06-23-a-i0023,tr-nobp-all-v01-c77309774782,2,101,37.5,IMP,IMP-1967-06-23-a


## Newspaper

In [46]:
result = impresso.text_reuse.passages.find(
    text="banana", 
    newspaper_id=OR("EXP", "GDL"),
)
print(f"{result.total} items found.")
result.df.head(1)

145 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c51539934627-EXP-2013-08-31-a-i0228@0:634,0,634,"Banana, Feindouno et\nFeltscher à Lausanne\nFO...","Banana, Feindouno et Feltscher à Lausanne",[27],[local-duma-_liThC5Z],[{'id': 'tr-nobp-all-v01-c51539934627'}],False,634,2013-08-31T00:00:00+00:00,"[1446,1166,1928,2217]",EXP-2013-08-31-a-i0228,tr-nobp-all-v01-c51539934627,2,0,100.0,EXP,EXP-2013-08-31-a


## Collection

In [48]:
result = impresso.text_reuse.passages.find(collection_id="12312312")
print(f"{result.total} items found.")
result.df.head(1)

0 items found.


## Front page

In [50]:
result = impresso.text_reuse.passages.find(
    text="banana", 
    front_page=True,
)
print(f"{result.total} items found.")
result.df.head(1)

20 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c231928631691-LLE-1914-09-29-a-i0001@15035:15274,15035,15274,"La « Panther » coulée\nMilan , 28 teptembre .\...",LA GUERRE EUROPÉENNE,[1],"[local-eb-FuR4RpIy, local-eb-3rEiI2S8, local-d...","[{'id': 'tr-nobp-all-v01-c197568509353'}, {'id...",True,239,1914-09-29T00:00:00+00:00,"[2577,3325,476,275]",LLE-1914-09-29-a-i0001,tr-nobp-all-v01-c231928631691,2,0,60.606061,LLE,LLE-1914-09-29-a


## Topic

In [52]:
result = impresso.text_reuse.passages.find(
    text="banana", 
    topic_id=OR("tm-fr-all-v2.0_tp07_fr", "tm-fr-all-v2.0_tp48_fr")
)
print(f"{result.total} items found.")
result.df.head(1)

33 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c117193-EXP-2008-07-10-a-i0330@0:815,0,815,Deschansonsàenfairetrémousserplusd’un\nFocus\n...,Deschansonsàenfairetrémousserplusd’un,[31],[local-la-cVcTVCC4-nnaiNiEv],"[{'id': 'tr-nobp-all-v01-c117193'}, {'id': 'tr...",False,815,2008-07-10T00:00:00+00:00,"[890,607,805,585]",EXP-2008-07-10-a-i0330,tr-nobp-all-v01-c117193,2,0,100.0,EXP,EXP-2008-07-10-a


## Language

In [54]:
result = impresso.text_reuse.passages.find(
    text="luxembourg", 
    language=OR("it", "en")
)
print(f"{result.total} items found.")
result.df.head(1)

13 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c8590352366-JDG-1990-03-14-a-i0145@141:632,141,632,"L-1526 Luxembourg-5, bd de ia Foire\nRC. Luxem...",,[20],[],[{'id': 'tr-nobp-all-v01-c8590352366'}],False,491,1990-03-14T00:00:00+00:00,"[418,3682,1570,913]",JDG-1990-03-14-a-i0145,tr-nobp-all-v01-c8590352366,30,7929,18.84058,JDG,JDG-1990-03-14-a


## Country

In [56]:
result = impresso.text_reuse.passages.find(
    text="schengen", 
    country=OR("FR", "CH")
)
print(f"{result.total} items found.")
result.df.head(1)

3908 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c249108133968-IMP-2001-03-27-a-i0004@0:274,0,274,En route vers\nSchengen\nSUISSE _.\nEn dépit d...,En route vers Schengen,[1],[],[{'id': 'tr-nobp-all-v01-c249108133968'}],True,274,2001-03-27T00:00:00+00:00,"[2435,4461,551,791]",IMP-2001-03-27-a-i0004,tr-nobp-all-v01-c249108133968,2,0,97.297297,IMP,IMP-2001-03-27-a


## Entity mention

In [58]:
result = impresso.text_reuse.passages.find(
    text="schengen", 
    mention=OR("Belval", "Lausanne")
)
print(f"{result.total} items found.")
result.df.head(1)

93 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c34360149524-EXP-2008-09-13-a-i0231@0:496,0,496,Une première évaluation positive\nKEYSTONE\nSC...,Une première évaluation positive,[27],"[local-IIV_6APb-9J2N1eZG, local-duma-_liThC5Z]",[{'id': 'tr-nobp-all-v01-c34360149524'}],False,496,2008-09-13T00:00:00+00:00,"[311,241,1098,203]",EXP-2008-09-13-a-i0231,tr-nobp-all-v01-c34360149524,2,0,100.0,EXP,EXP-2008-09-13-a


## Entity by ID

In [60]:
result = impresso.text_reuse.passages.find(
    text="banana",
    entity_id="aida-0001-54-Switzerland"
)
print(f"{result.total} items found.")
result.df.head(1)

41 items found.


Unnamed: 0_level_0,offsetStart,offsetEnd,content,title,pageNumbers,collections,connectedClusters,isFront,size,date,pageRegions,article.id,textReuseCluster.id,textReuseCluster.clusterSize,textReuseCluster.timeDifferenceDay,textReuseCluster.lexicalOverlap,newspaper.id,issue.id
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
c128849280553-NZZ-1892-02-19-a-i0002@20029:21335,20029,21335,"Schreckliche Seefahrt . Antwerpen ,\n(>; v . O...",,[2],[local-T1DjhD1h-uNBjhkZe],[{'id': 'tr-nobp-all-v01-c128849280553'}],False,1306,1892-02-19T00:00:00+00:00,"[2927,2504,864,1040]",NZZ-1892-02-19-a-i0002,tr-nobp-all-v01-c128849280553,2,1,71.527778,NZZ,NZZ-1892-02-19-a


# Text reuse passages facets

## newspaper

In [62]:
result = impresso.text_reuse.clusters.facet("newspaper")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

** SearchFacet(type='newspaper', num_buckets=77, buckets=[SearchFacetBucket(count=2816866, val='EXP', uid='EXP', item=Year(uid='EXP', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=2633785, val='IMP', uid='IMP', item=Year(uid='IMP', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=2176771, val='JDG', uid='JDG', item=Year(uid='JDG', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=2006765, val='GDL', uid='GDL', item=Year(uid='GDL', values=<impresso.api_client.types.Unset object at 0x11f566390>, refs=<impresso.api_client.types.Unset object at 0x11f566390>)), SearchFacetBucket(count=1844334, val='LLE', uid='LLE', item=Year(uid='LLE', values=<impresso.api_client.types.Unset object at 

Unnamed: 0_level_0,count,uid,item.uid,item.acronym,item.labels,item.languages,item.included,item.name,item.endYear,item.startYear,item.countArticles,item.countIssues,item.countPages,item.deltaYear,item.properties,item.fetched
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
EXP,2816866,EXP,EXP,,,,,,,,,,,,,


## daterange

In [64]:
result = impresso.text_reuse.passages.facet("daterange", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

59 items found. Returning a page of 59 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
1875-01-01T00:00:00Z,1


## yearmonth

In [66]:
result = impresso.text_reuse.passages.facet("yearmonth", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

114 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
1961-03,25


## year

In [68]:
result = impresso.text_reuse.passages.facet("year", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

59 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count,uid,item.refs.c,item.refs.a
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1961,30,1961,282880,279422


## connectedClusters

In [70]:
result = impresso.text_reuse.passages.facet("connectedClusters", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

704 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
tr-nobp-all-v01-c163801,10


## textReuseClusterSize

In [72]:
result = impresso.text_reuse.passages.facet("textReuseClusterSize", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

200 items found. Returning a page of 200 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
2,368


## textReuseClusterLexicalOverlap

In [74]:
result = impresso.text_reuse.passages.facet("textReuseClusterLexicalOverlap", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

100 items found. Returning a page of 100 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
0,2


## textReuseClusterDayDelta

In [76]:
result = impresso.text_reuse.passages.facet("textReuseClusterDayDelta", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

800 items found. Returning a page of 800 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
0,331


## textReuseCluster

In [78]:
result = impresso.text_reuse.passages.facet("textReuseCluster", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

190 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
tr-nobp-all-v01-c8590322057,5


## collection

In [80]:
result = impresso.text_reuse.passages.facet("collection", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

0 items found. Returning a page of 0 facets.


## topic

In [82]:
result = impresso.text_reuse.passages.facet("topic", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

147 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count,uid,item.uid,item.language,item.community,item.pagerank,item.degree,item.x,item.y,item.relatedTopics,item.countItems,item.excerpt,item.words,item.model
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
tm-fr-all-v2.0_tp74_fr,64,tm-fr-all-v2.0_tp74_fr,,,,0,0,0,0,[],-1,[],[],


## person

In [84]:
result = impresso.text_reuse.passages.facet("person", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

535 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count,uid,item.uid
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aida-0001-50-Marco_Schneuwly,21,aida-0001-50-Marco_Schneuwly,aida-0001-50-Marco_Schneuwly


## location

In [86]:
result = impresso.text_reuse.passages.facet("location", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

498 items found. Returning a page of 10 facets.


Unnamed: 0_level_0,count,uid,item.uid
val,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
aida-0001-54-Suisse$2c$_Moselle,83,aida-0001-54-Suisse$2c$_Moselle,aida-0001-54-Suisse$2c$_Moselle


## nag

In [88]:
result = impresso.text_reuse.passages.facet("nag", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

0 items found. Returning a page of 0 facets.


## language

In [90]:
result = impresso.text_reuse.passages.facet("language", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

2 items found. Returning a page of 2 facets.


Unnamed: 0_level_0,count,uid
val,Unnamed: 1_level_1,Unnamed: 2_level_1
fr,348,fr


## country

In [92]:
result = impresso.text_reuse.passages.facet("country", text="banana")
print(f"{result.total} items found. Returning a page of {result.limit} facets.")
result.df.head(1)

2 items found. Returning a page of 2 facets.


Unnamed: 0_level_0,count
val,Unnamed: 1_level_1
CH,359
