## get Meta

In [12]:
import requests
import bz2
import xml.etree.ElementTree as ET
import pandas as pd
import re
import os

# ============================================================================
# STEP 1: Download first 100 MB of the META-CURRENT dump (has expanded text)
# ============================================================================
url = "https://dumps.wikimedia.org/frwikisource/latest/frwikisource-latest-pages-meta-current.xml.bz2"

print("="*80)
print("DOWNLOADING FIRST 100 MB OF META-CURRENT DUMP")
print("="*80)
print(f"URL: {url}")
print("This dump contains expanded/rendered text (transclusions resolved)")
print()

headers = {'Range': 'bytes=0-104857600'}  # First 100 MB
response = requests.get(url, headers=headers, stream=True)

compressed_file = "frwikisource_meta_sample.xml.bz2"
decompressed_file = "frwikisource_meta_partial.xml"

if response.status_code in [200, 206]:
    with open(compressed_file, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    
    print(f"âœ“ Downloaded {os.path.getsize(compressed_file):,} bytes")
    
    # ============================================================================
    # STEP 2: FULL Decompression of the bz2 file
    # ============================================================================
    print("\n" + "="*80)
    print("DECOMPRESSING BZ2 FILE")
    print("="*80)
    
    try:
        compressed_size = os.path.getsize(compressed_file)
        bytes_decompressed = 0
        
        with bz2.open(compressed_file, 'rb') as f_in:
            with open(decompressed_file, 'wb') as f_out:
                # Read in chunks to handle large files
                chunk_size = 1024 * 1024  # 1 MB chunks
                while True:
                    chunk = f_in.read(chunk_size)
                    if not chunk:
                        break
                    f_out.write(chunk)
                    bytes_decompressed += len(chunk)
                    
                    # Show progress every 100 MB
                    if bytes_decompressed % (100 * 1024 * 1024) == 0:
                        print(f"  Decompressed: {bytes_decompressed / (1024**2):.0f} MB")
        
        decompressed_size = os.path.getsize(decompressed_file)
        print(f"âœ“ Full decompression complete!")
        print(f"  Compressed:   {compressed_size:,} bytes ({compressed_size / (1024**2):.1f} MB)")
        print(f"  Decompressed: {decompressed_size:,} bytes ({decompressed_size / (1024**2):.1f} MB)")
        print(f"  Ratio: {decompressed_size / compressed_size:.1f}x")
        
    except Exception as e:
        print(f"âš  Decompression error: {e}")
        # Check if we got partial data
        if os.path.exists(decompressed_file):
            partial_size = os.path.getsize(decompressed_file)
            print(f"  Partial file created: {partial_size:,} bytes")
            print("  Will try to parse what we have...")
        else:
            raise
    
    # ============================================================================
    # STEP 3: Quick check - search for Mississippi
    # ============================================================================
    print("\n" + "="*80)
    print("CHECKING FOR MISSISSIPPI CONTENT")
    print("="*80)
    
    with open(decompressed_file, 'r', encoding='utf-8') as f:
        content = f.read()
        
        # Check if Mississippi article is in this partial download
        if 'Voyage sur le Mississipi' in content:
            print("âœ“ Found 'Voyage sur le Mississipi' in the dump!")
            
            # Try to extract a snippet
            idx = content.find('<title>Voyage sur le Mississipi</title>')
            if idx != -1:
                # Show 3000 chars starting from title
                snippet = content[idx:idx+3000]
                print("\nSnippet from the article:")
                print("-" * 80)
                print(snippet)
                print("-" * 80)
        else:
            print("âœ— 'Voyage sur le Mississipi' not found in this partial download")
            print("  You may need to download more data")
    
    print(f"\nâœ“ Decompressed file ready: {decompressed_file}")
    print("  Ready to parse with your parser!")
    
else:
    print(f"âœ— Download failed with status code: {response.status_code}")

DOWNLOADING FIRST 100 MB OF META-CURRENT DUMP
URL: https://dumps.wikimedia.org/frwikisource/latest/frwikisource-latest-pages-meta-current.xml.bz2
This dump contains expanded/rendered text (transclusions resolved)

âœ“ Downloaded 104,857,601 bytes

DECOMPRESSING BZ2 FILE
  Decompressed: 100 MB
  Decompressed: 200 MB
  Decompressed: 300 MB
  Decompressed: 400 MB
âš  Decompression error: Compressed file ended before the end-of-stream marker was reached
  Partial file created: 478,150,656 bytes
  Will try to parse what we have...

CHECKING FOR MISSISSIPPI CONTENT
âœ“ Found 'Voyage sur le Mississipi' in the dump!

Snippet from the article:
--------------------------------------------------------------------------------
<title>Voyage sur le Mississipi</title>
    <ns>0</ns>
    <id>21992</id>
    <revision>
      <id>10644916</id>
      <parentid>9805094</parentid>
      <timestamp>2020-08-13T20:37:14Z</timestamp>
      <contributor>
        <username>Berniepyt</username>
        <id>17489</

In [None]:
import requests
import bz2
import xml.etree.ElementTree as ET
import pandas as pd
import re
import os

# ============================================================================
# STEP 1: Download first 10 MB of the compressed dump
# ============================================================================
url = "https://dumps.wikimedia.org/frwikisource/latest/frwikisource-latest-pages-articles.xml.bz2"

print("Downloading first 10 MB of compressed dump...")
headers = {'Range': 'bytes=0-10485760'}  # First 10 MB
response = requests.get(url, headers=headers, stream=True)

compressed_file = "frwikisource_sample.xml.bz2"
decompressed_file = "frwikisource_partial_2.xml"

if response.status_code in [200, 206]:
    with open(compressed_file, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    
    print(f"âœ“ Downloaded {os.path.getsize(compressed_file):,} bytes")
    
    # ============================================================================
    # STEP 2: Decompress the bz2 file


Downloading first 10 MB of compressed dump...
âœ“ Downloaded 10,485,761 bytes

Decompressing bz2 file...
âš  Decompression incomplete (expected with partial file): Compressed file ended before the end-of-stream marker was reached
Will try to parse what we have...


In [11]:
# ============================================================================
# FULL DECOMPRESSION
# ============================================================================
decompressed_file = "frwikisource_partial_3.xml"

print("\nDecompressing bz2 file (FULL decompression)...")
try:
    compressed_size = os.path.getsize(compressed_file)
    bytes_decompressed = 0
    
    with bz2.open(compressed_file, 'rb') as f_in:
        with open(decompressed_file, 'wb') as f_out:
            # Read in chunks to handle large files
            chunk_size = 1024 * 1024  # 1 MB chunks
            while True:
                chunk = f_in.read(chunk_size)
                if not chunk:
                    break
                f_out.write(chunk)
                bytes_decompressed += len(chunk)
                
                # Show progress every 100 MB
                if bytes_decompressed % (100 * 1024 * 1024) == 0:
                    print(f"  Decompressed: {bytes_decompressed / (1024**2):.0f} MB")
    
    decompressed_size = os.path.getsize(decompressed_file)
    print(f"âœ“ Full decompression complete!")
    print(f"  Decompressed to: {decompressed_size:,} bytes ({decompressed_size / (1024**2):.1f} MB)")
    
except Exception as e:
    print(f"âš  Decompression error: {e}")
    # Check if we got partial data
    if os.path.exists(decompressed_file):
        partial_size = os.path.getsize(decompressed_file)
        print(f"  Partial file created: {partial_size:,} bytes")
        print("  Will try to parse what we have...")
    else:
        raise


Decompressing bz2 file (FULL decompression)...
âš  Decompression error: Compressed file ended before the end-of-stream marker was reached
  Partial file created: 50,331,648 bytes
  Will try to parse what we have...


In [1]:
import urllib.request
import urllib.parse
import bz2
import xml.etree.ElementTree as ET
import pandas as pd
import requests
import time
from tqdm import tqdm

def parse_wikisource_dump(dump_file="frwikisource_partial.xml"):
    pages = []
    
    try:
        context = ET.iterparse(dump_file, events=('start', 'end'))
        context = iter(context)
        
        event, root = next(context)
        
        current_page = {}
        in_page = False
        in_revision = False
        
        for event, elem in context:
            tag = elem.tag.split('}')[-1]
            
            if event == 'start':
                if tag == 'page':
                    in_page = True
                    current_page = {}
                elif tag == 'revision':
                    in_revision = True
            
            elif event == 'end':
                if tag == 'title' and in_page:
                    current_page['title'] = elem.text
                
                elif tag == 'ns' and in_page:
                    current_page['namespace'] = elem.text
                
                elif tag == 'id' and in_page and 'page_id' not in current_page:
                    current_page['page_id'] = elem.text
                
                elif tag == 'redirect' and in_page:
                    current_page['is_redirect'] = True
                    current_page['redirect_to'] = elem.get('title', '')
                
                elif tag == 'timestamp' and in_revision:
                    current_page['timestamp'] = elem.text
                
                elif tag == 'text' and in_revision:
                    current_page['content'] = elem.text if elem.text else ''
                
                elif tag == 'revision':
                    in_revision = False
                
                elif tag == 'page':
                    if 'is_redirect' not in current_page:
                        current_page['is_redirect'] = False
                        current_page['redirect_to'] = None
                    
                    # Add Wikisource URL
                    if 'title' in current_page:
                        title_encoded = current_page['title'].replace(' ', '_')
                        current_page['wikisource_url'] = f"https://fr.wikisource.org/wiki/{title_encoded}"
                    else:
                        current_page['wikisource_url'] = None
                    
                    pages.append(current_page.copy())
                    in_page = False
                    current_page = {}
                    root.clear()
                    
                    if len(pages) % 100 == 0:
                        print(f"\rParsed {len(pages)} pages...", end="")
    
    except ET.ParseError as e:
        print(f"\nâš  XML parsing stopped (expected with partial file)")
    
    print(f"\nâœ“ Parsed {len(pages):,} pages total")
    return pd.DataFrame(pages)

# Parse the dump with Wikisource URLs
df = parse_wikisource_dump()

Parsed 19500 pages...
âš  XML parsing stopped (expected with partial file)

âœ“ Parsed 19,583 pages total


In [5]:
import urllib.request
import urllib.parse
import bz2
import xml.etree.ElementTree as ET
import pandas as pd
import requests
import time
import re
from tqdm import tqdm

def extract_metadata(content):
    """
    Extract author, date, and title from Wikisource content.
    """
    metadata = {
        'extracted_author': None,
        'extracted_date': None, 
        'extracted_title': None,
        'extracted_translator': None,
        'extracted_publisher': None
    }
    
    if not content or pd.isna(content):
        return metadata
    
    # Look only in first 3000 characters
    text = str(content)[:3000]
    
    # ========== EXTRACT AUTHOR ==========
    author_patterns = [
        r'Auteur:\s*\[\[([^\]]+)\]\]',           # Auteur:[[Name]]
        r'Auteur:\s*([^\n\|}\]<]+)',             # Auteur:Name
        r'auteur\s*=\s*\[\[([^\]]+)\]\]',        # auteur=[[Name]]
        r'auteur\s*=\s*([^\n\|}\]<]+)',          # auteur=Name
        r'par\s+([A-ZÃ€-Ãœ][a-zÃ -Ã¼]+(?:\s+[A-ZÃ€-Ãœ][a-zÃ -Ã¼]+){1,3})',  # par Author Name
    ]
    
    for pattern in author_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            author = match.group(1).strip()
            author = re.sub(r'\|.*$', '', author)
            metadata['extracted_author'] = author
            break
    
    # ========== EXTRACT DATE ==========
    date_patterns = [
        r'annÃ©e\s*=\s*(\d{4})',
        r'date\s*=\s*(\d{4})',
        r'Ã©diteur,?\s+(\d{4})',
        r'\b(1[0-9]{3}|20[0-2][0-9])\b',
    ]
    
    for pattern in date_patterns:
        match = re.search(pattern, text)
        if match:
            metadata['extracted_date'] = match.group(1)
            break
    
    # ========== EXTRACT TITLE ==========
    title_patterns = [
        r'titre\s*=\s*([^\n\|}\]<]+)',
        r'title\s*=\s*([^\n\|}\]<]+)',
        r'==\s*([^=\n]+?)\s*==',
        r'<h[12]>([^<]+)</h[12]>',
    ]
    
    for pattern in title_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            title = match.group(1).strip()
            if title.upper() not in ['INTRODUCTION', 'PRÃ‰FACE', 'AVANT-PROPOS', 
                                     'SOMMAIRE', 'TABLE DES MATIÃˆRES', 'INDEX']:
                metadata['extracted_title'] = title
                break
    
    # ========== EXTRACT TRANSLATOR ==========
    translator_patterns = [
        r'Traduction par\s+\[\[([^\]]+)\]\]',
        r'Traduction par\s+([A-ZÃ€-Ãœ][^\n\.,<]+)',
        r'traducteur\s*=\s*\[\[([^\]]+)\]\]',
        r'traducteur\s*=\s*([^\n\|}\]<]+)',
    ]
    
    for pattern in translator_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            translator = match.group(1).strip()
            translator = re.sub(r'\|.*$', '', translator)
            metadata['extracted_translator'] = translator
            break
    
    # ========== EXTRACT PUBLISHER ==========
    publisher_patterns = [
        r'Ã©diteur\s*=\s*([^\n\|}\]<]+)',
        r'\[\[([^\]]+)\]\],?\s*Ã©diteur',
        r'([A-ZÃ€-Ãœ][a-zÃ -Ã¼]+),?\s*Ã©diteur',
    ]
    
    for pattern in publisher_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            publisher = match.group(1).strip()
            publisher = re.sub(r'\|.*$', '', publisher)
            metadata['extracted_publisher'] = publisher
            break
    
    return metadata

def parse_wikisource_dump(dump_file="frwikisource_partial.xml"):
    """
    Parse Wikisource dump and extract metadata from content.
    """
    pages = []
    
    try:
        context = ET.iterparse(dump_file, events=('start', 'end'))
        context = iter(context)
        
        event, root = next(context)
        
        current_page = {}
        in_page = False
        in_revision = False
        
        for event, elem in context:
            tag = elem.tag.split('}')[-1]
            
            if event == 'start':
                if tag == 'page':
                    in_page = True
                    current_page = {}
                elif tag == 'revision':
                    in_revision = True
            
            elif event == 'end':
                if tag == 'title' and in_page:
                    current_page['title'] = elem.text
                
                elif tag == 'ns' and in_page:
                    current_page['namespace'] = elem.text
                
                elif tag == 'id' and in_page and 'page_id' not in current_page:
                    current_page['page_id'] = elem.text
                
                elif tag == 'redirect' and in_page:
                    current_page['is_redirect'] = True
                    current_page['redirect_to'] = elem.get('title', '')
                
                elif tag == 'timestamp' and in_revision:
                    current_page['timestamp'] = elem.text
                
                elif tag == 'text' and in_revision:
                    current_page['content'] = elem.text if elem.text else ''
                
                elif tag == 'revision':
                    in_revision = False
                
                elif tag == 'page':
                    if 'is_redirect' not in current_page:
                        current_page['is_redirect'] = False
                        current_page['redirect_to'] = None
                    
                    # Add Wikisource URL
                    if 'title' in current_page:
                        title_encoded = current_page['title'].replace(' ', '_')
                        current_page['wikisource_url'] = f"https://fr.wikisource.org/wiki/{title_encoded}"
                    else:
                        current_page['wikisource_url'] = None
                    
                    # Extract metadata from content
                    if 'content' in current_page:
                        metadata = extract_metadata(current_page['content'])
                        current_page.update(metadata)
                    
                    pages.append(current_page.copy())
                    in_page = False
                    current_page = {}
                    root.clear()
                    
                    if len(pages) % 100 == 0:
                        print(f"\rParsed {len(pages)} pages...", end="")
    
    except ET.ParseError as e:
        print(f"\nâš  XML parsing stopped (expected with partial file)")
    
    print(f"\nâœ“ Parsed {len(pages):,} pages total")
    return pd.DataFrame(pages)

# Parse the dump with metadata extraction
print("Parsing Wikisource dump with metadata extraction...")
df = parse_wikisource_dump()

df

Parsing Wikisource dump with metadata extraction...
Parsed 19500 pages...
âš  XML parsing stopped (expected with partial file)

âœ“ Parsed 19,583 pages total


Unnamed: 0,title,namespace,page_id,timestamp,content,is_redirect,redirect_to,wikisource_url,extracted_author,extracted_date,extracted_title,extracted_translator,extracted_publisher
0,MediaWiki:Monobook.css,8,3,2025-11-30T23:06:51Z,/* edit this file to customize the monobook sk...,False,,https://fr.wikisource.org/wiki/MediaWiki:Monob...,,,Espaces wikisources,,
1,MediaWiki:Monobook.js,8,4,2019-04-13T16:32:56Z,,False,,https://fr.wikisource.org/wiki/MediaWiki:Monob...,,,,,
2,MediaWiki:Article,8,54,2006-01-10T08:38:18Z,Article,False,,https://fr.wikisource.org/wiki/MediaWiki:Article,,,,,
3,MediaWiki:Categoryarticlecount,8,104,2005-08-28T21:25:35Z,Il y a $1 Ã©lÃ©ments dans cette catÃ©gorie.,False,,https://fr.wikisource.org/wiki/MediaWiki:Categ...,,,,,
4,MediaWiki:Categoryarticlecount1,8,105,2005-08-28T21:24:57Z,Il y a $1 Ã©lÃ©ment dans cette catÃ©gorie.,False,,https://fr.wikisource.org/wiki/MediaWiki:Categ...,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
19578,Page:Diderot - Encyclopedie 1ere edition tome ...,104,51901,2024-08-28T17:58:16Z,"<noinclude><pagequality level=""3"" user=""AcÃ©lan...",False,,https://fr.wikisource.org/wiki/Page:Diderot_-_...,un conseiller,,,,
19579,Page:Diderot - Encyclopedie 1ere edition tome ...,104,51902,2018-07-23T00:14:52Z,"<noinclude><pagequality level=""3"" user=""AcÃ©lan...",False,,https://fr.wikisource.org/wiki/Page:Diderot_-_...,des lettres de dispense,1530,,,
19580,Page:Diderot - Encyclopedie 1ere edition tome ...,104,51903,2018-07-23T00:14:52Z,"<noinclude><pagequality level=""3"" user=""AcÃ©lan...",False,,https://fr.wikisource.org/wiki/Page:Diderot_-_...,rapport aux\nrangs distincts,,,,
19581,Page:Diderot - Encyclopedie 1ere edition tome ...,104,51904,2024-09-09T04:50:48Z,"<noinclude><pagequality level=""3"" user=""AcÃ©lan...",False,,https://fr.wikisource.org/wiki/Page:Diderot_-_...,ledit billet sur une,,,,


In [7]:
df[df['wikisource_url']=='https://fr.wikisource.org/wiki/Voyage_sur_le_Mississipi']['content'].iloc[0]

'{{TextQuality|Textes validÃ©s}}\n\n<pages index="Revue des Deux Mondes - 1833 - tome 1.djvu" from=531 to=550  header=1 auteur="[[Auteur:EugÃ¨ne Ney|EugÃ¨ne Ney]]" prev="" next="" />\n\n[[CatÃ©gorie:Articles de la Revue des Deux Mondes]]\n[[CatÃ©gorie:Explorations et voyages aux Ã‰tats-Unis]]\n[[CatÃ©gorie:Articles par Auteur]]\n[[CatÃ©gorie:Articles de 1833]]'

In [3]:
df.to_csv('wikisource_pages_with_urls.csv', index=False)

In [2]:
df = df[df['is_redirect']==False]
df['content_length'] = df['content'].fillna('').str.len()
df = df[df['content_length'] >= 200]

In [None]:
# Show info
print("\n" + "="*60)
print("FRENCH WIKISOURCE DATA")
print("="*60)
print(f"\nTotal pages: {len(df):,}")

if len(df) > 0:
    print(f"\nNamespace breakdown:")
    print(df['namespace'].value_counts())
    
    # Filter main namespace only (ns=0)
    df_main = df[df['namespace'] == '0'].copy()
    print(f"\nMain namespace articles: {len(df_main):,}")
    
    print("\nðŸ“‹ First 10 articles:")
    print(df_main[['page_id', 'title']].head(10))
    
    # STEP 4: Get Wikidata IDs
    print("\n" + "=" * 60)
    print("STEP 4: Fetching Wikidata IDs (first 20 articles)")
    print("=" * 60)
    
    def get_wikidata_id_from_wikisource(wikisource_url):
        """
        Get Wikidata ID for a Wikisource page.
        Handles redirects and different apostrophe characters.
        """
        headers = {'User-Agent': 'WikidataBot/1.0 (Python requests)'}
        
        # Extract page title from URL
        parts = wikisource_url.split('/wiki/')
        if len(parts) < 2:
            return None
        
        page_title = urllib.parse.unquote(parts[1])
        
        # Determine the wiki site from URL (e.g., 'fr' from 'fr.wikisource.org')
        wiki_site = wikisource_url.split('//')[1].split('.')[0] + 'wikisource'
        api_base = f"https://{wikisource_url.split('//')[1].split('/')[0]}/w/api.php"
        
        # Step 1: Follow redirects to get the actual page title
        params = {
            'action': 'query',
            'titles': page_title,
            'redirects': '',
            'prop': 'pageprops',
            'format': 'json'
        }
        
        try:
            r = requests.get(api_base, params=params, headers=headers, timeout=15)
            data = r.json()
            
            # Check for redirects
            redirects = data.get('query', {}).get('redirects', [])
            if redirects:
                page_title = redirects[-1]['to']  # Get final redirect target
            
            # Check if wikibase_item is in pageprops
            pages = data.get('query', {}).get('pages', {})
            for page_id, page_data in pages.items():
                if page_id != '-1':
                    wikibase_item = page_data.get('pageprops', {}).get('wikibase_item')
                    if wikibase_item:
                        return wikibase_item
        except:
            pass
        
        # Step 2: Query Wikidata directly using the (possibly redirected) title
        try:
            wd_api_url = "https://www.wikidata.org/w/api.php"
            params = {
                'action': 'wbgetentities',
                'sites': wiki_site,
                'titles': page_title,
                'format': 'json'
            }
            
            r = requests.get(wd_api_url, params=params, headers=headers, timeout=15)
            data = r.json()
            
            entities = data.get('entities', {})
            for entity_id, entity_data in entities.items():
                if entity_id != '-1' and not entity_id.startswith('-'):
                    return entity_id
        except:
            pass
        
        return None
    
    wikidata_ids = []
    
    for idx, row in tqdm(df_main.head(20).iterrows(), total=min(20, len(df_main)), desc="Querying Wikidata"):
        wikisource_url = row['wikisource_url']
        
        wikidata_id = get_wikidata_id_from_wikisource(wikisource_url)
        
        wikidata_ids.append({
            'page_id': row['page_id'],
            'title': row['title'],
            'wikisource_url': wikisource_url,
            'wikidata_id': wikidata_id
        })
        
        time.sleep(0.5)
    
    results_df = pd.DataFrame(wikidata_ids)
    
    print("\n" + "="*60)
    print("RESULTS: Wikidata IDs Found")
    print("="*60)
    print(f"\nTotal pages checked: {len(results_df)}")
    print(f"Pages with Wikidata ID: {len(results_df[results_df['wikidata_id'].notna()])}")
    
    print("\nðŸ“Š Pages with Wikidata IDs:")
    for idx, row in results_df[results_df['wikidata_id'].notna()].iterrows():
        print(f"\nâ€¢ {row['title']}")
        print(f"  Wikisource: {row['wikisource_url']}")
        print(f"  Wikidata ID: {row['wikidata_id']}")
    
    results_df.to_csv('frwikisource_wikidata_ids.csv', index=False)
    print(f"\nâœ“ Saved to frwikisource_wikidata_ids.csv")
else:
    print("âš  No pages were parsed. Try downloading more data.")


FRENCH WIKISOURCE DATA

Total pages: 9,702

Namespace breakdown:
namespace
0      5288
104    3360
102     758
14      114
10       64
4        43
6        26
106      24
12       12
8         8
112       5
Name: count, dtype: int64

Main namespace articles: 5,288

ðŸ“‹ First 10 articles:
   page_id                                              title
19    1309  Charte internationale des Droits de lâ€™Homme (1...
20    1310  Constitution du 4 octobre 1958 (Ã  jour de la r...
21    1311  DÃ©claration des Droits de lâ€™Homme et du Citoye...
27    1318         Les Fleurs du mal (1861)/Lâ€™Homme et la Mer
28    1319                          Pour le bien-Ãªtre de tous
29    1320    Commentaires sur Unto This Last de M. K. Gandhi
30    1322  DÃ©cret de la Convention nationale portant sur ...
32    1329  Constitution des Ã‰tats-Unis dâ€™AmÃ©rique (trad. ...
38    1342                                  PremiÃ¨re Solitude
39    1343                                  Aux amis inconnus

STEP 4: Fet

Querying Wikidata:   5%|â–Œ         | 1/20 [00:30<09:40, 30.57s/it]

Wikidata ID: None
