In [1]:
import pandas as pd
# Benchmark to process; the other available dataset is "FACES".
essum_dataset = "ESBM-DBpedia"

# Read the tab-separated entity list and strip surrounding whitespace from
# the header names so the 'euri' column can be looked up by its plain name.
entity_df = pd.read_csv(
    f'data.v.1.2/{essum_dataset}/elist.txt', sep='\t'
).rename(columns=str.strip)
entity_uris = entity_df['euri']

# Echo every entity URI, one per line.
for entity in entity_uris:
    print(entity)

http://dbpedia.org/resource/3WAY_FM
http://dbpedia.org/resource/Roderick_Carr
http://dbpedia.org/resource/2009–10_Swiss_Cup
http://dbpedia.org/resource/2011_Kor_Royal_Cup
http://dbpedia.org/resource/2011_League_of_Ireland_Cup_Final
http://dbpedia.org/resource/2011_Sparta_Prague_Open
http://dbpedia.org/resource/A._Scott_Sloan
http://dbpedia.org/resource/Juhan_Muks
http://dbpedia.org/resource/Lucy_Ward_(musician)
http://dbpedia.org/resource/Roque_Ceruti
http://dbpedia.org/resource/Battle_of_Sampur
http://dbpedia.org/resource/Battle_of_Zacatecas_(1914)
http://dbpedia.org/resource/Convoy_HX_156
http://dbpedia.org/resource/2012–13_UEFA_Champions_League
http://dbpedia.org/resource/Raid_on_Griessie
http://dbpedia.org/resource/Ayrovo
http://dbpedia.org/resource/Fleckistock
http://dbpedia.org/resource/Henlow
http://dbpedia.org/resource/North_Haledon,_New_Jersey
http://dbpedia.org/resource/Tchonoro
http://dbpedia.org/resource/Gerbil_mouse
http://dbpedia.org/resource/Nemapogon_nigralbella
http://

In [2]:
import requests
from urllib.parse import unquote
from tqdm import tqdm

def dbpedia_uri_to_title(uri):
    """Turn a DBpedia resource URI into a human-readable page title.

    The text after the last "/" is percent-decoded and its underscores are
    replaced with spaces.
    """
    _, _, resource_name = uri.rpartition("/")
    decoded_name = unquote(resource_name)
    return decoded_name.replace("_", " ")

def get_first_paragraph_wikipedia_rest(title, lang='en'):
    """Fetch the summary extract of a Wikipedia page through the REST API.

    Args:
        title: Page title; spaces are converted to underscores before the
            request is made.
        lang: Wikipedia language subdomain to query (defaults to 'en').

    Returns:
        On HTTP 200, the "extract" field of the JSON response, or
        "No extract found." when that field is missing. On HTTP 404,
        "Wikipedia page not found.". For any other status,
        "Error: HTTP <status code>".

    Raises:
        requests.exceptions.RequestException: if the request cannot be
            completed, including when the timeout below expires.
    """
    from urllib.parse import quote

    # Percent-encode the whole title (safe='' also encodes "/"), otherwise
    # titles containing "/", "?", "#" or "%" are interpreted as URL syntax
    # and a different endpoint is requested.
    page = quote(title.replace(' ', '_'), safe='')
    url = f"https://{lang}.wikipedia.org/api/rest_v1/page/summary/{page}"
    # Without a timeout a single stalled connection blocks forever.
    response = requests.get(url, timeout=10)
    if response.status_code == 200:
        return response.json().get("extract", "No extract found.")
    elif response.status_code == 404:
        return "Wikipedia page not found."
    else:
        return f"Error: HTTP {response.status_code}"
        
def save_paragraph_to_txt(entity, paragraph):
    """Write a paragraph to a UTF-8 text file named after the entity.

    The file name is the text after the last "/" of ``entity`` plus ".txt";
    it is placed in the silver-standard-summaries folder of the dataset
    selected by the module-level ``essum_dataset``. The file name is printed
    once the write has finished.
    """
    _, _, resource_name = entity.rpartition("/")
    filename = resource_name + ".txt"
    target_path = f"data.v.1.2/{essum_dataset}/ESSUM/silver-standard-summaries/{filename}"
    with open(target_path, "w", encoding="utf-8") as out_file:
        out_file.write(paragraph)
    print(f"Saved to: {filename}")
    
def is_empty_paragraph(text):
    """Return True when ``text`` is falsy or contains only whitespace."""
    if not text:
        return True
    return text.strip() == ""
    
# Fetch a summary for every entity and store it as a silver-standard text file.
#
# get_first_paragraph_wikipedia_rest reports failures as ordinary non-empty
# strings, so an emptiness check alone would write those messages to disk as
# if they were summaries and count them as successes. They are filtered out
# explicitly here.
failure_messages = ("No extract found.", "Wikipedia page not found.")
failure_prefix = "Error: HTTP "

count = 0  # number of summaries actually written
for uri in tqdm(entity_uris):
    title = dbpedia_uri_to_title(uri)
    paragraph = get_first_paragraph_wikipedia_rest(title)
    if is_empty_paragraph(paragraph):
        print(f"{uri} has no text")
        continue
    if paragraph in failure_messages or paragraph.startswith(failure_prefix):
        # Lookup failed: report it instead of saving the error text.
        print(f"{uri} skipped: {paragraph}")
        continue
    save_paragraph_to_txt(uri, paragraph)
    count += 1
print("count: ", count)

  2%|▏         | 2/110 [00:00<00:21,  4.92it/s]

Saved to: 3WAY_FM.txt
Saved to: Roderick_Carr.txt


  4%|▎         | 4/110 [00:00<00:18,  5.74it/s]

Saved to: 2009–10_Swiss_Cup.txt
Saved to: 2011_Kor_Royal_Cup.txt


  5%|▌         | 6/110 [00:01<00:20,  5.09it/s]

Saved to: 2011_League_of_Ireland_Cup_Final.txt
Saved to: 2011_Sparta_Prague_Open.txt


  7%|▋         | 8/110 [00:01<00:22,  4.44it/s]

Saved to: A._Scott_Sloan.txt
Saved to: Juhan_Muks.txt


  9%|▉         | 10/110 [00:02<00:17,  5.68it/s]

Saved to: Lucy_Ward_(musician).txt
Saved to: Roque_Ceruti.txt


 11%|█         | 12/110 [00:02<00:15,  6.35it/s]

Saved to: Battle_of_Sampur.txt
Saved to: Battle_of_Zacatecas_(1914).txt


 14%|█▎        | 15/110 [00:02<00:12,  7.75it/s]

Saved to: Convoy_HX_156.txt
Saved to: 2012–13_UEFA_Champions_League.txt
Saved to: Raid_on_Griessie.txt


 15%|█▌        | 17/110 [00:03<00:20,  4.61it/s]

Saved to: Ayrovo.txt
Saved to: Fleckistock.txt
Saved to: Henlow.txt


 18%|█▊        | 20/110 [00:03<00:16,  5.44it/s]

Saved to: North_Haledon,_New_Jersey.txt
Saved to: Tchonoro.txt


 19%|█▉        | 21/110 [00:04<00:16,  5.51it/s]

Saved to: Gerbil_mouse.txt


 22%|██▏       | 24/110 [00:04<00:19,  4.39it/s]

Saved to: Nemapogon_nigralbella.txt
Saved to: Battle_of_Bregalnica.txt
Saved to: Schistura_jarutanini.txt


 24%|██▎       | 26/110 [00:05<00:19,  4.41it/s]

Saved to: Can_U_Get_wit_It.txt
Saved to: Silence_Is_Easy_(song).txt


 25%|██▍       | 27/110 [00:05<00:17,  4.68it/s]

Saved to: The_Honolulu_Advertiser.txt


 26%|██▋       | 29/110 [00:06<00:16,  5.05it/s]

Saved to: You_Better_Run.txt
Saved to: Battle_of_Rottofreddo.txt


 27%|██▋       | 30/110 [00:06<00:14,  5.49it/s]

Saved to: Burgery_ambush.txt


 28%|██▊       | 31/110 [00:06<00:18,  4.38it/s]

Saved to: Massacre_on_34th_Street.txt
Saved to: Adrian_Griffin.txt


 31%|███       | 34/110 [00:06<00:12,  5.88it/s]

Saved to: Triathlon_at_the_2000_Summer_Olympics_–_Men's.txt
Saved to: Akalwadi.txt


 33%|███▎      | 36/110 [00:07<00:11,  6.29it/s]

Saved to: Chitita.txt
Saved to: Kuleh_Bayan.txt


 35%|███▍      | 38/110 [00:07<00:11,  6.52it/s]

Saved to: Reamer_Barn.txt
Saved to: Richmond–Petersburg_Turnpike.txt


 36%|███▋      | 40/110 [00:07<00:09,  7.05it/s]

Saved to: Uelsby.txt
Saved to: Wehlaberg.txt


 37%|███▋      | 41/110 [00:08<00:16,  4.25it/s]

Saved to: Wernshausen.txt


 39%|███▉      | 43/110 [00:08<00:16,  4.02it/s]

Saved to: Andrew_Kippis.txt
Saved to: Yayoidai_Station.txt
Saved to: African_grey_hornbill.txt


 41%|████      | 45/110 [00:09<00:14,  4.62it/s]

Saved to: Bornean_mountain_ground_squirrel.txt


 43%|████▎     | 47/110 [00:09<00:13,  4.59it/s]

Saved to: Enallagma_truncatum.txt
Saved to: Lepiota_helveola.txt
Saved to: Lygodium_microphyllum.txt


 45%|████▍     | 49/110 [00:10<00:22,  2.70it/s]

Saved to: Ovophis.txt


 46%|████▋     | 51/110 [00:11<00:22,  2.62it/s]

Saved to: Rubus_arizonensis.txt
Saved to: Siamese_mud_carp.txt


 48%|████▊     | 53/110 [00:11<00:15,  3.69it/s]

Saved to: Anthony_Beaumont-Dark.txt
Saved to: Trachelipus_dimorphus.txt


 49%|████▉     | 54/110 [00:12<00:13,  4.31it/s]

Saved to: Hey_Boy_(Teddybears_song).txt


 50%|█████     | 55/110 [00:12<00:23,  2.37it/s]

Saved to: King_of_the_Mountain_(film).txt


 52%|█████▏    | 57/110 [00:13<00:16,  3.15it/s]

Saved to: Our_Leading_Citizen_(1939_film).txt
Saved to: Simon_(2004_film).txt


 53%|█████▎    | 58/110 [00:13<00:20,  2.57it/s]

Saved to: Sky_(Faye_Wong_album).txt


 55%|█████▍    | 60/110 [00:14<00:14,  3.53it/s]

Saved to: Sting_Me.txt
Saved to: The_Crowd_Snores.txt


 56%|█████▋    | 62/110 [00:14<00:10,  4.78it/s]

Saved to: Dallas_Keuchel.txt
Saved to: Touch_of_Death_(1961_film).txt
Saved to: Ashot_I_of_Iberia.txt


 58%|█████▊    | 64/110 [00:15<00:09,  4.68it/s]

Saved to: Cindy_Mackey.txt


 59%|█████▉    | 65/110 [00:15<00:16,  2.75it/s]

Saved to: Edmund_Smith_Conklin.txt


 61%|██████    | 67/110 [00:16<00:13,  3.15it/s]

Saved to: Fabrice_Gautrat.txt
Saved to: Hiroshi_Mori_(writer).txt
Saved to: Momchil_Tsvetanov.txt


 63%|██████▎   | 69/110 [00:16<00:09,  4.51it/s]

Saved to: Najmadin_Shukr_Rauf.txt


 65%|██████▍   | 71/110 [00:17<00:09,  4.06it/s]

Saved to: Storme_Warren.txt
Saved to: E._K._Mawlong.txt


 66%|██████▋   | 73/110 [00:17<00:07,  5.12it/s]

Saved to: 1960_Glover_Trophy.txt
Saved to: 1967_Italian_Grand_Prix.txt


 67%|██████▋   | 74/110 [00:17<00:07,  4.56it/s]

Saved to: 2008_Copa_del_Rey_Final.txt


 68%|██████▊   | 75/110 [00:18<00:12,  2.76it/s]

Saved to: 2010_Belgian_Super_Cup.txt


 70%|███████   | 77/110 [00:19<00:09,  3.36it/s]

Saved to: 2013_Slovak_Cup_Final.txt
Saved to: Battle_of_Cepeda_(1820).txt


 71%|███████   | 78/110 [00:20<00:21,  1.47it/s]

Saved to: Battle_on_the_Elster.txt


 73%|███████▎  | 80/110 [00:21<00:16,  1.80it/s]

Saved to: Finn_Schiander.txt
Saved to: Operation_Hump.txt


 75%|███████▍  | 82/110 [00:21<00:09,  2.92it/s]

Saved to: Darreh_Dang.txt
Saved to: Jalalia,_Khyber_Pakhtunkhwa.txt


 75%|███████▌  | 83/110 [00:22<00:07,  3.49it/s]

Saved to: Kings_Ripton.txt


 77%|███████▋  | 85/110 [00:22<00:07,  3.29it/s]

Saved to: Kotumachagi.txt
Saved to: Muławki.txt


 78%|███████▊  | 86/110 [00:22<00:06,  3.89it/s]

Saved to: Pinnacle_Mountain_(South_Carolina).txt


 79%|███████▉  | 87/110 [00:23<00:06,  3.47it/s]

Saved to: Saint-Raphaël,_Var.txt


 80%|████████  | 88/110 [00:24<00:09,  2.41it/s]

Saved to: Sauxillanges.txt


 81%|████████  | 89/110 [00:25<00:12,  1.65it/s]

Saved to: Stara_Bučka.txt


 82%|████████▏ | 90/110 [00:25<00:10,  1.93it/s]

Saved to: Hagar_Wilde.txt


 84%|████████▎ | 92/110 [00:26<00:08,  2.13it/s]

Saved to: Zarudcze.txt
Saved to: Amphisbaena_ridleyi.txt


 85%|████████▌ | 94/110 [00:26<00:04,  3.34it/s]

Saved to: Balanites.txt
Saved to: Bryotropha_plantariella.txt


 87%|████████▋ | 96/110 [00:26<00:03,  4.25it/s]

Saved to: Hilarographa_excellens.txt
Saved to: Inverted_repeat-lacking_clade.txt


 89%|████████▉ | 98/110 [00:27<00:03,  3.85it/s]

Saved to: Melaleuca_sheathiana.txt
Saved to: Pseudanos_trimaculatus.txt


 90%|█████████ | 99/110 [00:27<00:03,  3.39it/s]

Saved to: Stemonoporus_laevifolius.txt


 92%|█████████▏| 101/110 [00:28<00:02,  3.90it/s]

Saved to: Thaia_saprophytica.txt
Saved to: Trichoscypha_cavalliensis.txt


 95%|█████████▍| 104/110 [00:28<00:00,  6.05it/s]

Saved to: 392_(album).txt
Saved to: Drama_City.txt
Saved to: If_(Glasvegas_song).txt


 95%|█████████▌| 105/110 [00:28<00:00,  6.27it/s]

Saved to: Intensive_Care_Medicine_(journal).txt
Saved to: It's_Still_Rock_and_Roll_to_Me.txt
Saved to: Politiken.txt


 98%|█████████▊| 108/110 [00:29<00:00,  8.53it/s]

Saved to: Rebel_Love_Song.txt
Saved to: Terrorist_Threats.txt


100%|██████████| 110/110 [00:30<00:00,  3.66it/s]

Saved to: Time_(Dave_Clark_album).txt
count:  110



