In [2]:
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np
import networkx as nx


In [3]:
# Base URL for Narutopedia character category
url_base = 'https://naruto.fandom.com/wiki/Category:Characters{}'
# Query strings appended to url_base, one request each
# ('' requests the category URL with no query string at all)
query_list = [
    '', 
    '?from=Eiki+Fūma%0AEiki+Fūma', 
    '?from=Hidari%0AHidari', 
    '?from=Karai%0AKarai', 
    '?from=Matsuba%0AMatsuba', 
    '?from=Rikumaru%0ARikumaru', 
    '?from=Taiki%0ATaiki', 
    '?from=Yubina%0AYubina'
]

# Seconds to wait for the server; without a timeout requests.get can block forever
REQUEST_TIMEOUT = 30

# Text of every successfully scraped page, joined once after the loop
page_texts = []

for query in query_list:
    # Construct the URL for the current query
    url = url_base.format(query)

    # Send an HTTP GET request; a connection error or timeout is reported and
    # skipped the same way a non-200 response is, instead of aborting the run
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print("Failed to retrieve the page. Error:", exc)
        continue

    # Check if the request was successful
    if response.status_code != 200:
        print("Failed to retrieve the page. Status code:", response.status_code)
        continue

    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the element with class 'category-page__members'
    category_members = soup.find('div', {'class': 'category-page__members'})

    # Keep the text within the 'category-page__members' div
    if category_members:
        page_texts.append(category_members.get_text())
    else:
        print("Div with class 'category-page__members' not found on the page.")

# Concatenated text of all pages, in query_list order (read by later cells)
all_content = "".join(page_texts)

# Save the concatenated content to a single text file
with open('narutopedia_combined_content.txt', 'w', encoding='utf-8') as file:
    file.write(all_content)
print("Saved combined content to narutopedia_combined_content.txt")


Saved combined content to narutopedia_combined_content.txt


In [3]:
url_example = 'https://naruto.fandom.com/wiki/Ada'

# Bound before the request so cells that iterate over all_links still run
# (over an empty list) when the page could not be fetched
all_links = []

# The timeout keeps a stalled connection from hanging the cell indefinitely
response = requests.get(url_example, timeout=30)
if response.status_code == 200:
    # Parse the HTML content of the page using BeautifulSoup
    soup = BeautifulSoup(response.content, 'html.parser')

    # Every <p> element on the page
    category_members = soup.find_all('p')
    # href of every anchor nested inside a <p>
    all_links = [tag['href'] for tag in soup.select('p a[href]')]
    for data in category_members:
        print(data.get_text())
else:
    print("Failed to retrieve the page. Status code:", response.status_code)



Ada (エイダ, Eida) is a former member of Kara and a citizen of Konohagakure.

In his plot to kill Jigen, Amado heavily modified Ada, along with her younger brother Daemon, with Shinobi-Ware with capabilities exceeding that of Jigen. The effects of her modifications left her forever despising Amado. Because of her superior might, Jigen had ordered for her disposal. Boro was given the order. However, affected by Ada's powers, Boro could not bring himself to complete the task, and instead hid Ada and Daemon in a remote location operated by his cult for his own uses.

Ada is a very cold and indifferent person who cares little for the events around her. This comes from having grown bored in the life that was forced upon her. With all people automatically drawn to her and unable to physically oppose her, she actually finds someone able to resist her at all amusing. Stemming from this, her greatest desire is to find someone who will genuinely love her. Ada showed great interest in Kawaki after

In [8]:

# Show each collected href with its '/wiki/' prefix removed, one per line
for href in all_links:
    print(href.replace('/wiki/', ''))

Kara
Konohagakure
Jigen
Amado
Daemon
Shinobi-Ware
Boro
Land_of_Snow#Boro's_Facility
%C5%8Ctsutsuki_Cult
Kawaki
Boruto_Uzumaki
%C5%8Ctsutsuki
Code
Ten-Tails_(Kara)
Delta
Shikamaru
Seventh_Hokage
Sasuke
Reflection_Shinjutsu
Omnipotence
Senrigan
#cite_note-1
Kara
Scientific_Ninja_Tools
Taijutsu
#cite_note-2
#cite_note-3
Shibai_%C5%8Ctsutsuki
#cite_note-4
Senrigan
#cite_note-5
#cite_note-6
#cite_note-7
#cite_note-8
#cite_note-9
#cite_note-10
Shadow_Imitation_Technique
#cite_note-11
#cite_note-12
Shinjutsu
Omnipotence
#cite_note-13
%C5%8Ctsutsuki
Code%27s_Assault_Arc
Isshiki_%C5%8Ctsutsuki
Bug
K%C4%81ma
God_Tree_(disambiguation)
Reflection_Shinjutsu
Momoshiki
Ultra-Big_Ball_Rasengan
Claw_Mark
Omnipotence_Arc
Shadow_Imitation_Technique
Konoha
%C5%8Ctsutsuki
Team_7_(Konohamaru)
Senrigan
Sai_Yamanaka
Konohamaru_Sarutobi
Shinjutsu
Akebi_Sanzu
Sumire_Kakei
Sarada_Uchiha
Shikadai_Nara
Claw_Grime
Hinata
Daikokuten
Sasuke
Boruto%27s_Return_Arc
Rasengan:_%22Uzuhiko%22


In [5]:
# Flatten the scraped category text onto one line: newlines become spaces,
# any character directly followed by a tab is removed together with that tab,
# and any tabs still left become spaces
cleaned_string = re.sub(r'.\t', '', all_content.replace('\n', ' ')).replace('\t', ' ')

# Split the input string by two or more whitespace characters using regular expression
name_list = re.split(r'\s{2,}', cleaned_string)

# Trim every entry and filter out the ones that end up empty
name_list = [name.strip() for name in name_list if name.strip()]

# Underscore form of every name (spaces -> '_'), matching the shape of the
# /wiki/ hrefs. Assigning to the loop variable inside a plain for-loop never
# changes the list, so the converted names are collected into a list of their
# own; name_list keeps its spaces for anything that already reads it.
wiki_name_list = [name.replace(' ', '_') for name in name_list]

['A (First Raikage)', 'A (Fourth Raikage)', 'A (Second Raikage)', 'A (Third Raikage)', 'Abiru', 'Ada', 'Agara', 'Agari', 'Agari Kaisen', 'Ageha', 'Agira Ryūdōin', 'Ahiko', 'Aho Bird', 'Aino', 'Ajisai', 'Akaboshi', 'Akahoshi', 'Akamaru', 'Akane', 'Akane (Kunoichi)', "Akane's Father", 'Akari', 'Akari Tatsushiro', 'Akatsuchi', 'Akebi Sanzu', 'Akemaru', 'Akino', 'Akio', 'Akita Inuzuka', 'Akkun', 'Amachi', 'Amado Sanzu', 'Amagi', 'Amagi Izuno', 'Amai', 'Amaru', 'Amayo', 'Ameno', 'Ameyuki', 'Ameyuri Ringo', 'Ami', 'Amino', 'Amuda', 'Anaguma', 'Anato', 'Anbu Commander', 'Aniki', 'Aniki (missing-nin)', 'Anko Mitarashi', 'Ankorodō Shop Owner', 'Ao', 'Aoba Yamashiro', 'Aoda', 'Aoi (game)', 'Aoi Rokushō', 'Aoneko', 'Aosa', 'Arai', 'Arashi', 'Arashi Umeya', 'Araumi Funato', 'Araya', 'Armadiko', 'Asaka', 'Asaki', 'Asami', 'Ashimaru', 'Ashina', 'Ashina Uzumaki', 'Ashitaba', 'Asuma Sarutobi', 'Asura Path (character)', 'Asura Ōtsutsuki', 'Atsui', 'Awaji Unabara', 'Ayame', 'Azami', 'Azure Fang', 'Babu'