In [1]:
from bs4 import BeautifulSoup
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import re

def extract_text_from_html(file_path):
    """Return the text content of the HTML file at ``file_path``.

    The file is opened in text mode as UTF-8, parsed by BeautifulSoup using
    the ``'html.parser'`` backend, and the result of ``get_text()`` on the
    parsed document is returned.
    """
    with open(file_path, 'r', encoding='utf-8') as html_handle:
        # Parsing happens while the handle is still open, since BeautifulSoup
        # is given the file object itself.
        document = BeautifulSoup(html_handle, 'html.parser')
        return document.get_text()

def find_characters(text, characters=None):
    """Find which sentences of ``text`` mention each character name.

    Args:
        text: The plain text to scan.
        characters: Optional iterable of character names to look for. When
            omitted (``None``), a built-in example list of names is used.
            Duplicate names are ignored after their first occurrence.

    Returns:
        A ``(character_mentions, sentences)`` tuple. ``sentences`` is the
        list obtained by splitting ``text`` into sentences, and
        ``character_mentions`` maps every name to the ascending list of
        indices into ``sentences`` whose text contains that name.
    """
    if characters is None:
        # Example list of characters; pass ``characters`` to use another cast.
        characters = ['Draco', 'Hermione', 'Harry', 'Lucius', 'Voldemort','Astoria', 'Snape', 'Madeye Moody']
    else:
        # De-duplicate while keeping first-seen order, so a repeated name
        # cannot record the same sentence index twice.
        characters = list(dict.fromkeys(characters))

    # Split on a whitespace character that directly follows "." or "?".
    # The two negative lookbehinds suppress the split after dotted
    # abbreviations such as "e.g." and after a capital + lowercase + "."
    # token such as "Mr.". Note that "!" is not treated as a terminator.
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', text)

    # Matching is a plain, case-sensitive substring test.
    character_mentions = {
        character: [
            idx for idx, sentence in enumerate(sentences)
            if character in sentence
        ]
        for character in characters
    }
    return character_mentions, sentences

def create_cooccurrence_matrix(character_mentions, sentences, window=5):
    """Count, per ordered pair of characters, nearby mentions.

    For every mention of a character at sentence index ``idx``, each *other*
    character that has at least one mention with index in
    ``[idx - window, idx + window]`` (clamped to the bounds of ``sentences``)
    increments the count for the ordered key ``(character, other_character)``.

    Args:
        character_mentions: Mapping of character name to a list of sentence
            indices.
        sentences: The sentence list; only its length is used, to clamp the
            upper end of the window.
        window: Number of sentences to look at on each side of a mention.

    Returns:
        A ``Counter`` keyed by ``(character, other_character)`` tuples. The
        keys are ordered pairs, so ``(a, b)`` and ``(b, a)`` are counted
        independently.
    """
    sentence_total = len(sentences)
    pair_counts = Counter()

    for name, positions in character_mentions.items():
        # Every character except the one currently being examined.
        others = [
            (other_name, other_positions)
            for other_name, other_positions in character_mentions.items()
            if other_name != name
        ]
        for position in positions:
            lower = max(0, position - window)
            upper = min(sentence_total, position + window + 1)
            # One increment per other character that appears at least once
            # inside the half-open index range [lower, upper).
            pair_counts.update(
                (name, other_name)
                for other_name, other_positions in others
                if any(p in range(lower, upper) for p in other_positions)
            )

    return pair_counts

def plot_network(cooccurrence):
    G = nx.Graph()
    for (char1, char2), weight in cooccurrence.items():
        G.add_edge(char1, char2, weight=weight)

    pos = nx.spring_layout(G)
    plt.figure(figsize=(12, 12))

    nx.draw_networkx_nodes(G, pos, node_size=7000, node_color='skyblue')
    nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.5)
    nx.draw_networkx_labels(G, pos, font_size=12, font_family='sans-serif')

    centrality = nx.degree_centrality(G)
    nx.draw_networkx