### import importlib
import sys
import os
import nltk
import pandas as pd
import networkx as nx

# Make the project's 'src' directory importable.
# NOTE: '../src' is relative, so os.path.abspath resolves it against the
# current working directory of the running kernel.
src_path = os.path.abspath('../src')
# Re-running this cell must not keep appending the same entry to sys.path.
if src_path not in sys.path:
    sys.path.append(src_path)

# Download the NLTK resources 'punkt', 'punkt_tab' and 'stopwords'
# (same resources, same order as before).
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

In [1]:
# Import classes from the modules using their correct filenames
from DataLoaderClass import DataLoader

ModuleNotFoundError: No module named 'DataLoaderClass'

In [None]:
# Initialize paths.
# Both inputs live under the same example directory, so that directory is
# spelled out once; pointing the notebook at another example only requires
# changing EXAMPLE_DIR. The resulting path strings are unchanged.
EXAMPLE_DIR = '../examples/EX1_POWER_SYSTEM_FPGA_FREQUENCY_ESTIMATORS'
BIB_FILE_PATH = f'{EXAMPLE_DIR}/index.bib'  # the example's .bib file
PDF_FOLDER_PATH = f'{EXAMPLE_DIR}/files'    # the example's 'files' folder

In [None]:
# Data loading: construct the loader from the two configured paths,
# then run its load-and-process step and keep what it returns.
loader = DataLoader(
    BIB_FILE_PATH,
    PDF_FOLDER_PATH,
)
processed_data = loader.load_and_process()

In [None]:
# ============================================================== #
# EDA M3 :: Sources -- imports and shared objects
# ============================================================== #
from eda.m3_sources_analysis import Processor, Visualizer, Reporter

# Expose the loaded dataset under the name used in this section.
data = processed_data

# One Processor built on the dataset, plus a Visualizer and a Reporter
# instance (both constructed without arguments).
processor = Processor(data)
visualizer = Visualizer()
reporter = Reporter()

In [None]:
#################################################################################
### Function 1 => Most Frequent Quotes (Table & Barplot)
#################################################################################

In [None]:
# Function 1 pipeline: compute -> plot -> export -> display.

# Ask the processor for the most frequent quotes, with n=10.
top_quotes = processor.get_most_frequent_quotes(n=10)

# Plot the table; the call goes through the Visualizer class itself.
Visualizer.plot_most_frequent_quotes(
    df=top_quotes, title="Top 10 Most Frequent Quotes", filename="most_frequent_quotes"
)

# Export the same table through Reporter.save_to_csv.
Reporter.save_to_csv(top_quotes, "most_frequent_quotes")

# A bare expression on the last line lets the notebook render the table.
top_quotes

In [None]:
#################################################################################
### Function 2 => Quotes by Year (Table & Line Chart)
#################################################################################

### **Function 2: Quotes by Year**

#### **Overview**:
This analysis examines how quotes are distributed over time, providing insights into the evolution of popular ideas and recurring themes in the dataset. By grouping quotes by year and counting their occurrences, we can track the rise or decline of specific quotes.

#### **Interpretation**:
- **Emerging Quotes**: Quotes that appear frequently in recent years may indicate emerging ideas or trends in the field.
- **Declining Quotes**: Quotes that were more common in earlier years but less so now might represent outdated concepts or fading interest.
- **Consistent Quotes**: Quotes with steady usage across years may highlight foundational or widely accepted principles.

In [None]:
# Function 2 pipeline: compute -> plot -> export -> preview.

# Ask the processor for the quotes-by-year table.
quotes_by_year = processor.get_quotes_by_year()

# Plot the trends; the call goes through the Visualizer class itself.
Visualizer.plot_quote_trends(
    df=quotes_by_year, title="Quote Trends Over Time", filename="quote_trends"
)

# Export the same table through Reporter.save_to_csv.
Reporter.save_to_csv(quotes_by_year, "quotes_by_year")

# Preview: head(10) limits the displayed output to the first 10 entries.
quotes_by_year.head(10)

In [None]:
#################################################################################
### Function 3 => Quotes Context Analysis (Table)
#################################################################################

### **Function 3: Quotes Context Analysis**

#### **Overview**:
This analysis extracts the context in which specific quotes appear, including sentences or paragraphs before and after the quote. It helps understand how quotes are used and their relevance to the surrounding discussion.

#### **Interpretation**:
- **Contextual Insights**: Analyze the sentences around a quote to better understand its role in the text.
- **Relevance**: Identify how quotes are used to support arguments or highlight key ideas.
- **Metadata**: Connect quotes to their associated metadata, such as title, authors, and year, for a richer understanding.

In [None]:


# Function 3 pipeline: choose quotes -> look up contexts -> export -> preview.

# The quotes whose surrounding text should be looked up.
quotes_to_analyze = [
    "Frequency is a challenging parameter to estimate.",
    "Power systems require stability and robustness.",
]

# Look up the contexts of those quotes.
# NOTE(review): context_window=2 presumably sets how much surrounding text
# is kept around each quote -- confirm the unit against Processor.
quote_contexts = processor.get_quote_contexts(
    quotes_to_analyze,
    context_window=2,
)

# Export the table through Reporter.save_to_csv.
Reporter.save_to_csv(quote_contexts, "quote_contexts")

# Preview: head(10) limits the displayed output to the first 10 entries.
quote_contexts.head(10)

In [None]:
#################################################################################
### Sentiment Analysis of Quote Contexts
#################################################################################


In [None]:
# Sentiment pipeline: contexts -> sentiment -> plot -> export -> preview.

# Look up the contexts of a single quote (context_window=2).
contexts = processor.get_quote_contexts(
    quotes_to_analyze=["Frequency is a challenging parameter to estimate."], context_window=2
)

# Run the processor's sentiment analysis over those contexts.
sentiment_df = processor.analyze_sentiment_of_contexts(contexts)

# Plot the sentiment distribution; the call goes through the Visualizer class.
Visualizer.plot_sentiment_distribution(
    df=sentiment_df,
    title="Sentiment Distribution of Quote Contexts",
    filename="quote_sentiment_distribution",
)

# Export the sentiment table through Reporter.save_to_csv.
Reporter.save_to_csv(sentiment_df, "quote_sentiment_analysis")

# Preview: head(10) limits the displayed output to the first 10 entries.
sentiment_df.head(10)

In [None]:
#################################################################################
### Theme Extraction from Quote Contexts
#################################################################################

In [None]:
# Theme pipeline: contexts -> themes -> word cloud -> export -> display.

# Look up the contexts of a single quote (context_window=2).
contexts = processor.get_quote_contexts(
    quotes_to_analyze=["Frequency is a challenging parameter to estimate."], context_window=2
)

# Ask the processor for themes from those contexts, with top_n=10.
themes_df = processor.extract_themes_from_contexts(contexts, top_n=10)

# Draw the word cloud; the call goes through the Visualizer class itself.
Visualizer.plot_word_cloud_from_themes(
    df=themes_df, title="Themes from Quote Contexts", filename="quote_context_themes"
)

# Export the themes table through Reporter.save_to_csv.
Reporter.save_to_csv(themes_df, "quote_context_themes")

# A bare expression on the last line lets the notebook render the table.
themes_df
