In [2]:
import sys
# !{sys.executable} -m pip install pdfplumber spacy

In [3]:
import sys
# !{sys.executable} -m spacy download en_core_web_sm

    # Manually specify the table-of-contents page number

# Text manipulation

    # Get a DataFrame of Items with their corresponding starting page numbers

In [4]:
import pdfplumber
import re
import pandas as pd

# By default, the function reads the table of contents from zero-based page index 2 (the third page of the PDF)
def item_pages_df(pdf_path, page_num=2):
    """Parse a table-of-contents page into item titles and their listed page numbers.

    Every text line of the chosen page that ends in
    ``Item <number>[letter]. <title> <page>`` contributes one entry.

    Args:
        pdf_path: Path to the PDF file, handed to ``pdfplumber.open``.
        page_num: Zero-based index into ``pdf.pages`` of the table-of-contents
            page. The default ``2`` selects the third page of the file.

    Returns:
        A ``(DataFrame, dict)`` tuple. The DataFrame has the columns
        ``"Title"`` and ``"Page Number"``; the dict maps each title to its
        page number as an ``int``. Both are empty when the page yields no text
        or no line matches the pattern.

    Raises:
        IndexError: If ``page_num`` is outside ``pdf.pages``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # A falsy result (None or "") means the page produced no text; fall back
        # to an empty string so the split below cannot fail on None.
        text = pdf.pages[page_num].extract_text() or ""

    # Title is captured lazily so the trailing run of digits is taken as the page number.
    pattern = re.compile(r'(Item \d+[A-Z]?\..*?)\s+(\d+)$')
    items_dict = {}
    for line in text.split("\n"):
        match = pattern.search(line)
        if match:
            # A title that appears twice keeps the page number of its last occurrence.
            items_dict[match.group(1).strip()] = int(match.group(2))

    pages_table = pd.DataFrame(list(items_dict.items()), columns=["Title", "Page Number"])
    return pages_table, items_dict

    Extract sentences from the PDF based on the Item.
    Return a DataFrame of the sentences in the Item.
    Example: extract_item_specific("Item 1A. Risk Factors", items_dict=pages_dict, pdf_file=pdf_file)

In [5]:
import pdfplumber
import pandas as pd
import nltk
import re
from nltk.tokenize import sent_tokenize

def extract_item_specific(selected_item, items_dict, pdf_file):
    """Extract the text of one item from the PDF and return it split into sentences.

    The item's pages run from its own page number in ``items_dict`` up to the
    page before the next item (ordered by page number). When the next item
    starts on the same page, that shared page is still extracted. The last
    item runs to the end of the PDF.

    Args:
        selected_item: Title of the item; must be a key of ``items_dict``.
        items_dict: Mapping of item title to 1-based page number.
            NOTE(review): the numbers are used directly as PDF page positions;
            confirm the table-of-contents numbering matches the PDF pages.
        pdf_file: Path to the PDF file, handed to ``pdfplumber.open``.

    Returns:
        A DataFrame with a single ``"Sentences"`` column, one row per sentence
        produced by ``sent_tokenize``. Empty when no text was extracted.

    Raises:
        ValueError: If ``selected_item`` is not a key of ``items_dict``.
    """
    if selected_item not in items_dict:
        # Raise rather than call exit(): exit() would stop the whole interpreter
        # or kernel instead of reporting the problem to the caller.
        raise ValueError(
            f"Error: '{selected_item}' not found in items_dict. Please check the spelling."
        )

    start_page = items_dict[selected_item]

    # sorted() is stable, so items sharing a page keep their table order.
    sorted_items = sorted(items_dict.items(), key=lambda entry: entry[1])
    end_page = None
    for position, (title, _) in enumerate(sorted_items):
        if title == selected_item:
            if position + 1 < len(sorted_items):
                next_start = sorted_items[position + 1][1]
                # Never end before the start page: if the next item begins on the
                # same page, "next_start - 1" would give an empty page range.
                end_page = max(next_start - 1, start_page)
            break

    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        # Compare with None explicitly so an end page of 0 is not mistaken for "no end page".
        last_page = len(pdf.pages) if end_page is None else end_page
        for page_num in range(start_page - 1, last_page):  # zero-based index
            try:
                page_text = pdf.pages[page_num].extract_text()
            except IndexError:
                print(f"Warning: Page {page_num + 1} out of range.")
                break
            if page_text:
                page_texts.append(page_text.strip())

    extracted_text = "\n".join(page_texts)

    # Collapse all whitespace runs (including newlines) into single spaces.
    clean_text = re.sub(r'\s+', ' ', extracted_text).strip()
    # Delete every character other than letters, digits, spaces and , . ? !
    # Hyphens and apostrophes vanish, so "fee-generating" becomes "feegenerating".
    clean_text = re.sub(r'[^A-Za-z0-9,.?! ]+', '', clean_text)

    sentences = sent_tokenize(clean_text)
    return pd.DataFrame(sentences, columns=["Sentences"])

    # Extracts key words related to a given target word (e.g., 'risk') using dependency parsing.

In [6]:
import spacy
from collections import Counter
import pandas as pd

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# ✅ Step 1: Preprocess Text (KEEP Stop Words)
def preprocess_text(df):
    """Lemmatize each sentence while keeping stop words.

    Args:
        df: DataFrame with a ``"Sentences"`` column of strings.

    Returns:
        A new DataFrame with one ``"Processed Sentences"`` column. Each row is
        the space-joined lemmas of the tokens whose ``is_alpha`` flag is true,
        in the same order as the input rows.
    """
    def _lemmatize(sentence):
        # Only the is_alpha test filters tokens; stop words are not removed here.
        return " ".join(tok.lemma_ for tok in nlp(sentence) if tok.is_alpha)

    lemmatized = [_lemmatize(sentence) for sentence in df["Sentences"]]
    return pd.DataFrame(lemmatized, columns=["Processed Sentences"])

# ✅ Step 2: Extract Context Words (EXCLUDE Stop Words in Counting)
def extract_context_words(df, target_word, num_display=20, column_name="Processed Sentences"):
    """Count the words linked to ``target_word`` in the dependency parse of each sentence.

    Args:
        df: DataFrame holding the sentences to parse.
        target_word: Word to look for; compared case-insensitively with each token's text.
        num_display: Accepted for compatibility but never read by this function;
            every collected keyword is returned.
        column_name: Name of the column in ``df`` that holds the sentences.

    Returns:
        A DataFrame with the columns ``"Keyword"`` (lower-cased word) and
        ``"Frequency"``, ordered from most to least frequent.
    """
    excluded = nlp.Defaults.stop_words  # spaCy's default stop-word set
    child_relations = ("amod", "compound", "nsubj", "prep")
    head_tags = ("NOUN", "PROPN")

    # Sorting fixes the order in which sentences are visited, which in turn
    # fixes the order of equally frequent keywords in the result.
    ordered = df.sort_values(by=column_name, ignore_index=True)

    collected = []
    for sentence in ordered[column_name]:
        for token in nlp(sentence):
            if token.text.lower() != target_word.lower():
                continue

            # Direct children of the target with one of the listed relations
            # (e.g. "high risk", "financial risk").
            collected.extend(
                child.text.lower()
                for child in token.children
                if child.dep_ in child_relations and child.text.lower() not in excluded
            )

            # The word the target attaches to, when that word is a noun or proper noun.
            governor = token.head
            if governor.pos_ in head_tags and governor.text.lower() not in excluded:
                collected.append(governor.text.lower())

            # Any ancestor of the target labelled "acomp".
            # NOTE(review): presumably this is what picks up "subject" in
            # "subject to risk"; confirm against real parses.
            collected.extend(
                ancestor.text.lower()
                for ancestor in token.ancestors
                if ancestor.dep_ == "acomp" and ancestor.text.lower() not in excluded
            )

    tallies = Counter(collected).most_common()
    return pd.DataFrame(tallies, columns=["Keyword", "Frequency"])

# Sentiment analysis and polarity function

In [7]:
# pip install vaderSentiment

In [8]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

def analyze_word_sentiment(df):
    """Score each keyword with VADER and report frequency-weighted sentiment.

    Works on a copy, so the DataFrame passed in is left untouched. Two
    summary scores are printed: one over all keywords and one over the
    keywords whose sentiment is not exactly 0.0.

    Args:
        df: DataFrame with a ``"Keyword"`` column of strings and a numeric
            ``"Frequency"`` column.

    Returns:
        A copy of ``df`` with two extra columns: ``"Sentiment"`` (VADER
        ``compound`` score of the keyword) and ``"Weighted Sentiment"``
        (``Sentiment * Frequency``).
    """
    analyzer = SentimentIntensityAnalyzer()

    # Copy first: adding columns to the caller's frame would silently change a
    # table that earlier cells may already have displayed.
    scored = df.copy()
    scored["Sentiment"] = pd.Series(
        [analyzer.polarity_scores(word)['compound'] for word in scored["Keyword"]],
        index=scored.index,
        dtype=float,
    )
    scored["Weighted Sentiment"] = scored["Sentiment"] * scored["Frequency"]

    def _weighted_mean(frame):
        # Frequency-weighted average; 0.0 when there is nothing to average
        # (empty table or frequencies summing to zero) instead of dividing by zero.
        total_frequency = frame["Frequency"].sum()
        if total_frequency == 0:
            return 0.0
        return frame["Weighted Sentiment"].sum() / total_frequency

    average_sentiment = _weighted_mean(scored)
    non_neutral_sentiment = _weighted_mean(scored[scored["Sentiment"] != 0.0])

    print(f"Weighted Sentiment Score (Including Neutral Words): {average_sentiment}")
    print(f"Weighted Sentiment Score (Excluding Neutral Words): {non_neutral_sentiment}")

    return scored


# Preprocessing for 2023 10-K starts here
# 2023

In [9]:
# Read the 2023 filing
pdf_file = "Apollo-23-10-k.pdf"  # Replace with actual file path

# Build the item/page table from the table-of-contents page (page index entered manually).
# item_pages_df returns (DataFrame, dict); unpack a single call so the PDF is
# opened and parsed once instead of twice.
pages_df, pages_dict = item_pages_df(pdf_path=pdf_file, page_num=2)

pages_df

Unnamed: 0,Title,Page Number
0,Item 1. Business,13
1,Item 1A. Risk Factors,33
2,Item 1B. Unresolved Staff Comments,59
3,Item 1C. Cybersecurity,59
4,Item 2. Properties,62
5,Item 3. Legal Proceedings,62
6,Item 4. Mine Safety Disclosures,62
7,"Item 5. Market for Registrant's Common Equity,...",63
8,Item 6. [Reserved],65
9,Item 7. Management's Discussion and Analysis o...,66


## Q1. How it has changed from 2023-2024 (Risk Factors more or less and to what degree) 
i. Risk Profiling by segmenting key words from 1A from both 2023 and 2024

    # key word: “risk”
    # range: Item 1A. Risk Factors
    # year: 2023


In [10]:
# Extract the sentences of "Item 1A. Risk Factors" from the PDF currently bound
# to pdf_file, using the page numbers in pages_dict
df_item = extract_item_specific("Item 1A. Risk Factors", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [11]:
# Collect the words linked to "risk" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "risk", num_display=30)

In [12]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
Q1_2023context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the scored keyword table (Q1: "risk", 2023)
Q1_2023context_words_with_sentiment

Weighted Sentiment Score (Including Neutral Words): 0.04088407079646017
Weighted Sentiment Score (Excluding Neutral Words): 0.16499642857142857


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,subject,20,0.0,0.0
1,credit,7,0.3818,2.6726
2,associate,5,0.0,0.0
3,concentration,5,0.0,0.0
4,additional,4,0.0,0.0
5,significant,4,0.2023,0.8092
6,litigation,3,-0.2023,-0.6069
7,regulatory,3,0.0,0.0
8,expose,3,-0.1531,-0.4593
9,liquidity,3,0.0,0.0


# Q2. Investments (where are they investing the money)
i. (starting from Page 151 breaking it down from Assets = Liabilities + Equity) 

    # key word: “investment” -sentences involving investments and frequency 
    # range: Item 1. Business
    # year: 2023


In [13]:
# Extract the sentences of "Item 1. Business" from the PDF currently bound to
# pdf_file, using the page numbers in pages_dict
df_item = extract_item_specific("Item 1. Business", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [14]:
# Collect the words linked to "investment" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "investment", num_display=30)

In [15]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
Q2_2023context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the scored keyword table (Q2: "investment", 2023)
Q2_2023context_words_with_sentiment

Weighted Sentiment Score (Including Neutral Words): 0.02577484662576687
Weighted Sentiment Score (Excluding Neutral Words): 0.18266521739130437


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,strategy,9,0.0000,0.000
1,opportunity,8,0.4215,3.372
2,portfolio,7,0.0000,0.000
3,adviser,6,0.0000,0.000
4,management,6,0.0000,0.000
...,...,...,...,...
87,return,1,0.0000,0.000
88,executing,1,0.0000,0.000
89,exit,1,0.0000,0.000
90,pursue,1,0.0000,0.000


# Q3. Value Generation( How it is making Money) 
i. Finding sentiment around the key words of ‘profit’

    # key word: "income" - sentences involving income and frequency 
    # range: Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations
    # year: 2023


In [16]:
# Extract the sentences of Item 7 (Management's Discussion and Analysis of
# Financial Condition and Results of Operations) from the PDF bound to pdf_file.
# The title string must match a key of pages_dict exactly.
df_item = extract_item_specific("Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [17]:
# Collect the words linked to "income" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "income", num_display=30)

In [71]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
# NOTE(review): the saved execution count (In [71]) is out of sequence with the
# neighbouring cells - re-run top to bottom to confirm the output is current
Q3_2023context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the first five rows of the scored keyword table (Q3: "income", 2023)
Q3_2023context_words_with_sentiment.head()

Weighted Sentiment Score (Including Neutral Words): 0.019273684210526318
Weighted Sentiment Score (Excluding Neutral Words): 0.3662


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,feegenerating,15,0.0,0.0
1,total,11,0.0,0.0
2,performance,11,0.0,0.0
3,base,8,0.0,0.0
4,feeeligible,5,0.0,0.0


# Q4. Total AUM change from 2023-2024
i. Basic analysis of balance sheet between 2023 and 2024 and AUM delta

    # keyword: “AUM”
    # Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations
    # year 2023

In [19]:
# Extract the sentences of Item 7 (Management's Discussion and Analysis of
# Financial Condition and Results of Operations) from the PDF bound to pdf_file.
# The title string must match a key of pages_dict exactly.
df_item = extract_item_specific("Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [None]:
# Collect the words linked to "AUM" in the processed sentences and count them
# (the match on the target word is case-insensitive).
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
# NOTE(review): this cell is saved without an execution count (In [None]) -
# confirm it runs before the sentiment cell that follows
context_words = extract_context_words(df_item_processed, "AUM", num_display=30)

In [21]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
Q4_2023context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the scored keyword table (Q4: "AUM", 2023)
Q4_2023context_words_with_sentiment

Weighted Sentiment Score (Including Neutral Words): 0.0053117647058823535
Weighted Sentiment Score (Excluding Neutral Words): 0.3612


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,feegenerating,14,0.0,0.0
1,performance,10,0.0,0.0
2,base,8,0.0,0.0
3,total,8,0.0,0.0
4,feeeligible,5,0.0,0.0
5,aum,3,0.0,0.0
6,nav,2,0.0,0.0
7,present,2,0.0,0.0
8,apollos,2,0.0,0.0
9,end,2,0.0,0.0


# Preprocessing for 2024 10-K starts here
# 2024

In [22]:
# Read the 2024 filing (rebinds pdf_file, pages_df and pages_dict from the 2023 section)
pdf_file = "Apollo-24-10-k.pdf"  # Replace with actual file path

# Build the item/page table from the table-of-contents page (page index entered manually).
# item_pages_df returns (DataFrame, dict); unpack a single call so the PDF is
# opened and parsed once instead of twice.
pages_df, pages_dict = item_pages_df(pdf_path=pdf_file, page_num=2)

pages_df

Unnamed: 0,Title,Page Number
0,Item 1. Business,12
1,Item 1A. Risk Factors,31
2,Item 1B. Unresolved Staff Comments,59
3,Item 1C. Cybersecurity,59
4,Item 2. Properties,62
5,Item 3. Legal Proceedings,62
6,Item 4. Mine Safety Disclosures,62
7,"Item 5. Market for Registrant's Common Equity,...",63
8,Item 6. [Reserved],65
9,Item 7. Management's Discussion and Analysis o...,66


## Q1. How it has changed from 2023-2024 (Risk Factors more or less and to what degree) 
i. Risk Profiling by segmenting key words from 1A from both 2023 and 2024

    # key word: “risk”
    # range: Item 1A. Risk Factors
    # year: 2024


In [23]:
# Extract the sentences of "Item 1A. Risk Factors" from the PDF currently bound
# to pdf_file, using the page numbers in pages_dict
df_item = extract_item_specific("Item 1A. Risk Factors", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [24]:
# Collect the words linked to "risk" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "risk", num_display=30)

In [43]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
# NOTE(review): the output saved under this cell is identical to the Q3 2023
# "income" table (same scores, same top rows), and the execution count (In [43])
# is out of sequence. context_words looks to have held another question's
# result when this ran - re-run top to bottom to confirm.
Q1_2024context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the first five rows of the scored keyword table (Q1: "risk", 2024)
Q1_2024context_words_with_sentiment.head()

Weighted Sentiment Score (Including Neutral Words): 0.019273684210526318
Weighted Sentiment Score (Excluding Neutral Words): 0.3662


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,feegenerating,15,0.0,0.0
1,total,11,0.0,0.0
2,performance,11,0.0,0.0
3,base,8,0.0,0.0
4,feeeligible,5,0.0,0.0


# Q2. Investments (where are they investing the money)
i. (starting from Page 151 breaking it down from Assets = Liabilities + Equity) 

    # key word: “investment” -sentences involving investments and frequency 
    # range: Item 1. Business
    # year: 2024


In [26]:
# Extract the sentences of "Item 1. Business" from the PDF currently bound to
# pdf_file, using the page numbers in pages_dict
df_item = extract_item_specific("Item 1. Business", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [50]:
# Collect the words linked to "investment" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "investment", num_display=30)

In [52]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
# NOTE(review): the output saved here is identical to the tables saved under
# the Q3 and Q4 2024 sentiment cells, so at most one of the three can reflect
# its own keyword - re-run top to bottom to confirm which.
Q2_2024context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the first five rows of the scored keyword table (Q2: "investment", 2024)
Q2_2024context_words_with_sentiment.head()

Weighted Sentiment Score (Including Neutral Words): 0.0072130081300813016
Weighted Sentiment Score (Excluding Neutral Words): 0.14786666666666667


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,income,37,0.0,0.0
1,fund,23,0.0,0.0
2,portfolio,21,0.0,0.0
3,net,17,0.0,0.0
4,total,17,0.0,0.0


# Q3. Value Generation( How it is making Money) 
i. Finding sentiment around the key words of ‘profit’

    # key word: "income" - sentences involving income and frequency 
    # range: Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations
    # year: 2024


In [29]:
# Extract the sentences of Item 7 (Management's Discussion and Analysis of
# Financial Condition and Results of Operations) from the PDF bound to pdf_file.
# The title string must match a key of pages_dict exactly.
df_item = extract_item_specific("Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [30]:
# Collect the words linked to "income" in the processed sentences and count them.
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
context_words = extract_context_words(df_item_processed, "income", num_display=30)

In [55]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
# NOTE(review): the output saved here is identical to the tables saved under
# the Q2 and Q4 2024 sentiment cells, and the execution count (In [55]) is out
# of sequence - re-run top to bottom to confirm the result belongs to "income".
Q3_2024context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the first five rows of the scored keyword table (Q3: "income", 2024)
Q3_2024context_words_with_sentiment.head()

Weighted Sentiment Score (Including Neutral Words): 0.0072130081300813016
Weighted Sentiment Score (Excluding Neutral Words): 0.14786666666666667


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,income,37,0.0,0.0
1,fund,23,0.0,0.0
2,portfolio,21,0.0,0.0
3,net,17,0.0,0.0
4,total,17,0.0,0.0


# Q4. Total AUM change from 2023 - 2024
i. Basic analysis of balance sheet between 2023 and 2024 and AUM delta

    # keyword: “AUM”
    # Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations
    # year 2024



In [32]:
# Extract the sentences of Item 7 (Management's Discussion and Analysis of
# Financial Condition and Results of Operations) from the PDF bound to pdf_file.
# The title string must match a key of pages_dict exactly.
df_item = extract_item_specific("Item 7. Management's Discussion and Analysis of Financial Condition and Results of Operations", items_dict=pages_dict, pdf_file=pdf_file)
# Lemmatize each sentence; the result has a single "Processed Sentences" column
df_item_processed = preprocess_text(df_item)

In [None]:
# Collect the words linked to "AUM" in the processed sentences and count them
# (the match on the target word is case-insensitive).
# The result lists every keyword with its frequency: num_display=30 is passed,
# but extract_context_words never reads it, so no top-30 cut is applied.
# NOTE(review): this cell is saved without an execution count (In [None]) -
# confirm it runs before the sentiment cell that follows
context_words = extract_context_words(df_item_processed, "AUM", num_display=30)

In [56]:
# Score each context word with VADER and add the Sentiment and Weighted Sentiment
# columns; the two weighted summary scores are printed by the call
# NOTE(review): the output saved here is identical to the tables saved under
# the Q2 and Q3 2024 sentiment cells, and the preceding "AUM" extraction cell has
# no execution count - this result presumably does not come from "AUM"; re-run
# top to bottom to confirm.
Q4_2024context_words_with_sentiment = analyze_word_sentiment(context_words)
# Display the first five rows of the scored keyword table (Q4: "AUM", 2024)
Q4_2024context_words_with_sentiment.head()

Weighted Sentiment Score (Including Neutral Words): 0.0072130081300813016
Weighted Sentiment Score (Excluding Neutral Words): 0.14786666666666667


Unnamed: 0,Keyword,Frequency,Sentiment,Weighted Sentiment
0,income,37,0.0,0.0
1,fund,23,0.0,0.0
2,portfolio,21,0.0,0.0
3,net,17,0.0,0.0
4,total,17,0.0,0.0


In [73]:
def get_top_10_words(df_2023, df_2024):
    """Return the ten highest-frequency rows of each year's keyword table.

    Args:
        df_2023: DataFrame with a ``'Frequency'`` column (2023 keywords).
        df_2024: DataFrame with a ``'Frequency'`` column (2024 keywords).

    Returns:
        A ``(top_2023, top_2024)`` tuple. Each is the input sorted by
        ``'Frequency'`` in descending order and cut to at most ten rows; the
        original index labels are kept.
    """
    def _top_ten(frame):
        # Highest frequency first, then keep the leading ten rows.
        return frame.sort_values(by='Frequency', ascending=False).head(10)

    return _top_ten(df_2023), _top_ten(df_2024)

# For each question, take the ten most frequent context words per year and print
# the first five of them (.head() defaults to five rows, hence the "Top 5" labels).

# Q1 (keyword "risk"): top 10 per year, first five printed
top_10_Q1_2023, top_10_Q1_2024 = get_top_10_words(Q1_2023context_words_with_sentiment, Q1_2024context_words_with_sentiment)
print("Top 5 words for Q1 2023:")
print(top_10_Q1_2023[['Keyword', 'Frequency']].head())
print("\nTop 5 words for Q1 2024:")
print(top_10_Q1_2024[['Keyword', 'Frequency']].head())

# Q2 (keyword "investment"): top 10 per year, first five printed
top_10_Q2_2023, top_10_Q2_2024 = get_top_10_words(Q2_2023context_words_with_sentiment, Q2_2024context_words_with_sentiment)
print("\nTop 5 words for Q2 2023:")
print(top_10_Q2_2023[['Keyword', 'Frequency']].head())
print("\nTop 5 words for Q2 2024:")
print(top_10_Q2_2024[['Keyword', 'Frequency']].head())

# Q3 (keyword "income"): top 10 per year, first five printed
top_10_Q3_2023, top_10_Q3_2024 = get_top_10_words(Q3_2023context_words_with_sentiment, Q3_2024context_words_with_sentiment)
print("\nTop 5 words for Q3 2023:")
print(top_10_Q3_2023[['Keyword', 'Frequency']].head())
print("\nTop 5 words for Q3 2024:")
print(top_10_Q3_2024[['Keyword', 'Frequency']].head())

# Q4 (keyword "AUM"): top 10 per year, first five printed
top_10_Q4_2023, top_10_Q4_2024 = get_top_10_words(Q4_2023context_words_with_sentiment, Q4_2024context_words_with_sentiment)
print("\nTop 5 words for Q4 2023:")
print(top_10_Q4_2023[['Keyword', 'Frequency']].head())
print("\nTop 5 words for Q4 2024:")
print(top_10_Q4_2024[['Keyword', 'Frequency']].head())


Top 5 words for Q1 2023:
         Keyword  Frequency
0        subject         20
1         credit          7
2      associate          5
3  concentration          5
4     additional          4

Top 5 words for Q1 2024:
         Keyword  Frequency
0  feegenerating         15
2    performance         11
1          total         11
3           base          8
4    feeeligible          5

Top 5 words for Q2 2023:
       Keyword  Frequency
0     strategy          9
1  opportunity          8
2    portfolio          7
3      adviser          6
4   management          6

Top 5 words for Q2 2024:
     Keyword  Frequency
0     income         37
1       fund         23
2  portfolio         21
3        net         17
4      total         17

Top 5 words for Q3 2023:
         Keyword  Frequency
0  feegenerating         15
2    performance         11
1          total         11
3           base          8
4    feeeligible          5

Top 5 words for Q3 2024:
     Keyword  Frequency
0     income     

In [None]:
def compare_sentiment(df_2023, df_2024):
    """Line up the weighted sentiment of keywords that occur in both years.

    Args:
        df_2023: DataFrame with ``'Keyword'`` and ``'Weighted Sentiment'`` columns (2023).
        df_2024: DataFrame with ``'Keyword'`` and ``'Weighted Sentiment'`` columns (2024).

    Returns:
        A DataFrame with the columns ``'Keyword'``, ``'Weighted Sentiment_2023'``,
        ``'Weighted Sentiment_2024'`` and ``'Sentiment_Difference'`` (2024 minus
        2023). Keywords found in only one of the two inputs are left out.
    """
    wanted = ['Keyword', 'Weighted Sentiment']

    # Default inner join: only keywords present in both tables survive.
    comparison_df = df_2023[wanted].merge(
        df_2024[wanted],
        on='Keyword',
        suffixes=('_2023', '_2024'),
    )

    change = comparison_df['Weighted Sentiment_2024'] - comparison_df['Weighted Sentiment_2023']
    comparison_df['Sentiment_Difference'] = change
    return comparison_df

# For each question, join the 2023 and 2024 keyword tables on 'Keyword' and print
# the first five shared keywords with their weighted sentiment in both years and
# the 2024-minus-2023 difference. Keywords present in only one year do not appear.

# Q1 (keyword "risk")
Q1_sentiment_comparison = compare_sentiment(Q1_2023context_words_with_sentiment, Q1_2024context_words_with_sentiment)
print("Q1 Sentiment Comparison:")
print(Q1_sentiment_comparison.head())

# Q2 (keyword "investment")
Q2_sentiment_comparison = compare_sentiment(Q2_2023context_words_with_sentiment, Q2_2024context_words_with_sentiment)
print("\nQ2 Sentiment Comparison:")
print(Q2_sentiment_comparison.head())

# Q3 (keyword "income")
Q3_sentiment_comparison = compare_sentiment(Q3_2023context_words_with_sentiment, Q3_2024context_words_with_sentiment)
print("\nQ3 Sentiment Comparison:")
print(Q3_sentiment_comparison.head())

# Q4 (keyword "AUM")
Q4_sentiment_comparison = compare_sentiment(Q4_2023context_words_with_sentiment, Q4_2024context_words_with_sentiment)
print("\nQ4 Sentiment Comparison:")
print(Q4_sentiment_comparison.head())



Q1 Sentiment Comparison:
  Keyword  Weighted Sentiment_2023  Weighted Sentiment_2024  \
0  credit                   2.6726                   0.7636   

   Sentiment_Difference  
0                -1.909  

Q2 Sentiment Comparison:
       Keyword  Weighted Sentiment_2023  Weighted Sentiment_2024  \
0     strategy                    0.000                   0.0000   
1  opportunity                    3.372                   1.2645   
2    portfolio                    0.000                   0.0000   
3   management                    0.000                   0.0000   
4         fund                    0.000                   0.0000   

   Sentiment_Difference  
0                0.0000  
1               -2.1075  
2                0.0000  
3                0.0000  
4                0.0000  

Q3 Sentiment Comparison:
       Keyword  Weighted Sentiment_2023  Weighted Sentiment_2024  \
0        total                      0.0                      0.0   
1  performance                      0.0    

In [86]:
# For each question, total the 'Weighted Sentiment' column of the 2023 and 2024
# keyword tables (rounded to 2 decimals) and report the 2024-minus-2023 difference.
# These are sums over all keywords, not frequency-weighted averages.
# NOTE(review): in the saved output the Q1 2024, Q3 2023 and Q4 2024 totals are
# all 1.46 and the Q2 2024 and Q3 2024 totals are both 2.66 - this looks like
# several Q*_context_words_with_sentiment tables hold the same data; re-run the
# notebook top to bottom before relying on these numbers.

# Q1 (risk factors): totals for 2023 and 2024
sum_weighted_sentiment_2023 = round(Q1_2023context_words_with_sentiment['Weighted Sentiment'].sum(), 2)
sum_weighted_sentiment_2024 = round(Q1_2024context_words_with_sentiment['Weighted Sentiment'].sum(), 2)

# Q1: collect both totals and their difference
sentiment_comparison = {
    "Sum Weighted Sentiment 2023": sum_weighted_sentiment_2023,
    "Sum Weighted Sentiment 2024": sum_weighted_sentiment_2024,
    "Difference": round(sum_weighted_sentiment_2024 - sum_weighted_sentiment_2023, 2)
}

print("Sentiment Comparison for Q1 for Risk Factors:")
print(sentiment_comparison)

# Q2 (investments): totals for 2023 and 2024
sum_weighted_sentiment_Q2_2023 = round(Q2_2023context_words_with_sentiment['Weighted Sentiment'].sum(), 2)
sum_weighted_sentiment_Q2_2024 = round(Q2_2024context_words_with_sentiment['Weighted Sentiment'].sum(), 2)

# Q2: collect both totals and their difference
sentiment_comparison_Q2 = {
    "Sum Weighted Sentiment Q2 2023": sum_weighted_sentiment_Q2_2023,
    "Sum Weighted Sentiment Q2 2024": sum_weighted_sentiment_Q2_2024,
    "Difference": round(sum_weighted_sentiment_Q2_2024 - sum_weighted_sentiment_Q2_2023, 2)
}

print("Sentiment Comparison for Q2 For Investments:")
print(sentiment_comparison_Q2)

# Q3 (income / profit): totals for 2023 and 2024
sum_weighted_sentiment_Q3_2023 = round(Q3_2023context_words_with_sentiment['Weighted Sentiment'].sum(), 2)
sum_weighted_sentiment_Q3_2024 = round(Q3_2024context_words_with_sentiment['Weighted Sentiment'].sum(), 2)

# Q3: collect both totals and their difference
sentiment_comparison_Q3 = {
    "Sum Weighted Sentiment Q3 2023": sum_weighted_sentiment_Q3_2023,
    "Sum Weighted Sentiment Q3 2024": sum_weighted_sentiment_Q3_2024,
    "Difference": round(sum_weighted_sentiment_Q3_2024 - sum_weighted_sentiment_Q3_2023, 2)
}

print("Sentiment Comparison for Q3 For Profit:")
print(sentiment_comparison_Q3)

# Q4 (AUM): totals for 2023 and 2024
sum_weighted_sentiment_Q4_2023 = round(Q4_2023context_words_with_sentiment['Weighted Sentiment'].sum(), 2)
sum_weighted_sentiment_Q4_2024 = round(Q4_2024context_words_with_sentiment['Weighted Sentiment'].sum(), 2)

# Q4: collect both totals and their difference
sentiment_comparison_Q4 = {
    "Sum Weighted Sentiment Q4 2023": sum_weighted_sentiment_Q4_2023,
    "Sum Weighted Sentiment Q4 2024": sum_weighted_sentiment_Q4_2024,
    "Difference": round(sum_weighted_sentiment_Q4_2024 - sum_weighted_sentiment_Q4_2023, 2)
}

print("Sentiment Comparison for Q4 For change in AUM:")
print(sentiment_comparison_Q4)

Sentiment Comparison for Q1 for Risk Factors:
{'Sum Weighted Sentiment 2023': 4.62, 'Sum Weighted Sentiment 2024': 1.46, 'Difference': -3.16}
Sentiment Comparison for Q2 For Investments:
{'Sum Weighted Sentiment Q2 2023': 4.2, 'Sum Weighted Sentiment Q2 2024': 2.66, 'Difference': -1.54}
Sentiment Comparison for Q3 For Profit:
{'Sum Weighted Sentiment Q3 2023': 1.46, 'Sum Weighted Sentiment Q3 2024': 2.66, 'Difference': 1.2}
Sentiment Comparison for Q4 For change in AUM:
{'Sum Weighted Sentiment Q4 2023': 0.36, 'Sum Weighted Sentiment Q4 2024': 1.46, 'Difference': 1.1}
