### Summary and Sentiment Scoring Per Report 

In [31]:
#!pip install llama_index_embeddings_huggingface llama_index_vector_stores_milvus
#!pip install backports.tarfile
#!pip install --quiet --upgrade llama-index llama-index-llms-azure-openai llama-index-embeddings-huggingface
#!pip install langchain_openai
import time
from langchain_openai import AzureChatOpenAI
import numpy as np
import pandas as pd
import re
from llama_index.core import Settings, VectorStoreIndex, StorageContext
from llama_index.core.vector_stores import MetadataFilters, MetadataFilter, FilterOperator
from llama_index.vector_stores.milvus import MilvusVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.llms.azure_openai import AzureOpenAI
import pandas as pd
import numpy as np

In [32]:
asset_class='Private Equity'
sector= 'Buyout'
quarter_end_date='2024-06-30'
collection_name = "quarterly_reports_1024_gk_2024_Q2_Buyout"

output_directory= '/home/jovyan/shared/projects/Sep_Iter/Sentiment/output_data/'
api_version='2023-05-15'
api_key='992ce21baef84c68a10972ffa78c4eeb'
azure_endpoint='https://usw1d-openai01.openai.azure.com'


This notebook is created based on the notes from Qi.

We are going to simplify down on the # of questions: these are the topics we want for market level: 

a)     Economic growth and outlook for 2024 and 2025

b)     Investment and portfolio performance

c)     Capital market liquidity and Deal transaction volume

d)     Expectation and Impact of interest rates on investments

e)     Expectation and Impact of inflation on investments    

In [33]:
# Specify the path to your CSV file
file_path = '/home/jovyan/shared/projects/Sep_Iter/Metadata/Data/Metadata_Final/metadata_final.csv'

# Read the CSV file into a DataFrame
metadata_df = pd.read_csv(file_path)

# Display the first few rows of the DataFrame to confirm successful import
# Dropping the columns
metadata_df = metadata_df.drop(columns=['_merge', 'Date'])

# Changing "quarter_end_date" to pandas datetime format
metadata_df['quarter_end_date'] = pd.to_datetime(metadata_df['quarter_end_date'])

# Changing "Vintage" column to integer format
metadata_df['Vintage'] = metadata_df['Vintage'].astype(int)


In [34]:
# Function to filter the DataFrame
def filter_documents_ID(df, asset_class=None, sector=None, quarter_end_date=None):
    filtered_df = df  # Start with the full DataFrame
    
    if asset_class is not None:
        filtered_df = filtered_df[filtered_df['Asset_Class'] == asset_class]
    
    if sector is not None:
        filtered_df = filtered_df[filtered_df['Sector'] == sector]
    
    if quarter_end_date is not None:
        filtered_df = filtered_df[filtered_df['quarter_end_date'] == quarter_end_date]
    
    return filtered_df, filtered_df['OMNI Fund ID'].tolist()

# Function to filter the DataFrame
def filter_documents_pdf_name(df, asset_class=None, sector=None, quarter_end_date=None):
    filtered_df = df  # Start with the full DataFrame
    
    if asset_class is not None:
        filtered_df = filtered_df[filtered_df['Asset_Class'] == asset_class]
    
    if sector is not None:
        filtered_df = filtered_df[filtered_df['Sector'] == sector]
    
    if quarter_end_date is not None:
        filtered_df = filtered_df[filtered_df['quarter_end_date'] == quarter_end_date]
    
    return filtered_df, filtered_df['Document Name'].tolist()

def filter_documents_fund_name(df, asset_class=None, sector=None, quarter_end_date=None):
    filtered_df = df  # Start with the full DataFrame
    
    if asset_class is not None:
        filtered_df = filtered_df[filtered_df['Asset_Class'] == asset_class]
    
    if sector is not None:
        filtered_df = filtered_df[filtered_df['Sector'] == sector]
    
    if quarter_end_date is not None:
        filtered_df = filtered_df[filtered_df['quarter_end_date'] == quarter_end_date]
    
    return filtered_df,filtered_df['omni_fund_name'].tolist()


# Example usage
filtered_df, fund_names = filter_documents_pdf_name(metadata_df, asset_class=asset_class, sector=sector, quarter_end_date=quarter_end_date)
print(len(set(fund_names)))

filtered_df, fund_IDs = filter_documents_ID(metadata_df, asset_class=asset_class, sector=sector, quarter_end_date=quarter_end_date)
print(len(set(fund_IDs)))
fund_names = filtered_df["omni_fund_name"].tolist()
print(len(set(fund_names)))

433
421
421


#### Output Structure Setup

In [35]:
import pandas as pd

market_template = f"""
The input file {{fund_name}} is a quarterly financial reports from a private market investment manager. Please summarize it for each of the topics 1 through 5 below in the specified format. 
```
- Topic1: Economic growth and outlook for 2024 and 2025
- Topic2: Investment and portfolio performance
- Topic3: Capital market liquidity and Deal transaction volume
- Topic4: Expectation and Impact of interest rates on investments
- Topic5: Expectation and Impact of inflation on investments
```
**Response Format:**

1. Begin by stating the fund name: "The fund name is {{fund_name}}."
2. For each topic, use the following structure:
    - **Topic X: [Topic Name]**
    - If the report provides relevant information, summarize the content in at least 5 paragraphs. Include specific examples and data points from the report to support your summary and analysis.
    - If the report does not provide relevant information for a topic, clearly state: "The report does not provide specific details on [Topic Name]. Therefore, I cannot provide a summary for this topic based on the given information."

**Guidelines:**
Provide specific examples and data points from the report to support your summary and analysis. 
Begin your response by identifying the fund name.
In each bullet point, please include at least 5 paragraphs.
Please avoid footnotes and disclaimers while answering the question.
Please give the reasonsing of your response step by step. 
Please do not include false information.
If you do not know the answer, just say you don't know the answer.
Please cite specific references.
"""

In [36]:
column_summary_raw=['Market_Level_Summary']
column_sentiment_raw=['Sentiment_Score']


In [37]:
output_summary_raw = pd.DataFrame(index=fund_names, columns=column_summary_raw)
output_summary_raw

Unnamed: 0,Market_Level_Summary
"New State Capital Partners III, L.P.",
"Reverence Capital Partners Opportunities Fund II, L.P.",
"Halyard Capital Fund II, L.P.",
"Grain Communications Opportunity Fund, L.P.",
"Grain Communications Opportunity Fund II, L.P.",
...,...
TSG7 B L.P.,
Waud Capital Partners III,
"Waud Capital Partners IV, L.P.",
"WestView Capital Partners III, L.P.",


In [38]:
keywords = [
    "Economic growth and outlook for 2024 and 2025",
    "Investment and portfolio performance",
    "Capital market liquidity and Deal transaction volume",
    "Expectation and Impact of interest rates on investments",
    "Expectation and Impact of inflation on investments"
]

In [39]:
fund_list = ', '.join(fund_names[:-1]) + ', and ' + fund_names[-1]
fund_list

"New State Capital Partners III, L.P., Reverence Capital Partners Opportunities Fund II, L.P., Halyard Capital Fund II, L.P., Grain Communications Opportunity Fund, L.P., Grain Communications Opportunity Fund II, L.P., ICV Partners IV, L.P., ICV Partners III, L.P., Adelis Equity Partners III, SSID CV Fund AB, AE Industrial Partners Fund I, LP, AE Industrial Partners Fund II, LP, Kellstrom Co-Investment Partners, LP (Kellstrom), Moeller AE Co-Investment Partners, LP (Moeller), Align Capital Partners Fund I, LP, Align Capital Partners Fund II, LP, Align Capital Partners Fund III, Align ES EV Fund I, L.P., Allegro Fund II, LP, Allegro Fund III, LP, Allegro Fund IV, LP, Atlas Capital Resources Fund II, LP, August Equity Partners V, L.P., Avance Fund I, Avista Capital Partners V, Axcel VII, Breck Partners I, LP, Carlyle Europe Technology Partners V, Carlyle Sub-Saharan Africa Fund Limited, Center Rock Capital Partners I, Center Rock Capital Partners II, Consonance Private Equity II, L.P., E

#### LLM Setup

In [40]:
model="gpt-4o"
Settings.llm = AzureOpenAI(
                deployment_name=model,
                azure_deployment=model,
                use_azure_ad=False,
                api_version=api_version,
                api_key=api_key,
                azure_endpoint=azure_endpoint,
                temperature=0.0,
                timeout=60,
                max_retries=10,
            )

In [41]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

EMBED_MODEL = "Alibaba-NLP/gte-large-en-v1.5"
CHUNK_SIZE = 1024
embed_batch_size = 10

embedding = HuggingFaceEmbedding(
            model_name=EMBED_MODEL,
            max_length=CHUNK_SIZE,  # NOTE I think this needs to be the same as chunk_size to avoid truncation
            embed_batch_size=embed_batch_size,# NOTE!!! Carefully tune this based on the GPU vRAM!
            trust_remote_code=True
        )

In [42]:
vector_store = MilvusVectorStore(uri="http://stepstone-milvus.milvus.svc.cluster.local:19530", dim=1024, overwrite=False, collection_name=collection_name)
index = VectorStoreIndex.from_vector_store(vector_store, embed_model = embedding)

#### LLM Generate

In [43]:
output_summary_raw

Unnamed: 0,Market_Level_Summary
"New State Capital Partners III, L.P.",
"Reverence Capital Partners Opportunities Fund II, L.P.",
"Halyard Capital Fund II, L.P.",
"Grain Communications Opportunity Fund, L.P.",
"Grain Communications Opportunity Fund II, L.P.",
...,...
TSG7 B L.P.,
Waud Capital Partners III,
"Waud Capital Partners IV, L.P.",
"WestView Capital Partners III, L.P.",


In [44]:
output_summary_raw

Unnamed: 0,Market_Level_Summary
"New State Capital Partners III, L.P.",
"Reverence Capital Partners Opportunities Fund II, L.P.",
"Halyard Capital Fund II, L.P.",
"Grain Communications Opportunity Fund, L.P.",
"Grain Communications Opportunity Fund II, L.P.",
...,...
TSG7 B L.P.,
Waud Capital Partners III,
"Waud Capital Partners IV, L.P.",
"WestView Capital Partners III, L.P.",


In [45]:
start_time = time.time() / 60
for i, (fund_name, fund_ID) in enumerate(zip(fund_names,fund_IDs)):
    print(fund_name)
    filters = MetadataFilters(
        filters=[MetadataFilter(key="omni_fund_id", value=fund_ID, operator=FilterOperator.EQ)]
    )
    query_engine = index.as_query_engine(filters=filters, similarity_top_k=20, node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.6)])

    formatted_question = market_template.replace("{fund_name}", fund_name)
    #print(formatted_question)
    
    response = query_engine.query(formatted_question) 

    output_summary_raw.at[fund_name, "Market_Level_Summary"] = response.response
    print(f"{i}th iteration for fund {fund_name}\n")
    print(response.response)

end_time = time.time() / 60
print('Time taken (mins):', round(end_time - start_time, 2))

New State Capital Partners III, L.P.
0th iteration for fund New State Capital Partners III, L.P.

The fund name is New State Capital Partners III, L.P.

- **Topic 1: Economic growth and outlook for 2024 and 2025**
  The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

- **Topic 2: Investment and portfolio performance**
  The report provides detailed information on the investment and portfolio performance of New State Capital Partners III, L.P. As of June 30, 2024, the fund has made four investments with a total equity invested of $134.8 million. The gross realized value stands at $4.2 million, while the unrealized value is $252.1 million, leading to a gross total value of $256.3 million. The fund has shown a significant quarter-over-quarter change of 16.6%.

  The fund's Gross MOIC (Multiple on Invested Capital) is 1.9x, and the Net MOIC is 1.5x. The Gross IRR

#### Output Display

In [47]:
print(output_summary_raw.iloc[0,0])

The fund name is New State Capital Partners III, L.P.

- **Topic 1: Economic growth and outlook for 2024 and 2025**
  The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

- **Topic 2: Investment and portfolio performance**
  The report provides detailed information on the investment and portfolio performance of New State Capital Partners III, L.P. As of June 30, 2024, the fund has made four investments with a total equity invested of $134.8 million. The gross realized value stands at $4.2 million, while the unrealized value is $252.1 million, leading to a gross total value of $256.3 million. The fund has shown a significant quarter-over-quarter change of 16.6%.

  The fund's Gross MOIC (Multiple on Invested Capital) is 1.9x, and the Net MOIC is 1.5x. The Gross IRR (Internal Rate of Return) is 40.9%, and the Net IRR is 26.4%. These figures indicate a strong per

In [48]:
print(output_summary_raw.iloc[1,0])

The fund name is Reverence Capital Partners Opportunities Fund II, L.P.

- **Topic 1: Economic growth and outlook for 2024 and 2025**
  - The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

- **Topic 2: Investment and portfolio performance**
  - The fund, established in 2018, has a size of $1.2 billion and focuses on private equity investments, specifically in the small buyout sector within North America, primarily the United States.
  - As of June 30, 2024, the fund has invested a total of $975.5 million, representing 82% of available capital, across eight platform investments. The fund has also completed one add-on acquisition since December 31, 2023.
  - The overall portfolio value of Fund II increased by $174.0 million (0.13x MOIC) or 7.5% over year-end 2023, and the fund is marked at a 2.56x gross MOIC and a 30.8% gross IRR.
  - Specific investments such

In [49]:
print(output_summary_raw.iloc[5,0])

Empty Response


In [50]:
print(output_summary_raw.iloc[9,0])

The fund name is AE Industrial Partners Fund I, LP.

- **Topic 1: Economic growth and outlook for 2024 and 2025**
  - The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

- **Topic 2: Investment and portfolio performance**
  - The fund has made significant investments in various sectors, with a focus on buyouts in the small buyout sub-sector. The fund size is $680 million, and it primarily operates in North America, specifically the United States.
  - One of the notable investments is in CDI, which represents AEI's first take-private transaction. CDI has shown strong momentum with a robust backlog of approximately $160 million and has diversified into higher-margin renewable, low-carbon, and sustainability projects.
  - Kellstrom has also seen a notable recovery within its distribution, MRO, and aircraft asset management business. The company experienced stron

In [51]:
#output_market.to_csv("terrible.csv")

------------

#### Dataframe Organize

In [52]:
def extract_topic_contents(summary):
    # Check if the summary is a string; if not, return empty values for each topic
    if not isinstance(summary, str):
        return {keyword: '' for keyword in keywords}
    
    # Remove the initial "The fund name is..." or "**Fund Name:**" part
    cleaned_summary = re.sub(r'(\*\*Fund Name:.*?\*\*|\n?The fund name is.*?\n)', '', summary, flags=re.DOTALL)

    # Initialize a dictionary to hold the content for each topic
    topic_contents = {keyword: '' for keyword in keywords}
    
    # Define the pattern to match each topic's content, handling both formats
    pattern = re.compile(
        r'- \*\*Topic \d+: (.*?)\*\*\n(.*?)(?=\n- \*\*Topic \d+: |$)',
        re.DOTALL
    )
    
    # Find all matches for the pattern in the summary
    matches = pattern.findall(cleaned_summary)
    
    # Populate the dictionary with the corresponding content
    for topic, content in matches:
        # Match the topic to the correct keyword
        for keyword in keywords:
            if topic.strip() in keyword:
                topic_contents[keyword] = content.strip()
                break
    
    return topic_contents

In [53]:
output_summary_raw[keywords] = output_summary_raw['Market_Level_Summary'].apply(lambda summary: pd.Series(extract_topic_contents(summary)))
output_summary_raw

Unnamed: 0,Market_Level_Summary,Economic growth and outlook for 2024 and 2025,Investment and portfolio performance,Capital market liquidity and Deal transaction volume,Expectation and Impact of interest rates on investments,Expectation and Impact of inflation on investments
"New State Capital Partners III, L.P.",The fund name is New State Capital Partners II...,The report does not provide specific details o...,The report provides detailed information on th...,The report does not provide specific details o...,The report does not provide specific details o...,The report does not provide specific details o...
"Reverence Capital Partners Opportunities Fund II, L.P.",The fund name is Reverence Capital Partners Op...,- The report does not provide specific details...,"- The fund, established in 2018, has a size of...",- The report does not provide specific details...,- The report does not provide specific details...,- The report does not provide specific details...
"Halyard Capital Fund II, L.P.","The fund name is Halyard Capital Fund II, L.P....",The report does not provide specific details o...,The report provides detailed information on th...,The report does not provide specific details o...,The report does not provide specific details o...,The report does not provide specific details o...
"Grain Communications Opportunity Fund, L.P.",Empty Response,,,,,
"Grain Communications Opportunity Fund II, L.P.",The fund name is Grain Communications Opportun...,- The report does not provide specific details...,- The fund has a vintage year of 2018 and a fu...,- The report does not provide specific details...,- The report does not provide specific details...,- The report does not provide specific details...
...,...,...,...,...,...,...
TSG7 B L.P.,The fund name is TSG7 B L.P.\n\n- **Topic 1: E...,- The report does not provide specific details...,"- The fund, TSG7 B L.P., has a vintage year of...",- The report does not provide specific details...,- The report does not provide specific details...,- The report does not provide specific details...
Waud Capital Partners III,The fund name is Waud Capital Partners III.\n\...,The report does not provide specific details o...,The report highlights that the underlying port...,The report notes that the M&A markets remain c...,The report discusses the impact of elevated in...,The report does not provide specific details o...
"Waud Capital Partners IV, L.P.","The fund name is Waud Capital Partners IV, L.P...",The report does not provide specific details o...,The report highlights that the underlying port...,The report mentions that the M&A markets remai...,The report discusses the impact of elevated in...,The report does not provide specific details o...
"WestView Capital Partners III, L.P.",The fund name is WestView Capital Partners III...,The report does not provide specific details o...,The report provides detailed information on th...,The report does not provide specific details o...,The report does not provide specific details o...,The report does not provide specific details o...


In [54]:
output_summary_raw.iloc[9,0]

"The fund name is AE Industrial Partners Fund I, LP.\n\n- **Topic 1: Economic growth and outlook for 2024 and 2025**\n  - The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n- **Topic 2: Investment and portfolio performance**\n  - The fund has made significant investments in various sectors, with a focus on buyouts in the small buyout sub-sector. The fund size is $680 million, and it primarily operates in North America, specifically the United States.\n  - One of the notable investments is in CDI, which represents AEI's first take-private transaction. CDI has shown strong momentum with a robust backlog of approximately $160 million and has diversified into higher-margin renewable, low-carbon, and sustainability projects.\n  - Kellstrom has also seen a notable recovery within its distribution, MRO, and aircraft asset management business. The company experien

In [55]:
output_summary_raw.iloc[9,1]

'- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.'

In [56]:
output_summary_raw.iloc[9,2]

"- The fund has made significant investments in various sectors, with a focus on buyouts in the small buyout sub-sector. The fund size is $680 million, and it primarily operates in North America, specifically the United States.\n  - One of the notable investments is in CDI, which represents AEI's first take-private transaction. CDI has shown strong momentum with a robust backlog of approximately $160 million and has diversified into higher-margin renewable, low-carbon, and sustainability projects.\n  - Kellstrom has also seen a notable recovery within its distribution, MRO, and aircraft asset management business. The company experienced strong topline and margin performance in 2023, driven by outperformance in both distribution and surplus businesses.\n  - The report highlights the incremental investment in Belcan for its acquisitions of Allegiant International and Base2 Solutions. This indicates a strategic approach to expanding portfolio companies through acquisitions.\n  - The perfo

In [57]:
output_summary_raw.iloc[9,-1]

'- The report does not provide specific details on the expectation and impact of inflation on investments. Therefore, I cannot provide a summary for this topic based on the given information.'

In [58]:
#output_summary_raw.to_pickle('output_summary_raw.pkl')

-----------

#### Sentiment Scoring Generate

In [59]:
combined_summaries_dict = {}

for idx, keyword in enumerate(keywords, start=1):
    combined_string = f"### {keyword} ###\n"
    for fund_name, row in output_summary_raw.iterrows():
        summary = row[keyword]
        if summary:  
            combined_string += f"--{fund_name}\n{summary.strip()}\n\n"
    
    combined_summaries_dict[f'topic{idx}'] = combined_string

combined_summaries_dict

{'topic1': "### Economic growth and outlook for 2024 and 2025 ###\n--New State Capital Partners III, L.P.\nThe report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Reverence Capital Partners Opportunities Fund II, L.P.\n- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Halyard Capital Fund II, L.P.\nThe report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Grain Communications Opportunity Fund II, L.P.\n- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--ICV Partners I

In [60]:
print(combined_summaries_dict['topic1'])

### Economic growth and outlook for 2024 and 2025 ###
--New State Capital Partners III, L.P.
The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

--Reverence Capital Partners Opportunities Fund II, L.P.
- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

--Halyard Capital Fund II, L.P.
The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

--Grain Communications Opportunity Fund II, L.P.
- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.

--ICV Partners III, L.P.
The report does 

In [61]:
print(combined_summaries_dict['topic2'])

### Investment and portfolio performance ###
--New State Capital Partners III, L.P.
The report provides detailed information on the investment and portfolio performance of New State Capital Partners III, L.P. As of June 30, 2024, the fund has made four investments with a total equity invested of $134.8 million. The gross realized value stands at $4.2 million, while the unrealized value is $252.1 million, leading to a gross total value of $256.3 million. The fund has shown a significant quarter-over-quarter change of 16.6%.

  The fund's Gross MOIC (Multiple on Invested Capital) is 1.9x, and the Net MOIC is 1.5x. The Gross IRR (Internal Rate of Return) is 40.9%, and the Net IRR is 26.4%. These figures indicate a strong performance, especially considering the fund's relatively recent vintage year of 2021.

  Additionally, the report highlights the performance of previous funds managed by New State Capital Partners. Fund I, vintage 2015, has a Gross MOIC of 3.8x and a Net MOIC of 3.0x, wi

In [62]:
#pip install langchain_openai

In [63]:
llm = AzureChatOpenAI(
    openai_api_version=api_version,
    api_key='992ce21baef84c68a10972ffa78c4eeb',
    azure_endpoint='https://usw1d-openai01.openai.azure.com',
    azure_deployment="gpt-4o",
    temperature=0
)


In [64]:
from langchain_core.prompts.chat import ChatPromptTemplate
#langchain.debug = False

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant, skilled in financial statements. \
                    You analyze financial documents in pdf files, \
                    understand details, and come up with answers to query.\
                    You also share a brief text summary of reasoning"),
        ("human", "{user_input}"),
    ]
)

In [65]:
keyword1=keywords[0]
messages = chat_template.format_messages(
    user_input = f"""
    Please analyze and compare the sentiment of each fund in detail based on the following text, which includes summaries of the remarks and perspectives on {keyword1} from different funds, including {fund_list}:
    
    {combined_summaries_dict['topic1']}
    
    Please follow these guidelines for your response:
    
    1. **Sentiment Score**: Assign a sentiment score for each fund, reflecting the sentiment of their remarks. The sentiment scores should range from -1 to +1, where:
       - **-1** indicates very negative sentiment,
       - **0** indicates neutral sentiment,
       - **+1** indicates very positive sentiment.
       Provide scores with decimal precision (e.g., -0.8, +0.4, +0.7) to reflect varying degrees of sentiment.
    
    2. **Reasoning**: Provide a clear and concise reasoning for each sentiment score, explaining the factors that influenced the score. Include your step-by-step thought process.
    
    3. **Format**: Structure your response in markdown format as follows:
    
    ```markdown
    # Sentiment Analysis and Evaluation Scores for Each Fund
    
    ## Funds with Sufficient Information for Sentiment Analysis
    
    ### 1. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ### 2. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ... (continue for each fund with sufficient information)
    
    ## Funds with Insufficient Information for Sentiment Analysis
    - [Fund Name]
    - [Fund Name]
    - [Fund Name]
    
    ...(list any funds for which there was not enough information to provide a sentiment analysis)

    """
)
ai_message = llm.invoke(messages)

In [66]:
sentiment1=ai_message.content
print(sentiment1)

# Sentiment Analysis and Evaluation Scores for Each Fund

## Funds with Sufficient Information for Sentiment Analysis

### 1. Atlas Capital Resources Fund II, LP
**Sentiment Score: +0.6**
**Reasoning:** The report highlights positive economic growth in the U.S. and Eurozone for 2024, with the U.S. GDP growth surprising economists. However, the outlook for 2025 is not provided, which tempers the overall sentiment.

### 2. Axcel VII
**Sentiment Score: +0.5**
**Reasoning:** The report indicates a positive economic outlook for the Nordic region with projected GDP growth and moderate inflation. However, the sentiment is tempered by the cautious economic environment due to interest rate adjustments.

### 3. Gauge Capital IV LP
**Sentiment Score: +0.4**
**Reasoning:** The report presents a cautiously optimistic outlook with expectations of continued GDP growth and potential interest rate cuts. However, rising unemployment and the upcoming presidential election add uncertainty, moderating the 

In [68]:
combined_summaries_dict

{'topic1': "### Economic growth and outlook for 2024 and 2025 ###\n--New State Capital Partners III, L.P.\nThe report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Reverence Capital Partners Opportunities Fund II, L.P.\n- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Halyard Capital Fund II, L.P.\nThe report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--Grain Communications Opportunity Fund II, L.P.\n- The report does not provide specific details on economic growth and outlook for 2024 and 2025. Therefore, I cannot provide a summary for this topic based on the given information.\n\n--ICV Partners I

In [95]:
te=combined_summaries_dict['topic2']
half_length=len(combined_summaries_dict['topic2'])//2
second_half = te[half_length+952:]
second_half
#combined_summaries_dict['topic_add2']=second_half

'TA Atlantic and Pacific VII L.P.\n- The report highlights that the fund, TA Atlantic and Pacific VII L.P., is a private equity fund focused on middle buyout investments primarily in North America, specifically the United States. The fund has a size of $884.37 million and was established in 2014.\n  - The performance of the fund is measured using metrics such as Fund Net IRR and Multiple on Invested Capital (MOIC). Fund Net IRR reflects the internal rate of return to investors after deducting management fees, performance allocations, and other fund-level expenses. MOIC is computed by dividing distributions and the residual value of the LPs\' capital account by their paid-in capital.\n  - The report emphasizes that past performance is not indicative of future results and that investing in the fund involves significant risks, including the potential loss of the entire investment. Each investment is subject to its own unique risks, and the financial performance of investments can vary sig

In [96]:
keyword2=keywords[1]
messages = chat_template.format_messages(
    user_input = f"""
    Please analyze and compare the sentiment of each fund in detail based on the following text, which includes summaries of the remarks and perspectives on {keyword2} from different funds, including {fund_list}:
    
    {combined_summaries_dict['topic_add']}
    
    Please follow these guidelines for your response:
    
    1. **Sentiment Score**: Assign a sentiment score for each fund, reflecting the sentiment of their remarks. The sentiment scores should range from -1 to +1, where:
       - **-1** indicates very negative sentiment,
       - **0** indicates neutral sentiment,
       - **+1** indicates very positive sentiment.
       Provide scores with decimal precision (e.g., -0.8, +0.4, +0.7) to reflect varying degrees of sentiment.
    
    2. **Reasoning**: Provide a clear and concise reasoning for each sentiment score, explaining the factors that influenced the score. Include your step-by-step thought process.
    
    3. **Format**: Structure your response in markdown format as follows:
    
    ```markdown
    # Sentiment Analysis and Evaluation Scores for Each Fund
    
    ## Funds with Sufficient Information for Sentiment Analysis
    
    ### 1. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ### 2. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ... (continue for each fund with sufficient information)
    
    ## Funds with Insufficient Information for Sentiment Analysis
    - [Fund Name]
    - [Fund Name]
    - [Fund Name]
    
    ...(list any funds for which there was not enough information to provide a sentiment analysis)

    """
)
ai_message = llm.invoke(messages)

In [101]:
#sentiment2_1=ai_message.content
print(sentiment2_1)
sentiment2 = sentiment2 + sentiment2_1 
sentiment2[1]

# Sentiment Analysis and Evaluation Scores for Each Fund

## Funds with Sufficient Information for Sentiment Analysis

### 1. New State Capital Partners III, L.P.
**Sentiment Score: +0.8**
**Reasoning:** The fund has shown strong performance metrics with a Gross IRR of 40.9% and a Net IRR of 26.4%. The significant quarter-over-quarter change of 16.6% and the positive performance of previous funds managed by New State Capital Partners contribute to a very positive sentiment.

### 2. Reverence Capital Partners Opportunities Fund II, L.P.
**Sentiment Score: +0.7**
**Reasoning:** The fund has a gross MOIC of 2.56x and a gross IRR of 30.8%, indicating strong performance. Specific investments like Venerable Holdings and Transact have shown significant returns, contributing to a positive sentiment.

### 3. Halyard Capital Fund II, L.P.
**Sentiment Score: +0.6**
**Reasoning:** The fund has shown significant financial activities and strong growth in revenue and EBITDA. The successful sale trans

' '

In [102]:
keyword3=keywords[2]
messages = chat_template.format_messages(
    user_input = f"""
    Please analyze and compare the sentiment of each fund in detail based on the following text, which includes summaries of the remarks and perspectives on {keyword3} from different funds, including {fund_list}:
    
    {combined_summaries_dict['topic3']}
    
    Please follow these guidelines for your response:
    
    1. **Sentiment Score**: Assign a sentiment score for each fund, reflecting the sentiment of their remarks. The sentiment scores should range from -1 to +1, where:
       - **-1** indicates very negative sentiment,
       - **0** indicates neutral sentiment,
       - **+1** indicates very positive sentiment.
       Provide scores with decimal precision (e.g., -0.8, +0.4, +0.7) to reflect varying degrees of sentiment.
    
    2. **Reasoning**: Provide a clear and concise reasoning for each sentiment score, explaining the factors that influenced the score. Include your step-by-step thought process.
    
    3. **Format**: Structure your response in markdown format as follows:
    
    ```markdown
    # Sentiment Analysis and Evaluation Scores for Each Fund
    
    ## Funds with Sufficient Information for Sentiment Analysis
    
    ### 1. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ### 2. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ... (continue for each fund with sufficient information)
    
    ## Funds with Insufficient Information for Sentiment Analysis
    - [Fund Name]
    - [Fund Name]
    - [Fund Name]
    
    ...(list any funds for which there was not enough information to provide a sentiment analysis)

    """
)
ai_message = llm.invoke(messages)

In [103]:
sentiment3=ai_message.content
print(sentiment3)

```markdown
# Sentiment Analysis and Evaluation Scores for Each Fund

## Funds with Sufficient Information for Sentiment Analysis

### 1. Allegro Fund IV, LP
**Sentiment Score: +0.7**
**Reasoning:** The fund has a strong pipeline of prospects and has reviewed numerous deals, indicating active engagement in the market. The fund's performance metrics, such as a gross fund multiple of 1.11x and a gross IRR of 13.0%, reflect positive sentiment towards capital market liquidity and deal transaction volume.

### 2. Breck Partners I, LP
**Sentiment Score: +0.3**
**Reasoning:** The fund is actively engaging with intermediary contacts to identify investment opportunities, suggesting a focus on maintaining deal transaction volume. However, the cautious approach to capital calls indicates a more reserved sentiment influenced by current market liquidity conditions.

### 3. Consonance Private Equity II, L.P.
**Sentiment Score: +0.4**
**Reasoning:** The report indicates a modest recovery in transacti

In [104]:
keyword4=keywords[3]
messages = chat_template.format_messages(
    user_input = f"""
    Please analyze and compare the sentiment of each fund in detail based on the following text, which includes summaries of the remarks and perspectives on {keyword4} from different funds, including {fund_list}:
    
    {combined_summaries_dict['topic4']}
    
    Please follow these guidelines for your response:
    
    1. **Sentiment Score**: Assign a sentiment score for each fund, reflecting the sentiment of their remarks. The sentiment scores should range from -1 to +1, where:
       - **-1** indicates very negative sentiment,
       - **0** indicates neutral sentiment,
       - **+1** indicates very positive sentiment.
       Provide scores with decimal precision (e.g., -0.8, +0.4, +0.7) to reflect varying degrees of sentiment.
    
    2. **Reasoning**: Provide a clear and concise reasoning for each sentiment score, explaining the factors that influenced the score. Include your step-by-step thought process.
    
    3. **Format**: Structure your response in markdown format as follows:
    
    ```markdown
    # Sentiment Analysis and Evaluation Scores for Each Fund
    
    ## Funds with Sufficient Information for Sentiment Analysis
    
    ### 1. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ### 2. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ... (continue for each fund with sufficient information)
    
    ## Funds with Insufficient Information for Sentiment Analysis
    - [Fund Name]
    - [Fund Name]
    - [Fund Name]
    
    ...(list any funds for which there was not enough information to provide a sentiment analysis)

    """
)
ai_message = llm.invoke(messages)

In [105]:
sentiment4=ai_message.content
print(sentiment4)

# Sentiment Analysis and Evaluation Scores for Each Fund

## Funds with Sufficient Information for Sentiment Analysis

### 1. ICV Partners III, L.P.
**Sentiment Score: -0.2**
**Reasoning:** The report mentions a decrease in spreads in the leveraged loan market, which could impact the cost of borrowing and the overall investment environment. However, it does not provide a detailed analysis of the expectations and impact of interest rates on investments, leading to a slightly negative sentiment due to the uncertainty.

### 2. Atlas Capital Resources Fund II, LP
**Sentiment Score: +0.3**
**Reasoning:** The report indicates that the Fed has been holding interest rates steady, contributing to progress in slowing inflation. The probability of a rate cut is high, which could be positive for investments. However, the limited room to cut rates due to inflation risks tempers the sentiment.

### 3. Axcel VII
**Sentiment Score: -0.1**
**Reasoning:** The report indicates that interest rates in Euro

In [106]:
keyword5=keywords[4]
messages = chat_template.format_messages(
    user_input = f"""
    Please analyze and compare the sentiment of each fund in detail based on the following text, which includes summaries of the remarks and perspectives on {keyword5} from different funds, including {fund_list}:
    
    {combined_summaries_dict['topic5']}
    
    Please follow these guidelines for your response:
    
    1. **Sentiment Score**: Assign a sentiment score for each fund, reflecting the sentiment of their remarks. The sentiment scores should range from -1 to +1, where:
       - **-1** indicates very negative sentiment,
       - **0** indicates neutral sentiment,
       - **+1** indicates very positive sentiment.
       Provide scores with decimal precision (e.g., -0.8, +0.4, +0.7) to reflect varying degrees of sentiment.
    
    2. **Reasoning**: Provide a clear and concise reasoning for each sentiment score, explaining the factors that influenced the score. Include your step-by-step thought process.
    
    3. **Format**: Structure your response in markdown format as follows:
    
    ```markdown
    # Sentiment Analysis and Evaluation Scores for Each Fund
    
    ## Funds with Sufficient Information for Sentiment Analysis
    
    ### 1. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ### 2. [Fund Name]
    **Sentiment Score: [Sentiment Score]**
    **Reasoning:** [Detailed reasoning for the sentiment score]
    
    ... (continue for each fund with sufficient information)
    
    ## Funds with Insufficient Information for Sentiment Analysis
    - [Fund Name]
    - [Fund Name]
    - [Fund Name]
    
    ...(list any funds for which there was not enough information to provide a sentiment analysis)

    """
)
ai_message = llm.invoke(messages)

In [107]:
sentiment5=ai_message.content
print(sentiment5)

```markdown
# Sentiment Analysis and Evaluation Scores for Each Fund

## Funds with Sufficient Information for Sentiment Analysis

### 1. Atlas Capital Resources Fund II, LP
**Sentiment Score: +0.5**
**Reasoning:** The report indicates that inflation has been slowing, and the Fed's actions have helped manage inflation, which is seen as a positive development. However, there are still risks, and the sentiment is cautiously optimistic.

### 2. Axcel VII
**Sentiment Score: +0.6**
**Reasoning:** The report provides inflation projections and highlights that despite inflationary pressures, the fund's portfolio companies have managed to grow their sales and maintain healthy profit margins. This indicates resilience and a positive outlook.

### 3. Gauge Capital IV LP
**Sentiment Score: +0.7**
**Reasoning:** The report highlights a significant decrease in inflation, which is seen as a positive development. The management team remains focused on finding quality investments, and the expectation o

In [109]:
sentiment2=str(sentiment2)

#### Sentiment Scoring Organize

In [110]:
def clean_sentiment_text(text):
    text = re.sub(r"(##|###)\s+Funds with Insufficient Information.*", "", text, flags=re.DOTALL)
    
    return text

In [111]:
cleaned_sentiment1 = clean_sentiment_text(sentiment1)
cleaned_sentiment2 = clean_sentiment_text(sentiment2)
cleaned_sentiment3 = clean_sentiment_text(sentiment3)
cleaned_sentiment4 = clean_sentiment_text(sentiment4)
cleaned_sentiment5 = clean_sentiment_text(sentiment5)

In [None]:
print(cleaned_sentiment1)

In [None]:
print(cleaned_sentiment1)

In [126]:
column_sentiment = [f'{keyword}_Sentiment' for keyword in keywords]
test = pd.DataFrame([cleaned_sentiment1, cleaned_sentiment2, cleaned_sentiment3, cleaned_sentiment4, cleaned_sentiment5])
 
list_topic = []
list_bad = []
fund_bad = []
for j in range(0, 5):
    try:
        t2 = pd.DataFrame(test.iloc[j].item().split('###')).iloc[1:]
        list_score = []
        for i in range(0, len(t2)):
            try:
                t3 = pd.DataFrame(t2.iloc[i].item().split('\n')).replace(r'^\s*$', np.nan, regex=True).dropna()
                t3.columns = ['text']
                t4 = t3[t3['text'].str.contains('Sentiment Score')]
                score = t4['text'].str.extract(r'(\d+.\d+)').astype('float').dropna().values[0].item()
                res_df = pd.DataFrame(index = [t3.iloc[0].item().split('. ')[1]], columns = [column_sentiment[j]])
                res_df.iloc[0, 0] = score
                list_score.append(res_df)
            except IndexError:
                fund_bad.append(i)
        score_df = pd.concat(list_score)
        score_df = score_df[~score_df.index.duplicated(keep='first')]
        list_topic.append(score_df)
    except ValueError:
        list_bad.append(j)
 
res_df = pd.concat(list_topic, axis = 1)
res_df


Unnamed: 0,Economic growth and outlook for 2024 and 2025_Sentiment,Investment and portfolio performance_Sentiment,Capital market liquidity and Deal transaction volume_Sentiment,Expectation and Impact of interest rates on investments_Sentiment,Expectation and Impact of inflation on investments_Sentiment
"Atlas Capital Resources Fund II, LP",0.6,0.4,,0.3,0.5
Axcel VII,0.5,0.4,,0.1,0.6
Gauge Capital IV LP,0.4,0.3,0.7,,0.7
"Industrial Growth Partners V, L.P.",0.3,0.5,0.7,,0.5
Montefiore Investment IV,0.2,0.4,,,
...,...,...,...,...,...
"Genstar Capital Partners XI, L.P.",,,,0.2,
Genstar Capital Partners VIII,,,,0.2,
"Sterling Investment Partners III, L.P.",,,,0.3,
"Littlejohn Fund V, L.P.",,,,,0.3


In [123]:
# Assuming 'cleaned_sentiment1, cleaned_sentiment2, etc.' contain your sentiment data
test = pd.DataFrame([cleaned_sentiment1, cleaned_sentiment2, cleaned_sentiment3, cleaned_sentiment4, cleaned_sentiment5])

column_sentiment = [f'{keyword}_Sentiment' for keyword in keywords]
column_reasoning = [f'{keyword}_Reasoning' for keyword in keywords]

list_topic = []
fund_bad = []
for j in range(len(test)):
    try:
        t2 = pd.DataFrame(test.iloc[j].item().split('###')).iloc[1:]
        list_fund_info = []
        for i in range(len(t2)):
            try:
                t3 = pd.DataFrame(t2.iloc[i].item().split('\n')).replace(r'^\s*$', np.nan, regex=True).dropna()
                t3.columns = ['text']
                fund_name = t3.iloc[0].item().split('. ')[1].strip()
                sentiment_row = t3[t3['text'].str.contains('Sentiment Score')]
                reasoning_row = t3[t3['text'].str.contains('Reasoning:')]

                if not sentiment_row.empty and not reasoning_row.empty:
                    score = float(sentiment_row['text'].str.extract(r'(\+?-?\d+\.\d+)')[0].dropna().values[0])
                    reasoning = reasoning_row['text'].str.replace('**Reasoning:** ', '').item()

                    res_df = pd.DataFrame(index=[fund_name], columns=[column_sentiment[j], column_reasoning[j]])
                    res_df.at[fund_name, column_sentiment[j]] = score
                    res_df.at[fund_name, column_reasoning[j]] = reasoning
                    list_fund_info.append(res_df)
            except IndexError:
                fund_bad.append((j, i))  # Track which fund and section had issues

        if list_fund_info:
            score_df = pd.concat(list_fund_info)
            score_df = score_df[~score_df.index.duplicated(keep='first')]
            list_topic.append(score_df)
    except ValueError:
        fund_bad.append(j)  # Track which main entry had issues

if list_topic:
    sent_res_df = pd.concat(list_topic, axis=1)
else:
    print("No data processed successfully.")
sent_res_df

Unnamed: 0,Economic growth and outlook for 2024 and 2025_Sentiment,Economic growth and outlook for 2024 and 2025_Reasoning,Investment and portfolio performance_Sentiment,Investment and portfolio performance_Reasoning,Capital market liquidity and Deal transaction volume_Sentiment,Capital market liquidity and Deal transaction volume_Reasoning,Expectation and Impact of interest rates on investments_Sentiment,Expectation and Impact of interest rates on investments_Reasoning,Expectation and Impact of inflation on investments_Sentiment,Expectation and Impact of inflation on investments_Reasoning
"Atlas Capital Resources Fund II, LP",0.6,The report highlights positive economic growth...,0.4,The fund has shown positive performance metric...,,,0.3,The report indicates that the Fed has been hol...,0.5,The report indicates that inflation has been s...
Axcel VII,0.5,The report indicates a positive economic outlo...,0.4,The fund has shown significant growth in sales...,,,-0.1,The report indicates that interest rates in Eu...,0.6,The report provides inflation projections and ...
Gauge Capital IV LP,0.4,The report presents a cautiously optimistic ou...,0.3,The fund has faced challenges with performance...,0.7,The fund completed multiple acquisitions and a...,,,0.7,The report highlights a significant decrease i...
"Industrial Growth Partners V, L.P.",0.3,The report indicates broader U.S. economic gro...,0.5,The fund has shown strong performance metrics ...,0.7,Improved credit markets and a steady rise in d...,,,0.5,The report mentions dissipating inflation as a...
Montefiore Investment IV,0.2,The report highlights uncertainty due to polit...,0.4,The fund has shown positive performance metric...,,,,,,
...,...,...,...,...,...,...,...,...,...,...
"Genstar Capital Partners XI, L.P.",,,,,,,0.2,"Similar to Genstar Capital Partners X, L.P., t...",,
Genstar Capital Partners VIII,,,,,,,0.2,The report includes detailed information on th...,,
"Sterling Investment Partners III, L.P.",,,,,,,-0.3,The report discusses the impact of interest ra...,,
"Littlejohn Fund V, L.P.",,,,,,,,,-0.3,The report indicates that inflation has had a ...


In [127]:

# Assuming 'list_topic' is a list of DataFrames you want to concatenate
res_df = pd.concat(list_topic, axis=1)

# Create a list of the variables, filtering out None values
variables = [asset_class, sector, quarter_end_date]
output_name = '_'.join(filter(None, variables))  # This will join non-None and non-empty strings

# Specify the directory
directory = '/home/jovyan/shared/projects/Sep_Iter/Sentiment/output_data'

# Use os.path.join to construct the full file path
full_file_path = os.path.join(directory, output_name + 'Sentiment'+'.xlsx')

# Save to Excel using the dynamically created filename
res_df.to_excel(full_file_path, index=True, sheet_name='Sheet1')


# Create a list of the variables, filtering out None values
variables = [asset_class, sector, quarter_end_date]
output_name = '_'.join(filter(None, variables))  # This will join non-None and non-empty strings

# Specify the directory
directory = '/home/jovyan/shared/projects/Sep_Iter/Sentiment/output_data'

# Use os.path.join to construct the full file path
full_file_path = os.path.join(directory, output_name + 'Sent_Res'+'.xlsx')

# Save to Excel using the dynamically created filename
sent_res_df.to_excel(full_file_path, index=True, sheet_name='Sheet1')