In [None]:
import os
from typing import List
import google.generativeai as genai
from PyPDF2 import PdfReader
import csv
from io import StringIO
import re
import pandas as pd
# ────────────────────────────────────────────────────────────────────────────────
# CONFIGURATION
# ────────────────────────────────────────────────────────────────────────────────

API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_API_KEY")
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel(model_name="gemini-2.5-flash-preview-04-17")
CSV_output = "output.csv"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def read_pdf(path: str) -> str:
    """Extract all text from a PDF file."""
    reader = PdfReader(path)
    full_text = []
    for page in reader.pages:
        txt = page.extract_text()
        if txt:
            full_text.append(txt)
    return "\n".join(full_text)

In [3]:
def build_full_prompt(pdf_text: str, uid: str, strategy: str) -> str:
    instruction = """
        Categorize each investment strategy according to the six Sustainable Investment Approaches and the Motivations for ESG Investing as defined in the accompanying PDF document.

        Instructions:

        For each strategy provided, classify whether each of the following six Sustainable Investment Approaches is present or not:
        a. Apply Exclusions
        b. Limit ESG Risk
        c. Seek ESG Opportunities
        d. Practice Active Ownership
        e. Target Sustainability Themes
        f. Assess Impact
        Assign 1 if the approach is present, and 0 if not present.

        If ESG criteria are mentioned but no clear or explicit approach can be identified, assign 0 to all six categories.

        Also classify the motivation behind the ESG strategy using the Motivations for Sustainable Investing section in the PDF:
        0 = Financial motivation
        1 = Societal motivation
        2 = Both financial and societal motivations
        If societal impact is mentioned only as a way to achieve financial returns, classify it as financial (0).

        Use all available information in the strategy description and the accompanying ESG definitions to support reasoning and classification. Be sure to interpret each strategy carefully and identify complex relationships where present.

        Input Format:

        Each input row will follow this format:
        uid, strategy

        Output Format:

        Return a CSV-formatted table with the following columns:
        uid, Apply Exclusions, Limit ESG Risk, Seek ESG Opportunities, Practice Active Ownership, Target Sustainability Themes, Assess Impact, Motivation

        Ensure that the output is in valid CSV format so it can be parsed programmatically.
    """.strip()

    # assemble
    parts = [
        instruction,
        "\n\n=== ATTACHED TEXT (PDF CONTEXT) ===\n",
        pdf_text,
        "\n\n=== STRATEGY TO SCORE ===\n",
        # here we include both uid and strategy on the same line
        f"{uid}, \"{strategy.strip()}\"\n"
    ]
    return "".join(parts)

In [4]:
def call_gemini(prompt: str) -> str:
    response = model.generate_content(
        prompt, 
    )
    return response.text


In [None]:
# df = pd.read_excel("LC_examples coding.xlsx")
df = pd.read_csv("result_together.csv")
df.rename(columns={'investment_strategy': 'Investment Strategy'}, inplace=True)
base_df = df
df['uid'] = df.index

In [6]:
df = df[['uid','Investment Strategy']]
# df = df.iloc[500:800]
df = df[
    df["Investment Strategy"].notna()                    
    & df["Investment Strategy"].str.strip().ne("")        
]

df = df.rename(columns={"Investment Strategy": "strategy"})

df_list = df.to_dict(orient="records")


In [7]:
df_list

[{'uid': 0,
  'strategy': 'To pursue its goal, the Fund invests primarily in a diversified mix of debt securities. The Fund may invest in a broad array of securities, including: securities issued or guaranteed as to principal or interest by the U.S. government or any of its agencies or instrumentalities; inflation-linked debt securities; municipal securities; foreign securities, including emerging markets; corporate bonds; commercial paper; mortgage-backed and other asset-backed securities; and loans. Securities in which the Fund may invest may be issued by domestic and foreign governments, supranational entities (e.g. World Bank, IMF), corporate entities and trusts and may be structured as fixed rate debt; floating rate debt; and debt that may not pay interest from the time of issuance. The Fund may invest in debt securities across the credit spectrum, including investment grade securities, below investment grade securities and unrated securities, and may invest without limit in below

In [8]:
pdf_context = read_pdf("Morningstar vocabulary ESG.pdf")
raw_responses = []
for rec in df_list:
    uid = rec["uid"]
    strat = rec["strategy"]
    prompt = build_full_prompt(pdf_context, uid, strat)
    result = call_gemini(prompt)
    print(result)
    raw_responses.append(result)

```csv
uid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation
0,0,1,0,1,0,0,0
```
```csv
uid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation
1,1,1,1,0,0,0,2
```
```csv
uid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation
2,1,1,1,0,0,0,2
```
```csv
uid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation
3,1,1,0,1,1,0,2
```
```csv
uid, Apply Exclusions, Limit ESG Risk, Seek ESG Opportunities, Practice Active Ownership, Target Sustainability Themes, Assess Impact, Motivation
4, 1, 1, 1, 1, 0, 0, 2
```
```csv
uid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation
5,1,1,1,1,

In [9]:
raw_responses

['```csv\nuid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation\n0,0,1,0,1,0,0,0\n```',
 '```csv\nuid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation\n1,1,1,1,0,0,0,2\n```',
 '```csv\nuid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation\n2,1,1,1,0,0,0,2\n```',
 '```csv\nuid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Themes,Assess Impact,Motivation\n3,1,1,0,1,1,0,2\n```',
 '```csv\nuid, Apply Exclusions, Limit ESG Risk, Seek ESG Opportunities, Practice Active Ownership, Target Sustainability Themes, Assess Impact, Motivation\n4, 1, 1, 1, 1, 0, 0, 2\n```',
 '```csv\nuid,Apply Exclusions,Limit ESG Risk,Seek ESG Opportunities,Practice Active Ownership,Target Sustainability Them

In [13]:
parsed = []
for raw in raw_responses:
    # strip the ```csv fences
    content = raw.strip('`')
    # remove leading "csv\n" if present
    if content.lower().startswith('csv\n'):
        content = content[4:]
    # read into a one-row DataFrame
    df_part = pd.read_csv(StringIO(content))
    parsed.append(df_part)

approaches_df = pd.concat(parsed, ignore_index=True)
# approaches_df now has columns:
# ['uid',
#  'Apply Exclusions',
#  'Limit ESG Risk',
#  'Seek ESG Opportunities',
#  'Practice Active Ownership',
#  'Target Sustainability Themes',
#  'Assess Impact',
#  'motivation']

final_df = base_df.merge(
    approaches_df,
    on='uid',
    how='left'
)

final_df.to_csv(
    'combined_strategies.csv',
    index=False,
    quoting=csv.QUOTE_ALL
)

print("Saved combined_strategies.csv with columns:", final_df.columns.tolist())

Saved combined_strategies.csv with columns: ['Investment Strategy', 'Category', 'Unnamed: 2', 'Classes', 'Ambiguity in language related to ESG?', 'Motivations (Financial, Social)', 'uid', 'Apply Exclusions', 'Limit ESG Risk', 'Seek ESG Opportunities', 'Practice Active Ownership', 'Target Sustainability Themes', 'Assess Impact', 'Motivation', ' Apply Exclusions', ' Limit ESG Risk', ' Seek ESG Opportunities', ' Practice Active Ownership', ' Target Sustainability Themes', ' Assess Impact', ' Motivation']
