# 🌍 ESG & Financial Intelligence Platform
## AI-Powered Analysis of Corporate Reports

**🎯 What this does:**
- Automatically processes PDF reports from Google Cloud Storage
- Uses Gemini 2.5 Pro to extract financial & ESG metrics
- Generates forecasts using Google's TimesFM 2.0 model
- Analyzes companies: Amgen, Novartis, Target

**📊 Features:**
- ✅ Automated PDF processing from Cloud Storage
- ✅ AI-powered data extraction (Financial + ESG metrics)
- ✅ Revenue forecasting with TimesFM 2.0
- ✅ Multi-company comparative analysis

---
**👆 Click "Runtime" → "Run all" to see the magic happen!**

In [1]:
# 🚀 Setup & Authentication
# Prepares the notebook runtime in four steps: greet the user, sign in to
# Google Cloud, install the packages listed below, and import the libraries.
# NOTE: this cell uses `google.colab` and the `!pip` shell escape, so it is
# IPython/Colab notebook code rather than plain Python.
print("🌟 Welcome to the ESG & Financial Intelligence Platform!")
print("🔧 Setting up environment...")

# Authenticate with Google Cloud
from google.colab import auth
auth.authenticate_user()

# Install required packages (-q keeps pip's output quiet)
!pip install -q bigframes google-cloud-bigquery plotly seaborn

# Import libraries
# These run after the install step above.
import bigframes.pandas as bpd
from google.cloud import bigquery
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt

# Both status lines are printed only once every step above has finished.
print("✅ Authentication complete!")
print("✅ Libraries installed!")

🌟 Welcome to the ESG & Financial Intelligence Platform!
🔧 Setting up environment...
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m51.6 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Authentication complete!
✅ Libraries installed!


You have encountered a bug in the BigQuery CLI. Please file a bug report in our
public issue tracker:
https://issuetracker.google.com/issues/new?component=187149&template=0
Please include a brief description of the steps that led to this issue, as well
as any rows that can be made public from the following information:

== Platform ==
  CPython:3.12.11:Linux-6.1.123+-x86_64-with-glibc2.35
== bq version ==
  2.1.22
== Command line ==
  ['/tools/google-cloud-sdk/platform/bq/bq.py', '--use_gce_service_account', 'mk', '--connection', '--connection_type=CLOUD_RESOURCE', '--location=us', 'test_connection']
== UTC timestamp ==
  2025-09-09 10:29:02
== Error trace ==
Traceback (most recent call last):
  File "/tools/google-cloud-sdk/platform/bq/frontend/bigquery_command.py", line 280, in RunSafely
    return_value = self.RunWithArgs(*args, **kwds)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tools/google-cloud-sdk/platform/bq/frontend/command_make.py", line 1250, in RunWithArgs


In [7]:
# 🏗️ Project Configuration
# Resolves the Google Cloud project ID (Colab secret first, interactive prompt
# as the fallback), then points BigFrames and a BigQuery client at it.
from google.colab import userdata
import getpass

# Get project configuration
PROJECT_ID = None
try:
    PROJECT_ID = userdata.get('GOOGLE_CLOUD_PROJECT')
except Exception:
    # The secret is missing or this notebook has no access to it; fall back to
    # the prompt below. `except Exception` (rather than a bare `except`) lets
    # KeyboardInterrupt / SystemExit stop the cell instead of being swallowed.
    PROJECT_ID = None

if PROJECT_ID and PROJECT_ID.strip():
    PROJECT_ID = PROJECT_ID.strip()
    print(f"✅ Using project from secrets: {PROJECT_ID}")
else:
    PROJECT_ID = getpass.getpass("🔑 Enter your Google Cloud Project ID: ").strip()

# Stop here with a clear message instead of failing later inside BigQuery.
if not PROJECT_ID:
    raise ValueError("A Google Cloud Project ID is required to continue.")

# Configure BigFrames
bpd.options.bigquery.project = PROJECT_ID
bpd.options.bigquery.location = "US"

# Initialize BigQuery client
client = bigquery.Client(project=PROJECT_ID)

print(f"✅ Project configured: {PROJECT_ID}")
print("✅ BigFrames ready for action!")

🔑 Enter your Google Cloud Project ID: ··········
✅ Project configured: intellitrend-project-dev
✅ BigFrames ready for action!


In [9]:
!bq mk \
  --connection \
  --project_id=intellitrend-project-dev \
  --connection_type=CLOUD_RESOURCE \
  --location=us \
  test_connection

Connection 573553606303.us.test_connection successfully created


In [14]:
SERVICE_ACCT = !bq show --format=prettyjson \
  --connection \
  --project_id=intellitrend-project-dev \
  --location=us \
  test_connection | grep "serviceAccountId" | cut -d '"' -f 4

SERVICE_ACCT_EMAIL = SERVICE_ACCT[0]  # first (and only) line
print(SERVICE_ACCT_EMAIL)

bqcx-573553606303-394u@gcp-sa-bigquery-condel.iam.gserviceaccount.com


In [16]:
import time

# Use $SERVICE_ACCT_EMAIL so the Python variable expands in the shell
!gcloud projects add-iam-policy-binding intellitrend-project-dev \
    --member="serviceAccount:$SERVICE_ACCT_EMAIL" \
    --role="roles/storage.objectViewer" \
    --format=none

!gcloud projects add-iam-policy-binding intellitrend-project-dev \
    --member="serviceAccount:$SERVICE_ACCT_EMAIL" \
    --role="roles/aiplatform.user" \
    --format=none

# Wait ~60 seconds for IAM updates to propagate
# time.sleep(60)

Updated IAM policy for project [intellitrend-project-dev].
Updated IAM policy for project [intellitrend-project-dev].


In [8]:
# Builds the report index table: lists the PDF files in the GitHub folder
# below and loads one (filename, uri) row per file into BigQuery, replacing
# any previous contents of the table.
import requests
from google.cloud import bigquery

# NOTE: this assignment replaces any PROJECT_ID set earlier in the session.
PROJECT_ID = "intellitrend-project-dev"   # update if needed
DATASET_ID = "db_reports_insights_annual_esg"
TABLE_ID = "all_reports_annual_esg"

# Step 0: Fetch file list from GitHub
url = "https://api.github.com/repos/intellitrend-global/google_hackathon_bq_ai/contents/annual_esg_reports"
# A timeout keeps the cell from hanging forever; raise_for_status surfaces
# HTTP errors (rate limiting, missing repo) instead of parsing an error body.
response = requests.get(url, timeout=30)
response.raise_for_status()
files = response.json()

# A directory listing is a JSON array; anything else is an error payload and
# would otherwise fail with a confusing TypeError in the comprehension below.
if not isinstance(files, list):
    raise RuntimeError(f"Unexpected GitHub API response: {files!r}")

# Keep entries that have a download URL and a .pdf extension (any letter case).
pdf_files = [
    (f["name"], f["download_url"])
    for f in files
    if f.get("download_url") and f["name"].lower().endswith(".pdf")
]

# WRITE_TRUNCATE below would wipe the table, so refuse to load an empty list.
if not pdf_files:
    raise RuntimeError(f"No PDF files found at {url}")

print("📄 Found PDF files on GitHub:")
for name, link in pdf_files:
    print(f"   {name} -> {link}")

# Step 1: Initialize BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Step 2: Ensure dataset exists (create if not)
# exists_ok=True makes creation a no-op when the dataset is already there,
# while permission or network errors still propagate instead of being
# mistaken for "dataset not found".
dataset_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ID)
dataset = bigquery.Dataset(dataset_ref)
dataset.location = "US"
client.create_dataset(dataset, exists_ok=True, timeout=30)
print(f"✅ Dataset {DATASET_ID} is ready")

# Step 3: Define schema
schema = [
    bigquery.SchemaField("filename", "STRING"),
    bigquery.SchemaField("uri", "STRING"),
]

# Step 4: Prepare rows
rows = [{"filename": name, "uri": link} for name, link in pdf_files]

# Step 5: Load into BigQuery
job_config = bigquery.LoadJobConfig(schema=schema, write_disposition="WRITE_TRUNCATE")
table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

job = client.load_table_from_json(rows, table_ref, job_config=job_config)
job.result()  # block until the load job finishes; raises if it failed

print(f"✅ Table {table_ref} created/updated with {len(pdf_files)} GitHub files")


📄 Found PDF files on GitHub:
   amgen_annualreport_2019.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_2019.pdf
   amgen_annualreport_2020.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_2020.pdf
   amgen_annualreport_2021.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_2021.pdf
   amgen_annualreport_2022.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_2022.pdf
   amgen_annualreport_2023.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_2023.pdf
   amgen_annualreport_2024.pdf -> https://raw.githubusercontent.com/intellitrend-global/google_hackathon_bq_ai/main/annual_esg_reports/amgen_annualreport_20

In [10]:
# 🧠 Step 1: Extract Financial & ESG Data using AI
# Builds a CREATE OR REPLACE TABLE statement that runs AI.GENERATE over the
# report index table, executes it with the BigQuery client, then counts the
# rows of the resulting table. Relies on `PROJECT_ID` and `client` already
# being defined in the session.
print("🧠 Processing PDFs with Gemini 2.5 Pro (direct call, no model creation)...")
print("   This analyzes Amgen, Target, and Novartis reports...")

# NOTE(review): the output recorded for this cell shows the query failing with
# "404 Not found: Dataset <project>:gemini-2 was not found". Presumably the
# backticked `gemini-2.5-pro` after MODEL is parsed as a dataset-qualified
# model path (dataset "gemini-2") — confirm the expected AI.GENERATE call
# form against the BigQuery documentation before relying on this statement.
# NOTE(review): 8092 for max_output_tokens may be a typo for 8192 — confirm.
# The WHERE clause keeps rows whose uri contains amgen, target or novartis.
generate_text_sql = f"""
CREATE OR REPLACE TABLE `{PROJECT_ID}.db_reports_insights_annual_esg.all_reports_results_raw_annual_esg` AS (
SELECT
  *
FROM
  AI.GENERATE(
    MODEL `gemini-2.5-pro`,
    TABLE `{PROJECT_ID}.db_reports_insights_annual_esg.all_reports_annual_esg`,
    STRUCT(
      '''
You are an expert ESG and Financial analyst. Use only the information provided in the document to answer.
Fetch Financial and Sustainability Details including tabular and image data for each PDF.
      ''' AS prompt,
      0 AS temperature,
      8092 AS max_output_tokens
    )
  )
WHERE uri LIKE '%amgen%' OR uri LIKE '%target%' OR uri LIKE '%novartis%'
);
"""

try:
    print("⏳ Processing... This may take 2-3 minutes")
    job = client.query(generate_text_sql)
    job.result()  # wait for the query job to finish; raises if it failed

    # Check results
    check_sql = f"""
        SELECT COUNT(*) AS processed_files
        FROM `{PROJECT_ID}.db_reports_insights_annual_esg.all_reports_results_raw_annual_esg`
    """
    result = client.query(check_sql).result()
    # First column of the first row is the COUNT(*) value.
    count = list(result)[0][0]
    print(f"✅ Successfully processed {count} files!")

# Any failure is printed rather than re-raised, so the cell itself completes
# even when the query did not run and the results table was not (re)created.
except Exception as e:
    print(f"⚠️  Processing issue: {e}")


🧠 Processing PDFs with Gemini 2.5 Pro (direct call, no model creation)...
   This analyzes Amgen, Target, and Novartis reports...
⏳ Processing... This may take 2-3 minutes
⚠️  Processing issue: 404 Not found: Dataset intellitrend-project-dev:gemini-2 was not found in location US; reason: notFound, message: Not found: Dataset intellitrend-project-dev:gemini-2 was not found in location US

Location: US
Job ID: 283fd044-921c-4ada-bbb4-736495b867f3

