# 🌍 ESG & Financial Intelligence Platform
## AI-Powered Analysis of Corporate Reports (ESG and Annual)

**🎯 What this does:**
- Automatically processes PDF reports from Google Cloud Storage
- Uses Gemini 2.5 Pro to extract financial & ESG metrics
- Generates forecasts using Google's TimesFM 2.0 model
- Analyzes companies: Amgen, Novartis, Target

**📊 Features:**
- ✅ Automated PDF processing from Cloud Storage
- ✅ AI-powered data extraction (Financial + ESG metrics)
- ✅ Revenue forecasting with TimesFM 2.0
- ✅ Multi-company comparative analysis

---
**👆 Click "Runtime" → "Run all" to see the magic happen!**

In [None]:
# 🚀 Setup & Authentication
# Authenticates this Colab runtime, installs the packages listed below, and
# imports the BigQuery, dataframe, and plotting libraries.
print("🌟 Welcome to the ESG & Financial Intelligence Platform!")
print("🔧 Setting up environment...")

# Authenticate with Google Cloud
# (google.colab is only importable inside a Colab runtime)
from google.colab import auth
auth.authenticate_user()

# Install required packages
# (-q keeps pip's output quiet; runs before the imports below so they resolve)
!pip install -q bigframes google-cloud-bigquery plotly seaborn

# Import libraries
import bigframes.pandas as bpd
from google.cloud import bigquery
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt

print("✅ Authentication complete!")
print("✅ Libraries installed!")

In [None]:
# 🏗️ Project Configuration
# Resolves the Google Cloud project ID, points BigFrames at it, and creates
# the BigQuery client used for direct SQL jobs.
from google.colab import userdata
import getpass

# Get project configuration: prefer the Colab secret, fall back to a prompt.
# `except Exception` (instead of a bare `except:`) still covers a missing or
# inaccessible secret, but no longer swallows KeyboardInterrupt/SystemExit.
try:
    PROJECT_ID = userdata.get('GOOGLE_CLOUD_PROJECT')
except Exception:
    PROJECT_ID = getpass.getpass("🔑 Enter your Google Cloud Project ID: ")
else:
    print(f"✅ Using project from secrets: {PROJECT_ID}")

# Fail fast on a blank ID rather than handing an empty project to the clients.
PROJECT_ID = (PROJECT_ID or "").strip()
if not PROJECT_ID:
    raise ValueError("Google Cloud Project ID must not be empty")

# Configure BigFrames
bpd.options.bigquery.project = PROJECT_ID
bpd.options.bigquery.location = "US"

# Initialize BigQuery client
client = bigquery.Client(project=PROJECT_ID)

print(f"✅ Project configured: {PROJECT_ID}")
print("✅ BigFrames ready for action!")

In [17]:
# Identifiers for the BigQuery Cloud resource connection and the object table.
PROJECT_ID = "intellitrend-project-dev"   # update if needed
PROJECT_LOCATION = "US"
CLOUD_RES_CONN = "ghack_conn"
# Connection path: <project>.<location>.<connection name>
QUALIFIED_CLOUD_RES_CONN = ".".join(
    (PROJECT_ID, PROJECT_LOCATION, CLOUD_RES_CONN)
)

DATASET_ID = "db_reports_insights_annual_esg"
OBJ_TABLE_ID = "all_reports_obj_table_metadata"
# Table path: <project>.<dataset>.<table>
QUALIFIED_OBJ_TABLE_ID = ".".join((PROJECT_ID, DATASET_ID, OBJ_TABLE_ID))

In [15]:
# Create the BigQuery Cloud resource connection named by CLOUD_RES_CONN.
# The {NAME} placeholders are Python variables substituted into the shell
# command. If the connection already exists, bq reports an "Already Exists"
# error, as in the recorded output of this cell.
!bq mk \
  --connection \
  --project_id={PROJECT_ID} \
  --connection_type=CLOUD_RESOURCE \
  --location={PROJECT_LOCATION} \
  {CLOUD_RES_CONN}

BigQuery error in mk operation: Already Exists: Connection
projects/573553606303/locations/us/connections/ghack_conn


In [16]:
SERVICE_ACCT = !bq show --format=prettyjson \
  --connection \
  --project_id={PROJECT_ID} \
  --location={PROJECT_LOCATION} \
  {CLOUD_RES_CONN} | grep "serviceAccountId" | cut -d '"' -f 4

SERVICE_ACCT_EMAIL = SERVICE_ACCT[0]  # first (and only) line
print(SERVICE_ACCT_EMAIL)

bqcx-573553606303-2s0n@gcp-sa-bigquery-condel.iam.gserviceaccount.com


In [13]:
import time

# Grant the connection's service account two project-level roles.
# SERVICE_ACCT_EMAIL must already be set by the service-account lookup cell.
# Use $SERVICE_ACCT_EMAIL so the Python variable expands in the shell
# roles/storage.objectViewer: presumably so the connection can read the report
# files in Cloud Storage — confirm against the bucket's access model.
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:$SERVICE_ACCT_EMAIL" \
    --role="roles/storage.objectViewer" \
    --format=none

# roles/aiplatform.user: presumably so BigQuery can call Vertex AI (Gemini)
# through the connection — confirm.
!gcloud projects add-iam-policy-binding {PROJECT_ID} \
    --member="serviceAccount:$SERVICE_ACCT_EMAIL" \
    --role="roles/aiplatform.user" \
    --format=none

# Wait ~60 seconds for IAM updates to propagate
time.sleep(60)

Updated IAM policy for project [intellitrend-project-dev].
Updated IAM policy for project [intellitrend-project-dev].


In [None]:

# Object-table identifiers and the Cloud Storage location of the report files.
PROJECT_ID = "intellitrend-project-dev"   # update if needed
DATASET_ID = "db_reports_insights_annual_esg"
OBJ_TABLE_ID = "all_reports_obj_table_metadata"
# Table path: <project>.<dataset>.<table>
QUALIFIED_OBJ_TABLE_ID = "{}.{}.{}".format(PROJECT_ID, DATASET_ID, OBJ_TABLE_ID)

# Cloud Storage prefix used to build the object table's URI pattern
DATA_FILES_PATH = "gs://report_insights"

In [None]:
# 📁 Step 0: Create External Table for PDF Reports
# Creates (or replaces) an object table over every file under DATA_FILES_PATH,
# then reports how many files it exposes and previews up to 10 of them.
# Failures are reported as a note and do not stop the notebook.

print(f"📁 Creating external table(Object Table) {OBJ_TABLE_ID} for PDF reports...")

create_external_table_sql = f"""
CREATE OR REPLACE EXTERNAL TABLE `{QUALIFIED_OBJ_TABLE_ID}`
WITH CONNECTION `{QUALIFIED_CLOUD_RES_CONN}`
OPTIONS (
  object_metadata = 'SIMPLE',
  uris = ['{DATA_FILES_PATH}/*']
);
"""

try:
    job = client.query(create_external_table_sql)
    job.result()  # block until the DDL statement has finished
    print("✅ External table created successfully!")

    # Count with a dedicated query: the preview below is capped at 10 rows,
    # so its length would under-report any bucket holding more than 10 files.
    count_files_sql = f"SELECT COUNT(*) AS file_count FROM `{QUALIFIED_OBJ_TABLE_ID}`"
    file_count = next(iter(client.query(count_files_sql).result()))[0]
    print(f"📄 Found {file_count} files in storage")

    # Check what files we have (sample only)
    check_files_sql = f"SELECT uri, size FROM `{QUALIFIED_OBJ_TABLE_ID}` LIMIT 10"
    files_df = bpd.read_gbq(check_files_sql)
    print("Sample files:")
    print(files_df.head())

except Exception as e:
    # Deliberate best-effort: surface the error text and let the notebook continue.
    print(f"⚠️  Note: {e}")
    print("   Make sure your GCS bucket and connection are set up correctly")

In [None]:
# 🧠 Step 1: Extract Financial & ESG Data using AI
# Runs one BigQuery job that writes the model output for the Amgen, Target and
# Novartis rows of the source table into a results table, then prints how many
# rows were written. Failures are printed and do not stop the notebook.
print("🧠 Processing PDFs with Gemini 2.5 Pro (direct call, no model creation)...")
print("   This analyzes Amgen, Target, and Novartis reports...")

# Build the table names from the shared PROJECT_ID / DATASET_ID settings so
# this step follows the same configuration as the other cells instead of
# repeating the dataset name as a literal.
SOURCE_REPORTS_TABLE = f"{PROJECT_ID}.{DATASET_ID}.all_reports_annual_esg"
RAW_RESULTS_TABLE = f"{PROJECT_ID}.{DATASET_ID}.all_reports_results_raw_annual_esg"

# NOTE(review): the source table `all_reports_annual_esg` is written by the
# GitHub-listing cell that appears later in this notebook; on a fresh project
# that cell has to run before this one.
# NOTE(review): BigQuery documents AI.GENERATE as a scalar function (prompt plus
# connection_id/endpoint arguments); the MODEL / TABLE / STRUCT argument form
# below matches ML.GENERATE_TEXT. Confirm this statement is accepted as written.
# NOTE(review): max_output_tokens 8092 may be a typo for 8192 — confirm.
generate_text_sql = f"""
CREATE OR REPLACE TABLE `{RAW_RESULTS_TABLE}` AS (
SELECT
  *
FROM
  AI.GENERATE(
    MODEL `gemini-2.5-pro`,
    TABLE `{SOURCE_REPORTS_TABLE}`,
    STRUCT(
      '''
You are an expert ESG and Financial analyst. Use only the information provided in the document to answer.
Fetch Financial and Sustainability Details including tabular and image data for each PDF.
      ''' AS prompt,
      0 AS temperature,
      8092 AS max_output_tokens
    )
  )
WHERE uri LIKE '%amgen%' OR uri LIKE '%target%' OR uri LIKE '%novartis%'
);
"""

try:
    print("⏳ Processing... This may take 2-3 minutes")
    job = client.query(generate_text_sql)
    job.result()  # block until the CREATE TABLE job has finished

    # Check results
    check_sql = f"""
        SELECT COUNT(*) AS processed_files
        FROM `{RAW_RESULTS_TABLE}`
    """
    result = client.query(check_sql).result()
    count = next(iter(result))[0]  # single row, single column
    print(f"✅ Successfully processed {count} files!")

except Exception as e:
    # Deliberate best-effort: report the failure and let the notebook continue.
    print(f"⚠️  Processing issue: {e}")


In [None]:
# Lists the PDF reports in the GitHub repository folder and loads their
# (filename, download URL) pairs into a BigQuery table, replacing its contents.
import requests
from google.cloud import bigquery

PROJECT_ID = "intellitrend-project-dev"   # update if needed
DATASET_ID = "db_reports_insights_annual_esg"
TABLE_ID = "all_reports_annual_esg"

# Step 0: Fetch file list from GitHub
url = "https://api.github.com/repos/intellitrend-global/google_hackathon_bq_ai/contents/annual_esg_reports"
# A timeout keeps the cell from hanging; raise_for_status surfaces HTTP errors
# (e.g. rate limiting) instead of parsing the error body as a file list.
response = requests.get(url, timeout=30)
response.raise_for_status()
files = response.json()
if not isinstance(files, list):
    # A directory listing is a JSON array; anything else is not a file list.
    raise RuntimeError(f"Unexpected GitHub API response: {files!r}")

# Match the extension case-insensitively and skip entries with no download URL.
pdf_files = [
    (f["name"], f["download_url"])
    for f in files
    if f["name"].lower().endswith(".pdf") and f.get("download_url")
]
if not pdf_files:
    # The load below uses WRITE_TRUNCATE, so loading an empty list would wipe
    # the table; stop here instead.
    raise RuntimeError(f"No PDF files found at {url}")

print("📄 Found PDF files on GitHub:")
for name, link in pdf_files:
    print(f"   {name} -> {link}")

# Step 1: Initialize BigQuery client
client = bigquery.Client(project=PROJECT_ID)

# Step 2: Ensure dataset exists (create if not)
dataset_ref = bigquery.DatasetReference(PROJECT_ID, DATASET_ID)
try:
    client.get_dataset(dataset_ref)  # Check if it exists
    print(f"✅ Dataset {DATASET_ID} already exists")
except Exception:
    # Any lookup failure is treated as "dataset missing"; a real access problem
    # will then surface from create_dataset.
    dataset = bigquery.Dataset(dataset_ref)
    dataset.location = "US"
    client.create_dataset(dataset, timeout=30)
    print(f"📦 Created dataset {DATASET_ID}")

# Step 3: Define schema
schema = [
    bigquery.SchemaField("filename", "STRING"),
    bigquery.SchemaField("uri", "STRING"),
]

# Step 4: Prepare rows
rows = [{"filename": name, "uri": link} for name, link in pdf_files]

# Step 5: Load into BigQuery (WRITE_TRUNCATE replaces any existing rows)
job_config = bigquery.LoadJobConfig(schema=schema, write_disposition="WRITE_TRUNCATE")
table_ref = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

job = client.load_table_from_json(rows, table_ref, job_config=job_config)
job.result()  # block until the load job has finished

print(f"✅ Table {table_ref} created/updated with {len(pdf_files)} GitHub files")
