In [3]:
import os
import pandas as pd
from google.cloud import bigquery
from google.cloud import language_v1
from google.cloud import aiplatform

PROJECT_ID = "tranquil-matter-470021-u2"  # @param {type:"string"}
REGION = "US"     

In [8]:
bigquery_client = bigquery.Client(project=PROJECT_ID, location=REGION)
try:
    query = """
        SELECT company_public_response,
        consumer_complaint_narrative,
        product,
        company_name,
        date_received,
        company_response_to_consumer
        FROM `bigquery-public-data.cfpb_complaints.complaint_database`
        WHERE consumer_complaint_narrative IS NOT NULL
        LIMIT 1000
    """
    
   
    df = bigquery_client.query(query).to_dataframe()
    print("Data loaded successfully from BigQuery.")
    
except Exception as e:
    print(f"Error loading data from BigQuery: {e}")


if 'df' in locals() and not df.empty:
    df['consumer_complaint_narrative'] = df['consumer_complaint_narrative'].str.strip()
    df['consumer_complaint_narrative'] = df['consumer_complaint_narrative'].str.lower()
    print("\nData preprocessing complete.")
    #print(df.head())
else:
    print("No data available for preprocessing.")

Data loaded successfully from BigQuery.

Data preprocessing complete.


In [14]:
from google.cloud import language_v1
language_client = language_v1.LanguageServiceClient()
def analyze_text(text):
    """Sends text to the Natural Language API for analysis."""
    document = language_v1.Document(
        content=text,
        type_=language_v1.Document.Type.PLAIN_TEXT
    )
    
    # Analyze sentiment
    sentiment_response = language_client.analyze_sentiment(
        request={'document': document}
    )
    sentiment_score = sentiment_response.document_sentiment.score
    
    # Analyze entities
    entity_response = language_client.analyze_entities(
        request={'document': document}
    )
    entities = [
        {'name': entity.name, 'type': entity.type_.name} 
        for entity in entity_response.entities
    ]
    
    return sentiment_score, entities
     
sample_df = df.sample(n=10, random_state=42).copy()
sample_df['sentiment_score'] = None
sample_df['entities'] = None

for index, row in sample_df.iterrows():
    try:
        score, entities = analyze_text(row['consumer_complaint_narrative'])
        sample_df.at[index, 'sentiment_score'] = score
        sample_df.at[index, 'entities'] = [ent['name'] for ent in entities]
        print(f"Analyzed row {index} successfully.")
    except Exception as e:
        print(f"Error analyzing row {index}: {repr(e)}")

print("\nSample analysis results:")
print(sample_df[['consumer_complaint_narrative', 'sentiment_score', 'entities']])

Analyzed row 521 successfully.
Analyzed row 737 successfully.
Analyzed row 740 successfully.
Analyzed row 660 successfully.
Analyzed row 411 successfully.
Analyzed row 678 successfully.
Analyzed row 626 successfully.
Analyzed row 513 successfully.
Analyzed row 859 successfully.
Analyzed row 136 successfully.

Sample analysis results:
                          consumer_complaint_narrative sentiment_score  \
521  on my bank account i have {$940.00} from a res...            -0.7   
737  i filed a identity theft claim with this compa...            -0.5   
740  i applied for 3 private loans while xxxx xxxx ...             0.1   
660  i have worked at a non-profit organization sin...            -0.3   
411  i am trying to refinance with another bank ......            -0.7   
678  do you not actually read the complaints agains...            -0.7   
626  someone used my info to enroll in school and i...            -0.2   
513  xxxx xxxx, a division of comerica bank, has al...            -0.5  

In [19]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part
vertexai.init(project=PROJECT_ID, location="us-central1")

In [23]:
model = GenerativeModel("gemini-2.5-pro")

num_complaints_to_summarize = 3
complaints_to_summarize = " ".join(sample_df['consumer_complaint_narrative'].iloc[:num_complaints_to_summarize])
print(complaints_to_summarize)
prompt = f"""
Summarize the following consumer complaints. Identify the core issues and common themes.

Complaints:
{complaints_to_summarize}

Summary:
"""

# Generate summary
try:
    response = model.generate_content(prompt)

    
    if hasattr(response, "text"):
        summary = response.text
    else:
        summary = response.candidates[0].content.parts[0].text

    print("\n--- Generated Summary ---")
    print(summary)

except Exception as e:
    print(f"Error generating summary with Gemini: {e}")

on my bank account i have {$940.00} from a resource that i absolutely know nothing about sir-mam ; why i filed a identity theft claim with this company american education services when i found out that someone had forged my signature on a student loan. they replied that the owner of the loan was xxxx xxxx xxxx and that they had denied my claim. when i sent all the necessary paperwork to xxxx xxxx xxxx xxxx they in turn replied that they do not make the decision on the identity theft claim. that the creditor xxxx xxxx xxxx are the ones who denied the claim. when i sent the information back to aes they in turn say its xxxx xxxx xxxx xxxx. attached are all documents sent to both parties and their replies indicating that obviously no one wants to take the blame. the identity thft account has been sent to numerous collection agencies ( xxxx ) and when i speak with the agencies they all say that the original creditor is xxxx xxxx xxxx xxxx no one is clearing my name from this account and i n