<a href="https://colab.research.google.com/github/cselester/Custovista/blob/main/Custovista.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install faker

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.1.0


In [2]:
# Create project folders (in Colab environment)
# NOTE: `%cd` uses a relative path, so re-running this cell while already inside
# customer360 creates and enters a nested customer360/customer360 directory.
!mkdir -p customer360
%cd customer360


/content/customer360


In [3]:
%%writefile ingestion_agent.py
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta

class IngestionAgent:
    """Generate synthetic customer and transaction tables with Faker.

    Parameters
    ----------
    num_customers : int
        Number of rows produced by ``generate_customers``.
    num_transactions : int
        Number of rows produced by ``generate_transactions``.
    seed : int or None, optional
        When given, seeds both the Faker instance and this agent's private
        random generator so names, amounts and customer assignments repeat
        between runs. ``None`` (the default) keeps every run different.
        Transaction timestamps are always offsets from the current time.
    """

    def __init__(self, num_customers=100, num_transactions=1000, seed=None):
        self.fake = Faker()
        self.num_customers = num_customers
        self.num_transactions = num_transactions
        # Private generator instead of the module-level ``random`` functions,
        # so seeding here does not affect other users of ``random``.
        self._rng = random.Random(seed)
        if seed is not None:
            self.fake.seed_instance(seed)

    def generate_customers(self) -> pd.DataFrame:
        """Generate synthetic customer data.

        Returns a frame with one row per customer: sequential ``customer_id``
        starting at 1, Faker-generated contact fields, an ``account_created``
        date within the last five years and a ``risk_score`` in [0, 1].
        """
        customers = []
        for i in range(1, self.num_customers + 1):
            customers.append({
                "customer_id": i,
                "name": self.fake.name(),
                "email": self.fake.email(),
                "phone": self.fake.phone_number(),
                # Faker addresses are multi-line; flatten them to a single line
                "address": self.fake.address().replace("\n", ", "),
                "account_created": self.fake.date_between(start_date='-5y', end_date='today'),
                "risk_score": round(self._rng.uniform(0, 1), 2)
            })
        return pd.DataFrame(customers)

    def generate_transactions(self, customers_df: pd.DataFrame) -> pd.DataFrame:
        """Generate synthetic transaction data.

        Each transaction is assigned to a uniformly chosen ``customer_id`` from
        ``customers_df`` and dated up to roughly one year before now.

        Raises
        ------
        ValueError
            If transactions are requested but ``customers_df`` has no rows.
        """
        # Only the ids are needed, so extract them once instead of sampling a
        # full DataFrame row for every transaction.
        customer_ids = customers_df["customer_id"].tolist()
        if self.num_transactions > 0 and not customer_ids:
            raise ValueError("customers_df must contain at least one customer")

        now = datetime.now()
        rng = self._rng
        transactions = []
        for i in range(1, self.num_transactions + 1):
            customer_id = rng.choice(customer_ids)
            amount = round(rng.uniform(10, 20000), 2)
            timestamp = now - timedelta(
                days=rng.randint(0, 365),
                hours=rng.randint(0, 23),
                minutes=rng.randint(0, 59),
            )

            transactions.append({
                "transaction_id": i,
                "customer_id": customer_id,
                "amount": amount,
                "transaction_date": timestamp.strftime("%Y-%m-%d %H:%M:%S"),
                "location": self.fake.city(),
                "merchant": self.fake.company()
            })
        return pd.DataFrame(transactions)

    def ingest(self):
        """Generate both tables and return ``(customer_df, transaction_df)``."""
        print("Generating synthetic customer and transaction data using Faker...")
        customer_df = self.generate_customers()
        transaction_df = self.generate_transactions(customer_df)
        print(f"Generated {len(customer_df)} customers and {len(transaction_df)} transactions")
        return customer_df, transaction_df


Writing ingestion_agent.py


In [4]:
%%writefile segmentation_agent.py
# segmentation_agent.py
import pandas as pd
import numpy as np
from datetime import datetime

class SegmentationAgent:
    """Assign customers to value segments from their transaction history."""

    def __init__(self):
        pass

    @staticmethod
    def _quartile_score(values: pd.Series, labels) -> pd.Series:
        """Bin ``values`` into quartiles labelled with ``labels`` and return ints.

        ``pd.qcut`` raises ValueError when tied values make quartile edges
        coincide; in that case the ranks (ties broken by order of appearance)
        are binned instead, which is how the frequency score is computed.
        """
        try:
            binned = pd.qcut(values, q=4, labels=labels)
        except ValueError:
            binned = pd.qcut(values.rank(method='first'), q=4, labels=labels)
        return binned.astype(int)

    def segment_customers(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame) -> pd.DataFrame:
        """Segment customers using RFM logic (Recency, Frequency, Monetary).

        Parameters
        ----------
        customer_df : pd.DataFrame
            Customer table; must contain ``customer_id``.
        transaction_df : pd.DataFrame
            Transaction table with ``customer_id``, ``transaction_id``,
            ``amount`` and ``transaction_date``. It is not modified.

        Returns
        -------
        pd.DataFrame
            ``customer_df`` left-joined with the RFM columns, the three
            quartile scores, ``rfm_score`` and ``segment``. Customers without
            any transaction get the segment ``'New'`` and NaN RFM columns.
        """
        print("Running customer segmentation...")

        # Parse dates on a copy so the caller's frame is left untouched
        transactions = transaction_df.copy()
        transactions['transaction_date'] = pd.to_datetime(transactions['transaction_date'])

        # Recency is measured against the most recent transaction in the data
        current_date = transactions['transaction_date'].max()

        # RFM calculations
        rfm = transactions.groupby('customer_id').agg({
            'transaction_date': lambda x: (current_date - x.max()).days,  # Recency
            'transaction_id': 'count',  # Frequency
            'amount': 'sum'  # Monetary
        }).reset_index()

        rfm.columns = ['customer_id', 'recency', 'frequency', 'monetary']

        # Scoring: a lower recency is better, so its labels run 4 -> 1
        rfm['recency_score'] = self._quartile_score(rfm['recency'], [4, 3, 2, 1])
        rfm['frequency_score'] = pd.qcut(rfm['frequency'].rank(method='first'), q=4, labels=[1, 2, 3, 4]).astype(int)
        rfm['monetary_score'] = self._quartile_score(rfm['monetary'], [1, 2, 3, 4])

        # Combine into a single RFM score (range 3..12)
        rfm['rfm_score'] = rfm['recency_score'] + rfm['frequency_score'] + rfm['monetary_score']

        # Assign segments based on RFM score
        def assign_segment(score):
            if score >= 10:
                return 'High Value'
            elif score >= 7:
                return 'Loyal'
            elif score >= 5:
                return 'Promising'
            else:
                return 'At Risk'

        rfm['segment'] = rfm['rfm_score'].apply(assign_segment)

        # Merge with customer data
        segmented_df = customer_df.merge(rfm, on='customer_id', how='left')

        # Fill missing segments (e.g., customers with no transactions)
        segmented_df['segment'] = segmented_df['segment'].fillna('New')

        # Print segment stats
        print("\nCustomer Segmentation Summary:")
        print(segmented_df['segment'].value_counts())

        return segmented_df


Writing segmentation_agent.py


In [5]:
%%writefile recommendation_agent.py
# recommendation_agent.py
import pandas as pd

class RecommendationAgent:
    """Pick one product offer per customer from its segment and risk flag."""

    # (segment label, product code) pairs, checked in this order
    _SEGMENT_PRODUCT_KEYS = (
        ("High Value", "premium_card"),
        ("Loyal", "investment_plan"),
        ("Promising", "loan_offer"),
        ("At Risk", "retention_offer"),
        ("New", "starter_bundle"),
    )

    def __init__(self):
        # Product catalogue: internal code -> customer-facing offer name
        self.products = dict(
            premium_card="Platinum Credit Card",
            basic_card="Standard Debit Card",
            loan_offer="Personal Loan Offer",
            investment_plan="Wealth Investment Plan",
            retention_offer="Loyalty Bonus / Cashback",
            starter_bundle="Welcome Package for New Customers",
        )

    def _product_for(self, row) -> str:
        """Return the offer name for a single customer row."""
        segment = row.get("segment", "Unknown")
        # A truthy high-risk flag overrides whatever the segment would suggest
        if row.get("is_high_risk", False):
            return self.products["retention_offer"]
        for label, product_key in self._SEGMENT_PRODUCT_KEYS:
            if segment == label:
                return self.products[product_key]
        # Any other segment value falls back to the basic card
        return self.products["basic_card"]

    def generate_recommendations(self, customer_df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``customer_df`` with a ``recommended_product`` column.

        The ``segment`` and ``is_high_risk`` columns are read when present;
        the input frame itself is not modified.
        """
        print("Running personalized product recommendation engine...")

        df = customer_df.copy()
        df["recommended_product"] = "None"  # placeholder until each row is assigned

        for idx, customer in df.iterrows():
            df.at[idx, "recommended_product"] = self._product_for(customer)

        print("\nProduct Recommendation Summary:")
        print(df["recommended_product"].value_counts())

        return df


Writing recommendation_agent.py


In [6]:
%%writefile fraud_detection_agent.py
import pandas as pd
import numpy as np
import os
from datetime import datetime
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from config import FRAUD_DETECTED_DATA_PATH, DATA_DIR

class FraudDetectionAgent:
    """Flag suspicious transactions and derive a fraud-risk score per customer.

    Transactions are scored with an IsolationForest plus a few rule-based
    overrides; the per-transaction flags are then aggregated into customer
    metrics, a normalized ``fraud_risk_score`` and an ``is_high_risk`` flag.
    Both result tables are written as CSV files under ``DATA_DIR``.
    """

    def __init__(self):
        self.data_dir = DATA_DIR
        # The customer-level result table is written to FRAUD_DETECTED_DATA_PATH
        self.customer_fraud_path = FRAUD_DETECTED_DATA_PATH
        self.transaction_fraud_path = os.path.join(self.data_dir, "fraud_detected_transactions.csv")

    def detect_transaction_fraud(self, transaction_df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``transaction_df`` with features and an ``is_fraudulent`` flag.

        The input needs ``customer_id``, ``amount`` and ``transaction_date``.
        The returned frame is sorted by customer and date and carries the
        engineered features, ``anomaly_score`` (the IsolationForest label,
        where -1 marks an anomaly) and the boolean ``is_fraudulent``.
        """
        print("Running enhanced transaction fraud detection...")
        df = transaction_df.copy()
        df['transaction_date'] = pd.to_datetime(df['transaction_date'])
        df['hour'] = df['transaction_date'].dt.hour
        df['day_of_week'] = df['transaction_date'].dt.dayofweek

        # Daypart assignment
        def assign_daypart(hour):
            if 5 <= hour < 12:
                return 'morning'
            elif 12 <= hour < 17:
                return 'afternoon'
            elif 17 <= hour < 21:
                return 'evening'
            else:
                return 'night'

        df['daypart'] = df['hour'].apply(assign_daypart)

        # Customer stats
        customer_stats = df.groupby('customer_id').agg({
            'amount': ['mean', 'std', 'count', 'max']
        }).reset_index()
        customer_stats.columns = ['customer_id', 'avg_amount', 'std_amount', 'transaction_count', 'max_amount']
        df = df.merge(customer_stats, on='customer_id', how='left')

        # Z-score of the amount within the customer's own history; a zero
        # standard deviation is replaced by 1 to avoid dividing by zero.
        df['amount_zscore'] = (df['amount'] - df['avg_amount']) / df['std_amount'].replace(0, 1)

        # Velocity features: seconds since the customer's previous transaction
        # (0 for each customer's first transaction)
        df = df.sort_values(['customer_id', 'transaction_date'])
        df['prev_transaction_time'] = df.groupby('customer_id')['transaction_date'].shift(1)
        df['time_since_last_txn'] = (df['transaction_date'] - df['prev_transaction_time']).dt.total_seconds().fillna(0)

        # One-hot encode daypart
        df = pd.get_dummies(df, columns=['daypart'], drop_first=True)

        # Features for detection; remaining NaNs (e.g. z-score of a customer
        # with a single transaction) are treated as 0
        features = ['amount', 'amount_zscore', 'hour', 'day_of_week', 'time_since_last_txn']
        features += [col for col in df.columns if col.startswith('daypart_')]
        X = df[features].fillna(0)

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        iso_forest = IsolationForest(contamination=0.03, random_state=42)
        df['anomaly_score'] = iso_forest.fit_predict(X_scaled)
        df['is_fraudulent'] = df['anomaly_score'] == -1

        # Rule-based enhancements
        # NOTE(review): max_amount is the customer's maximum over these same
        # rows, so for positive amounts `amount > 3 * max_amount` cannot hold
        # and this rule never fires. Left unchanged until the intended
        # comparison is confirmed.
        df.loc[(df['amount'] > 5 * df['avg_amount']) & (df['amount'] > 3 * df['max_amount']), 'is_fraudulent'] = True

        # Unusual hours: a customer with fewer than 3 transactions in any single
        # hour from 2 to 4 has all of their transactions in those hours flagged
        customer_hour_counts = df.groupby(['customer_id', 'hour']).size().reset_index(name='count')
        unusual_customers = customer_hour_counts[
            (customer_hour_counts['hour'].between(2, 4)) &
            (customer_hour_counts['count'] < 3)
        ]['customer_id'].unique()
        df.loc[(df['customer_id'].isin(unusual_customers)) &
               (df['hour'].between(2, 4)), 'is_fraudulent'] = True

        # Dynamic z-score threshold: more than 3 standard deviations above the
        # mean of all z-scores
        high_z = df['amount_zscore'] > df['amount_zscore'].mean() + 3 * df['amount_zscore'].std()
        df.loc[high_z, 'is_fraudulent'] = True

        print(f"Total transactions: {len(df)}")
        print(f"Transactions flagged as fraudulent: {df['is_fraudulent'].sum()}")
        print(f"Fraud rate: {df['is_fraudulent'].sum() / len(df):.2%}")

        return df

    def calculate_customer_fraud_risk(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame) -> pd.DataFrame:
        """Aggregate flagged transactions into per-customer fraud metrics.

        Parameters
        ----------
        customer_df : pd.DataFrame
            Customer table with ``customer_id``.
        transaction_df : pd.DataFrame
            Transactions that already carry a boolean ``is_fraudulent`` column
            (as produced by ``detect_transaction_fraud``). It is not modified.

        Returns
        -------
        pd.DataFrame
            A copy of ``customer_df`` with ``fraudulent_count``, ``fraud_rate``,
            ``avg_amount``, ``max_amount``, ``transaction_count``,
            ``days_since_last_fraud``, ``fraud_risk_score`` (min-max normalized)
            and ``is_high_risk``. Customers without transactions get 0 for the
            five metric columns; NaNs in other customer columns are preserved.
        """
        print("Calculating enhanced customer fraud risk...")
        df = customer_df.copy()
        # Parse dates on a copy so the caller's frame is left untouched
        transactions = transaction_df.copy()
        transactions['transaction_date'] = pd.to_datetime(transactions['transaction_date'])

        fraud_metrics = transactions.groupby('customer_id').agg({
            'is_fraudulent': ['sum', 'mean'],
            'amount': ['mean', 'max'],
            'transaction_id': 'count'
        }).reset_index()
        metric_columns = [
            'fraudulent_count', 'fraud_rate',
            'avg_amount', 'max_amount', 'transaction_count'
        ]
        fraud_metrics.columns = ['customer_id'] + metric_columns
        df = df.merge(fraud_metrics, on='customer_id', how='left')
        # Zero-fill only the metrics added here; filling the whole frame would
        # also overwrite missing values in unrelated customer columns.
        df[metric_columns] = df[metric_columns].fillna(0)

        # Days since last fraud, measured from the current time; customers with
        # no flagged transaction get the sentinel 999
        fraud_txns = transactions[transactions['is_fraudulent']]
        last_fraud_time = fraud_txns.groupby('customer_id')['transaction_date'].max().reset_index()
        last_fraud_time.columns = ['customer_id', 'last_fraud_time']
        now = datetime.now()
        last_fraud_time['days_since_last_fraud'] = (now - last_fraud_time['last_fraud_time']).dt.days
        df = df.merge(last_fraud_time[['customer_id', 'days_since_last_fraud']], on='customer_id', how='left')
        df['days_since_last_fraud'] = df['days_since_last_fraud'].fillna(999)

        # Raw score: weighted mix of fraud rate, capped largest amount and
        # capped transaction count
        df['fraud_risk_score'] = (
            0.5 * df['fraud_rate'] +
            0.3 * (df['max_amount'] / 10000).clip(0, 1) +
            0.2 * (df['transaction_count'] / 100).clip(0, 1)
        )
        # Recent fraud adjustment: up to +0.1, fading to 0 after 365 days
        df['fraud_risk_score'] += 0.1 * (1 - (df['days_since_last_fraud'] / 365).clip(0, 1))

        # Normalize to [0, 1); the small epsilon avoids dividing by zero when
        # every customer has the same raw score
        min_score = df['fraud_risk_score'].min()
        max_score = df['fraud_risk_score'].max()
        df['fraud_risk_score'] = (df['fraud_risk_score'] - min_score) / (max_score - min_score + 1e-5)

        df['is_high_risk'] = (
            (df['fraud_risk_score'] > 0.4) |
            (df['fraudulent_count'] >= 3) |
            (df['fraud_rate'] > 0.3)
        )

        print(f"Total customers: {len(df)}")
        print(f"High risk customers: {df['is_high_risk'].sum()}")

        return df

    def save_results(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame):
        """Write both result tables to CSV and return a dict with their paths."""
        customer_df.to_csv(self.customer_fraud_path, index=False)
        transaction_df.to_csv(self.transaction_fraud_path, index=False)
        return {
            "customer_fraud_path": self.customer_fraud_path,
            "transaction_fraud_path": self.transaction_fraud_path
        }

    def run(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame):
        """Run detection, risk scoring and saving in sequence.

        Returns a dict with ``status``; on success it also holds
        ``output_paths`` (see ``save_results``) and ``fraud_statistics``.
        If either input frame is empty, an error dict is returned and nothing
        is written.
        """
        if customer_df.empty or transaction_df.empty:
            print("No data provided.")
            return {"status": "error", "message": "Empty data."}

        print("Starting fraud detection process...")
        print(f"Processing {len(customer_df)} customers and {len(transaction_df)} transactions")

        fraud_transaction_df = self.detect_transaction_fraud(transaction_df)
        fraudulent_count = fraud_transaction_df['is_fraudulent'].sum()

        fraud_customer_df = self.calculate_customer_fraud_risk(customer_df, fraud_transaction_df)
        high_risk_count = fraud_customer_df['is_high_risk'].sum()

        output_paths = self.save_results(fraud_customer_df, fraud_transaction_df)

        fraud_stats = {
            "total_transactions": len(transaction_df),
            "fraudulent_transactions": fraudulent_count,
            "fraud_rate": fraudulent_count / len(transaction_df),
            "high_risk_customers": high_risk_count,
            "high_risk_rate": high_risk_count / len(customer_df)
        }

        print("\nFraud Statistics:")
        for key, value in fraud_stats.items():
            print(f"{key}: {value}")

        return {
            "status": "success",
            "output_paths": output_paths,
            "fraud_statistics": fraud_stats
        }

if __name__ == "__main__":
    # Smoke run on a tiny hand-written dataset
    customer_records = [
        {"customer_id": 1, "name": "John", "risk_score": 0.2},
        {"customer_id": 2, "name": "Jane", "risk_score": 0.8},
        {"customer_id": 3, "name": "Bob", "risk_score": 0.4},
    ]
    transaction_records = [
        {"transaction_id": 101, "customer_id": 1, "amount": 5000, "transaction_date": "2024-01-01 10:00:00"},
        {"transaction_id": 102, "customer_id": 2, "amount": 15000, "transaction_date": "2024-01-01 03:00:00"},
        {"transaction_id": 103, "customer_id": 1, "amount": 200, "transaction_date": "2024-01-02 14:00:00"},
        {"transaction_id": 104, "customer_id": 3, "amount": 4500, "transaction_date": "2024-01-02 02:00:00"},
    ]
    sample_customers = pd.DataFrame(customer_records)
    sample_transactions = pd.DataFrame(transaction_records)

    agent = FraudDetectionAgent()
    result = agent.run(sample_customers, sample_transactions)
    print(result)


Writing fraud_detection_agent.py


In [7]:
%%writefile oversight_panel_agent.py
# oversight_panel_agent.py
import pandas as pd

class OversightPanelAgent:
    """Select transactions of high-risk customers that need human review."""

    def __init__(self, escalation_threshold=0.6):
        # Customer fraud_risk_score above which every transaction is escalated
        self.escalation_threshold = escalation_threshold

    def review_high_risk_cases(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame) -> pd.DataFrame:
        """Return the escalated transactions of customers flagged ``is_high_risk``.

        A transaction of a high-risk customer is escalated when the customer's
        ``fraud_risk_score`` exceeds the threshold, the amount is above 10000,
        or the transaction itself is flagged ``is_fraudulent``.
        """
        print("Running oversight panel agent for case review and escalation...")

        # Keep only the customers flagged as high risk
        risky_mask = customer_df['is_high_risk'] == True
        high_risk_customers = customer_df[risky_mask].copy()

        # Attach each high-risk customer's score to their transactions; the
        # inner join drops transactions of all other customers
        score_lookup = high_risk_customers[['customer_id', 'fraud_risk_score']]
        merged_df = transaction_df.merge(score_lookup, on='customer_id', how='inner')

        # Any one of the three conditions is enough to escalate
        score_too_high = merged_df['fraud_risk_score'] > self.escalation_threshold
        amount_too_large = merged_df['amount'] > 10000
        already_flagged = merged_df['is_fraudulent'] == True
        merged_df['escalate_case'] = score_too_high | amount_too_large | already_flagged

        escalated_cases = merged_df[merged_df['escalate_case'] == True]

        print(f"Total high-risk customers: {len(high_risk_customers)}")
        print(f"Cases escalated for human review: {len(escalated_cases)}")

        report_columns = [
            'customer_id', 'transaction_id', 'amount',
            'transaction_date', 'fraud_risk_score', 'is_fraudulent',
        ]
        return escalated_cases[report_columns]

if __name__ == "__main__":
    # Nothing is exercised here; this only confirms the module can be run
    print("Oversight Panel Agent loaded successfully.")


Writing oversight_panel_agent.py


In [8]:
%%writefile config.py
import os

# All paths are anchored to the directory that contains this module
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, 'data')

# Created at import time so that agents can write into it right away
os.makedirs(DATA_DIR, exist_ok=True)


def _data_file(filename):
    """Return the path of ``filename`` inside DATA_DIR."""
    return os.path.join(DATA_DIR, filename)


CUSTOMER_DATA_PATH = _data_file('customers.csv')
TRANSACTION_DATA_PATH = _data_file('transactions.csv')
SEGMENTED_DATA_PATH = _data_file('segmented_customers.csv')
# NOTE: FraudDetectionAgent writes its customer-level result table to this path
FRAUD_DETECTED_DATA_PATH = _data_file('fraudulent_transactions.csv')
RECOMMENDATION_DATA_PATH = _data_file('recommendations.csv')
ESCALATED_CASES_PATH = _data_file('escalated_cases.csv')

Writing config.py


In [9]:
from ingestion_agent import IngestionAgent
from segmentation_agent import SegmentationAgent
from fraud_detection_agent import FraudDetectionAgent
from recommendation_agent import RecommendationAgent
from oversight_panel_agent import OversightPanelAgent

In [10]:
# Initialize agents: one instance per pipeline stage, all with default settings.
# `ingestion` is re-created with explicit sizes in the next cell, which
# replaces the default-sized instance built here.
ingestion = IngestionAgent()
segmentation = SegmentationAgent()
fraud_detection = FraudDetectionAgent()
recommendation = RecommendationAgent()
oversight = OversightPanelAgent()


In [11]:
# Re-import the class (it was already imported in an earlier cell)
from ingestion_agent import IngestionAgent

# Generate the synthetic dataset: 200 customers and 1000 transactions
ingestion = IngestionAgent(num_customers=200, num_transactions=1000)
customers_df, transactions_df = ingestion.ingest()

# Preview the first rows of both frames (shown together as a tuple)
customers_df.head(), transactions_df.head()

Generating synthetic customer and transaction data using Faker...
Generated 200 customers and 1000 transactions


(   customer_id             name                       email  \
 0            1   Samantha Myers  jimenezjessica@example.net   
 1            2   Anthony Miller      ryancarter@example.net   
 2            3   Steven Vazquez    theresajones@example.org   
 3            4  James Gutierrez       amymelton@example.org   
 4            5     Cheryl Green         pporter@example.com   
 
                     phone                                            address  \
 0  001-889-888-4581x82317            6380 Butler Brooks, New Karen, DC 40805   
 1        001-873-254-8349  6760 Vasquez Views Apt. 505, New Thomas, MP 96788   
 2              7055167400                   PSC 5686, Box 0191, APO AA 54092   
 3       (984)691-1421x564      255 Vickie Pines, North Stephenport, MH 14434   
 4         +1-645-345-5158      46419 Loretta Stravenue, Port Mindy, NE 60451   
 
   account_created  risk_score  
 0      2021-02-25        0.73  
 1      2024-09-03        0.88  
 2      2021-12-26        0

In [12]:
# Segment customers with the RFM quartile scoring implemented in SegmentationAgent
segmented_customers = segmentation.segment_customers(customers_df, transactions_df)


# Show how many customers fall into each segment
segmented_customers['segment'].value_counts()


Running customer segmentation...

Customer Segmentation Summary:
segment
Loyal         75
High Value    52
Promising     38
At Risk       34
New            1
Name: count, dtype: int64


Unnamed: 0_level_0,count
segment,Unnamed: 1_level_1
Loyal,75
High Value,52
Promising,38
At Risk,34
New,1


In [13]:
import pandas as pd
# Run fraud detection; `run` writes both result tables to CSV and returns their paths
result = fraud_detection.run(segmented_customers, transactions_df)

# Reload the processed customer and transaction tables from those CSV files
fraud_checked_customers = pd.read_csv(result["output_paths"]["customer_fraud_path"])
fraud_checked_transactions = pd.read_csv(result["output_paths"]["transaction_fraud_path"])


Starting fraud detection process...
Processing 200 customers and 1000 transactions
Running enhanced transaction fraud detection...
Total transactions: 1000
Transactions flagged as fraudulent: 154
Fraud rate: 15.40%
Calculating enhanced customer fraud risk...
Total customers: 200
High risk customers: 110

Fraud Statistics:
total_transactions: 1000
fraudulent_transactions: 154
fraud_rate: 0.154
high_risk_customers: 110
high_risk_rate: 0.55


In [14]:
# Generate product recommendations. Customers flagged `is_high_risk` receive the
# retention offer regardless of their segment.
recommended_customers = recommendation.generate_recommendations(fraud_checked_customers)


# Show sample recommendations
recommended_customers[['customer_id', 'segment', 'recommended_product']].head()



Running personalized product recommendation engine...

Product Recommendation Summary:
recommended_product
Loyalty Bonus / Cashback             136
Wealth Investment Plan                34
Personal Loan Offer                   15
Platinum Credit Card                  14
Welcome Package for New Customers      1
Name: count, dtype: int64


Unnamed: 0,customer_id,segment,recommended_product
0,1,Promising,Loyalty Bonus / Cashback
1,2,Promising,Loyalty Bonus / Cashback
2,3,High Value,Loyalty Bonus / Cashback
3,4,At Risk,Loyalty Bonus / Cashback
4,5,At Risk,Loyalty Bonus / Cashback


In [15]:
# Lift the row limit for this one display only; option_context restores the
# previous display.max_rows afterwards instead of changing it for every later cell.
with pd.option_context('display.max_rows', None):
    display(recommended_customers[['customer_id', 'segment', 'recommended_product']])


Unnamed: 0,customer_id,segment,recommended_product
0,1,Promising,Loyalty Bonus / Cashback
1,2,Promising,Loyalty Bonus / Cashback
2,3,High Value,Loyalty Bonus / Cashback
3,4,At Risk,Loyalty Bonus / Cashback
4,5,At Risk,Loyalty Bonus / Cashback
5,6,Loyal,Loyalty Bonus / Cashback
6,7,High Value,Loyalty Bonus / Cashback
7,8,Loyal,Loyalty Bonus / Cashback
8,9,Loyal,Loyalty Bonus / Cashback
9,10,High Value,Loyalty Bonus / Cashback


In [16]:
# Run oversight on high-risk customers
escalated_cases = oversight.review_high_risk_cases(
    customer_df=fraud_checked_customers,
    transaction_df=fraud_checked_transactions
)

# View escalated cases
escalated_cases.head()


Running oversight panel agent for case review and escalation...
Total high-risk customers: 110
Cases escalated for human review: 481


Unnamed: 0,customer_id,transaction_id,amount,transaction_date,fraud_risk_score,is_fraudulent
0,1,498,964.11,2024-04-08 02:07:33,0.499815,True
4,2,322,13904.73,2024-06-29 06:50:33,0.63013,False
5,2,327,2828.42,2024-08-24 15:28:33,0.63013,False
6,2,574,4966.36,2024-10-25 01:55:33,0.63013,False
7,2,902,15142.46,2025-02-07 02:03:33,0.63013,True


In [17]:
import pandas as pd

df = pd.DataFrame(recommended_customers)
# Rename columns to the names used in the final report
df = df.rename(columns={
    "customer_id": "id",
    "is_high_risk": "fraud_alert",  # the per-customer high-risk flag is reported as the fraud alert
    "recommended_product": "recommendations"
})

# Keep only the report columns
result_df = df[["id", "name", "segment", "fraud_alert", "recommendations"]]
display(result_df)  # rich display of the report table in the notebook

Unnamed: 0,id,name,segment,fraud_alert,recommendations
0,1,Samantha Myers,Promising,True,Loyalty Bonus / Cashback
1,2,Anthony Miller,Promising,True,Loyalty Bonus / Cashback
2,3,Steven Vazquez,High Value,True,Loyalty Bonus / Cashback
3,4,James Gutierrez,At Risk,False,Loyalty Bonus / Cashback
4,5,Cheryl Green,At Risk,True,Loyalty Bonus / Cashback
5,6,Jonathon Reese,Loyal,True,Loyalty Bonus / Cashback
6,7,Jonathon Wright,High Value,True,Loyalty Bonus / Cashback
7,8,Andrew West,Loyal,True,Loyalty Bonus / Cashback
8,9,Heather Vasquez,Loyal,True,Loyalty Bonus / Cashback
9,10,Joshua Mccormick,High Value,True,Loyalty Bonus / Cashback


In [18]:
# run.py

import pandas as pd
from ingestion_agent import IngestionAgent
from segmentation_agent import SegmentationAgent
from fraud_detection_agent import FraudDetectionAgent
from recommendation_agent import RecommendationAgent
from oversight_panel_agent import OversightPanelAgent
import os


def main():
    """Run the whole Customer360 pipeline once and save the summary table.

    Stages: ingestion, segmentation, fraud detection, recommendation and
    oversight. The final per-customer table is printed (first 10 rows) and
    written to output/final_customer360_result.csv.
    """
    print("\n🚀 Starting Customer360 End-to-End Pipeline...\n")

    # Step 1: synthetic customers and transactions
    customers_df, transactions_df = IngestionAgent(num_customers=200, num_transactions=1000).ingest()

    # Step 2: RFM segmentation
    segmented_customers = SegmentationAgent().segment_customers(customers_df, transactions_df)
    print("\n📊 Customer Segmentation Complete:")
    print(segmented_customers['segment'].value_counts())

    # Step 3: fraud detection; results are reloaded from the CSV files it writes
    saved_paths = FraudDetectionAgent().run(segmented_customers, transactions_df)["output_paths"]
    fraud_checked_customers = pd.read_csv(saved_paths["customer_fraud_path"])
    fraud_checked_transactions = pd.read_csv(saved_paths["transaction_fraud_path"])

    # Step 4: product recommendations
    recommended_customers = RecommendationAgent().generate_recommendations(fraud_checked_customers)

    # Step 5: oversight review (prints its own summary; the result is not used further)
    OversightPanelAgent().review_high_risk_cases(fraud_checked_customers, fraud_checked_transactions)

    # Final result table with report-friendly column names
    report_names = {
        "customer_id": "id",
        "is_high_risk": "fraud_alert",
        "recommended_product": "recommendations"
    }
    renamed = recommended_customers.rename(columns=report_names)
    result_df = renamed[["id", "name", "segment", "fraud_alert", "recommendations"]]

    print("\n✅ Final Result Snapshot:")
    print(result_df.head(10))

    # The output folder must exist before the CSV is written
    os.makedirs("output", exist_ok=True)
    result_df.to_csv("output/final_customer360_result.csv", index=False)
    print("\n📁 Final result saved to 'output/final_customer360_result.csv'")
    print("\n✅ Pipeline execution complete!")


if __name__ == "__main__":
    main()



🚀 Starting Customer360 End-to-End Pipeline...

Generating synthetic customer and transaction data using Faker...
Generated 200 customers and 1000 transactions
Running customer segmentation...

Customer Segmentation Summary:
segment
Loyal         68
High Value    55
Promising     45
At Risk       32
Name: count, dtype: int64

📊 Customer Segmentation Complete:
segment
Loyal         68
High Value    55
Promising     45
At Risk       32
Name: count, dtype: int64
Starting fraud detection process...
Processing 200 customers and 1000 transactions
Running enhanced transaction fraud detection...
Total transactions: 1000
Transactions flagged as fraudulent: 172
Fraud rate: 17.20%
Calculating enhanced customer fraud risk...
Total customers: 200
High risk customers: 119

Fraud Statistics:
total_transactions: 1000
fraudulent_transactions: 172
fraud_rate: 0.172
high_risk_customers: 119
high_risk_rate: 0.595
Running personalized product recommendation engine...

Product Recommendation Summary:
recomm

In [19]:
mkdir -p api


In [20]:
# Install the UI dependencies (versions unpinned), then start the Gradio app as a script.
# NOTE(review): no cell shown in this notebook writes gradio_ui.py to disk (the UI
# code in the following cell has no %%writefile header), so the second command
# presumably needs that file to exist already — confirm.
!pip install gradio pandas faker scikit-learn
!python gradio_ui.py

Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [21]:
# gradio_ui.py

import gradio as gr
import pandas as pd
from ingestion_agent import IngestionAgent
from segmentation_agent import SegmentationAgent
from fraud_detection_agent import FraudDetectionAgent
from recommendation_agent import RecommendationAgent
from oversight_panel_agent import OversightPanelAgent

# Initialize agents (created once and reused by every pipeline run from the UI)
ingestion = IngestionAgent(num_customers=200, num_transactions=1000)
segmentation = SegmentationAgent()
fraud_detection = FraudDetectionAgent()
recommendation = RecommendationAgent()
oversight = OversightPanelAgent()

# Global state for Gradio UI: both frames stay empty until run_pipeline has
# completed once, and are replaced on every run
final_df = pd.DataFrame()
escalated_df = pd.DataFrame()

def run_pipeline():
    """Run the full pipeline and refresh the module-level result frames.

    Updates the globals ``final_df`` (per-customer report) and
    ``escalated_df`` (escalated transactions), writes the report to
    output/final_customer360_result.csv and returns ``(final_df, csv_path)``
    for the overview table and the download component.
    """
    import os  # local import: this cell's import block does not bring in os

    global final_df, escalated_df

    # Ingest
    customers_df, transactions_df = ingestion.ingest()

    # Segment
    segmented = segmentation.segment_customers(customers_df, transactions_df)

    # Detect fraud; the processed tables are reloaded from the CSV files it writes
    fraud_results = fraud_detection.run(segmented, transactions_df)
    fraud_checked_customers = pd.read_csv(fraud_results["output_paths"]["customer_fraud_path"])
    fraud_checked_transactions = pd.read_csv(fraud_results["output_paths"]["transaction_fraud_path"])

    # Recommend
    recommended = recommendation.generate_recommendations(fraud_checked_customers)

    # Oversight
    escalated_df = oversight.review_high_risk_cases(fraud_checked_customers, fraud_checked_transactions)

    # Final output
    final_df = recommended.rename(columns={
        "customer_id": "id",
        "is_high_risk": "fraud_alert",
        "recommended_product": "recommendations"
    })[["id", "name", "segment", "fraud_alert", "recommendations"]]

    # Save for download; create the folder first so a fresh working directory
    # does not make to_csv fail
    os.makedirs("output", exist_ok=True)
    final_df.to_csv("output/final_customer360_result.csv", index=False)
    return final_df, "output/final_customer360_result.csv"

def filter_output(segment, fraud_alert):
    """Return the report rows matching the two dropdown selections.

    ``"All"`` disables the corresponding filter; ``fraud_alert`` is otherwise
    ``"Yes"`` or ``"No"``. An empty frame is returned while no pipeline run
    has produced results yet.
    """
    if final_df.empty:
        return pd.DataFrame()

    filtered = final_df.copy()

    if segment != "All":
        in_segment = filtered["segment"] == segment
        filtered = filtered[in_segment]
    if fraud_alert != "All":
        # Translate the dropdown text into the boolean stored in the column
        wants_flagged = fraud_alert == "Yes"
        filtered = filtered[filtered["fraud_alert"] == wants_flagged]
    return filtered

def get_recommendations():
    """Return the recommendation columns of the report, or an empty frame before the first run."""
    if not final_df.empty:
        recommendation_columns = ["id", "name", "segment", "recommendations"]
        return final_df[recommendation_columns]
    return pd.DataFrame()

def get_fraud_cases():
    """Return the escalated cases of the latest run, or a fresh empty frame if there are none."""
    if escalated_df.empty:
        return pd.DataFrame()
    return escalated_df

# UI
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Customer360 AI Dashboard")
    gr.Markdown("Simulate data ingestion, segmentation, fraud detection, recommendations, and oversight in one go.")

    with gr.Row():
        run_btn = gr.Button("🚀 Run Pipeline")
        download_btn = gr.File(label="📥 Download Result CSV")

    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            with gr.Row():
                segment_filter = gr.Dropdown(choices=["All", "Loyal", "High Value", "Promising", "At Risk", "New"],
                                              label="Segment Filter", value="All")
                fraud_filter = gr.Dropdown(choices=["All", "Yes", "No"], label="Fraud Alert Filter", value="All")
            overview_table = gr.Dataframe()

        with gr.Tab("🚨 Escalated Fraud Cases"):
            fraud_table = gr.Dataframe()

        with gr.Tab("🎯 Recommendations"):
            reco_table = gr.Dataframe()

    # Actions
    # The fraud and recommendation tables read globals that run_pipeline sets,
    # so they are refreshed only after it has finished. Registering them as
    # separate click handlers would let them run before the new results exist.
    pipeline_event = run_btn.click(fn=run_pipeline, outputs=[overview_table, download_btn])
    pipeline_event.then(fn=get_fraud_cases, outputs=fraud_table).then(
        fn=get_recommendations, outputs=reco_table
    )

    # Either dropdown re-filters the overview using both current selections
    segment_filter.change(fn=filter_output, inputs=[segment_filter, fraud_filter], outputs=overview_table)
    fraud_filter.change(fn=filter_output, inputs=[segment_filter, fraud_filter], outputs=overview_table)

demo.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b4465f3d62dd9f7a60.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [22]:
!pip install groq

Collecting groq
  Downloading groq-0.22.0-py3-none-any.whl.metadata (15 kB)
Downloading groq-0.22.0-py3-none-any.whl (126 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.7/126.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.22.0


In [23]:
import pandas as pd
from fraud_detection_agent import FraudDetectionAgent
from groq import Groq

class GroqFraudAssistant:
    """Console assistant that answers fraud questions locally or via a Groq LLM.

    Queries containing ``transaction fraud`` or ``customer fraud risk`` are
    answered by the wrapped fraud-detection agent; every other query is
    forwarded to the Groq chat-completions API.
    """

    # Default model id kept from the original notebook for backward compatibility.
    # NOTE(review): this is a "preview" model id - confirm it is still served by
    # Groq, or pass a different ``model`` to the constructor.
    DEFAULT_MODEL = "llama-3.2-90b-vision-preview"
    SYSTEM_PROMPT = "You are a fraud analysis assistant."
    EXIT_COMMANDS = ("exit", "quit")

    def __init__(self, fraud_agent: "FraudDetectionAgent", api_key: str,
                 model: str = DEFAULT_MODEL):
        """Store the fraud agent and build the Groq client.

        Args:
            fraud_agent: Object providing ``detect_transaction_fraud`` and
                ``calculate_customer_fraud_risk``.
            api_key: Groq API key used to authenticate the client.
            model: Chat model id sent with every completion request.
        """
        self.fraud_agent = fraud_agent
        self.client = Groq(api_key=api_key)
        self.model = model

    def ask_groq(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the reply text."""
        chat_completion = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ]
        )
        return chat_completion.choices[0].message.content

    def interact(self, customer_df: pd.DataFrame, transaction_df: pd.DataFrame):
        """Run a blocking read-eval-print loop on stdin/stdout.

        The loop ends when the user types ``exit``/``quit`` (case and
        surrounding whitespace are ignored), when stdin is closed, or when the
        prompt is interrupted.

        Args:
            customer_df: Customer table handed to the fraud agent.
            transaction_df: Transaction table handed to the fraud agent.
        """
        print("Hello! I am your Fraud Assistant powered by Groq. Ask me anything.")

        while True:
            try:
                user_input = input("Your query: ")
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or an interrupted cell should end the session
                # cleanly instead of surfacing a traceback.
                print("Session ended.")
                break

            # Normalise once so matching tolerates stray spaces and casing.
            query = user_input.strip().lower()
            if not query:
                # Nothing was typed: re-prompt rather than spend an API call.
                continue

            if query in self.EXIT_COMMANDS:
                print("Session ended.")
                break

            if 'transaction fraud' in query:
                fraud_transactions = self.fraud_agent.detect_transaction_fraud(transaction_df)
                # `== True` is deliberate: it yields a clean boolean mask even
                # if the flag column is object-typed or contains missing values.
                print(fraud_transactions[fraud_transactions["is_fraudulent"] == True])  # noqa: E712
            elif 'customer fraud risk' in query:
                fraud_customers = self.fraud_agent.calculate_customer_fraud_risk(customer_df, transaction_df)
                print(fraud_customers[fraud_customers["is_high_risk"] == True])  # noqa: E712
            else:
                response = self.ask_groq(user_input.strip())
                print(f"Groq says: {response}")


In [24]:
from fraud_detection_agent import FraudDetectionAgent
from google.colab import userdata

In [25]:
import getpass
import os

# Prompt for the API key without echoing it. Surrounding whitespace is removed
# because a pasted key often carries a trailing space or newline.
_entered_key = getpass.getpass("Enter your GROQ API key: ").strip()

if _entered_key:
    os.environ['GROQ_API_KEY'] = _entered_key
elif 'GROQ_API_KEY' not in os.environ:
    # An empty entry with no previously stored key would only fail later,
    # at the first API call, so fail here with a clear message instead.
    raise ValueError("No GROQ API key entered and GROQ_API_KEY is not already set.")

# Use it wherever needed
GROQ_API_KEY = os.environ['GROQ_API_KEY']


Enter your GROQ API key: ··········


In [29]:
!pip install python-dotenv

Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0


In [44]:
# gradio_ui.py

import os
import warnings

import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from groq import Groq

from fraud_detection_agent import FraudDetectionAgent
from ingestion_agent import IngestionAgent
from oversight_panel_agent import OversightPanelAgent
from recommendation_agent import RecommendationAgent
from segmentation_agent import SegmentationAgent

# ========== Load .env Variables ==========
# Pull variables from a local .env file (if any) into the process environment,
# then read the Groq key from there.
load_dotenv()
GROQ_API_KEY = (os.getenv("GROQ_API_KEY") or "").strip()
if not GROQ_API_KEY:
    # Keep the original placeholder so the dashboard still launches without a
    # key, but say so up front: chat requests will be rejected by the API.
    warnings.warn(
        "GROQ_API_KEY is not set; using a placeholder key. "
        "The Fraud Chat Assistant will fail until a real key is provided.",
        stacklevel=2,
    )
    GROQ_API_KEY = "your-groq-api-key-here"

# ========== Agents ==========
# One instance of each pipeline stage, created once when this cell runs and
# reused by every call to run_pipeline().
ingestion = IngestionAgent(num_customers=200, num_transactions=1000)  # synthetic data: 200 customers, 1000 transactions
segmentation = SegmentationAgent()
fraud_detection = FraudDetectionAgent()
recommendation = RecommendationAgent()
oversight = OversightPanelAgent()

# ========== Global States ==========
# Results of the most recent run_pipeline() call; both stay empty until the
# pipeline has run. They are module-level, so every UI handler in this process
# reads and writes the same two frames.
final_df = pd.DataFrame()
escalated_df = pd.DataFrame()

# ========== Main Pipeline ==========
def run_pipeline():
    """Run ingestion, segmentation, fraud detection, recommendation and oversight.

    Side effects:
        * Rebinds the module-level ``final_df`` and ``escalated_df``.
        * Writes the final table to ``output/final_customer360_result.csv``,
          creating the ``output`` directory when it does not exist yet.

    Returns:
        tuple: ``(final_df, csv_path)`` - the summary DataFrame with columns
        ``id, name, segment, fraud_alert, recommendations`` and the path of the
        CSV file that was written.
    """
    global final_df, escalated_df

    output_path = "output/final_customer360_result.csv"

    customers_df, transactions_df = ingestion.ingest()
    segmented = segmentation.segment_customers(customers_df, transactions_df)

    # The fraud agent reports where it wrote its results; reload them from disk.
    fraud_results = fraud_detection.run(segmented, transactions_df)
    output_paths = fraud_results["output_paths"]
    fraud_checked_customers = pd.read_csv(output_paths["customer_fraud_path"])
    fraud_checked_transactions = pd.read_csv(output_paths["transaction_fraud_path"])

    recommended = recommendation.generate_recommendations(fraud_checked_customers)
    escalated_df = oversight.review_high_risk_cases(fraud_checked_customers, fraud_checked_transactions)

    # Rename to the column names the dashboard tables and filters use.
    final_df = recommended.rename(columns={
        "customer_id": "id",
        "is_high_risk": "fraud_alert",
        "recommended_product": "recommendations"
    })[["id", "name", "segment", "fraud_alert", "recommendations"]]

    # Do not rely on an earlier stage having created the output directory.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    final_df.to_csv(output_path, index=False)
    return final_df, output_path

def filter_output(segment, fraud_alert):
    """Return the latest pipeline result narrowed by the two dropdown values.

    Args:
        segment: A segment name, or ``"All"`` to skip segment filtering.
        fraud_alert: ``"Yes"``, ``"No"``, or ``"All"`` to skip alert filtering.

    Returns:
        An empty DataFrame when no pipeline result exists yet; otherwise a
        filtered copy of the module-level ``final_df``.
    """
    if not final_df.empty:
        view = final_df.copy()
        if segment != "All":
            view = view.loc[view["segment"].eq(segment)]
        if fraud_alert != "All":
            # "Yes" keeps rows flagged True, anything else keeps rows flagged False.
            alert_wanted = fraud_alert == "Yes"
            view = view.loc[view["fraud_alert"].eq(alert_wanted)]
        return view

    return pd.DataFrame()

def get_recommendations():
    """Return the recommendation columns of the latest pipeline result.

    Returns:
        An empty DataFrame when the module-level ``final_df`` is empty,
        otherwise its ``id``, ``name``, ``segment`` and ``recommendations``
        columns.
    """
    reco_columns = ["id", "name", "segment", "recommendations"]
    return pd.DataFrame() if final_df.empty else final_df[reco_columns]

def get_fraud_cases():
    """Return the escalated cases stored by the latest pipeline run.

    Returns:
        The module-level ``escalated_df`` itself when it has rows, otherwise a
        fresh empty DataFrame.
    """
    if escalated_df.empty:
        return pd.DataFrame()
    return escalated_df

# ========== Groq Chat Assistant ==========
# Single client shared by every chat request, built once from GROQ_API_KEY as
# resolved above (which may be the placeholder string when no key is configured).
groq_client = Groq(api_key=GROQ_API_KEY)

def respond_to_user(message, history, model="llama3-8b-8192"):
    """Answer one chat message through Groq and return the updated history.

    Args:
        message: Text the user just submitted.
        history: Previous turns as a list of ``(user, assistant)`` pairs, or
            ``None`` when there is no conversation yet.
        model: Groq chat model id to use for the completion.

    Returns:
        A new list containing the previous turns plus ``(message, reply)``.
        The list passed in by the caller is never modified. When ``message``
        is empty or whitespace-only, the history is returned unchanged and no
        API request is made.
    """
    # Work on a copy so the caller's list is not mutated; tolerate None.
    turns = list(history or [])

    if not message or not message.strip():
        return turns

    # Convert the (user, assistant) pairs to OpenAI-style role messages.
    messages = [{"role": "system", "content": "You are a helpful fraud detection assistant."}]
    for user_text, assistant_text in turns:
        # A pair may have an empty side (e.g. a turn with no reply yet);
        # skip those instead of sending None as message content.
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    # Add latest user message
    messages.append({"role": "user", "content": message})

    # Get response from Groq
    response = groq_client.chat.completions.create(
        model=model,
        messages=messages
    )

    reply = response.choices[0].message.content
    turns.append((message, reply))
    return turns

# ========== Gradio UI ==========
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Customer360 AI Dashboard")
    gr.Markdown("Simulate data ingestion, segmentation, fraud detection, recommendations, and oversight in one go.")

    with gr.Row():
        run_btn = gr.Button("🚀 Run Pipeline")
        download_btn = gr.File(label="📥 Download Result CSV")

    with gr.Tabs():
        with gr.Tab("📊 Overview"):
            with gr.Row():
                segment_filter = gr.Dropdown(choices=["All", "Loyal", "High Value", "Promising", "At Risk", "New"],
                                              label="Segment Filter", value="All")
                fraud_filter = gr.Dropdown(choices=["All", "Yes", "No"], label="Fraud Alert Filter", value="All")
            overview_table = gr.Dataframe()

        with gr.Tab("🚨 Escalated Fraud Cases"):
            fraud_table = gr.Dataframe()

        with gr.Tab("🎯 Recommendations"):
            reco_table = gr.Dataframe()

        with gr.Tab("🤖 Fraud Chat Assistant"):
            # NOTE(review): the cell output suggests Gradio emits a warning for this
            # line (likely about the default chat history format) - confirm
            # against the installed Gradio version before changing the format.
            chatbot = gr.Chatbot(label="Groq Fraud Assistant", height=400)
            msg = gr.Textbox(placeholder="Ask anything about fraud risks, suspicious transactions, etc...", show_label=False)

            # Post the exchange to the chat, then clear the textbox so the
            # next question starts from an empty input.
            msg.submit(respond_to_user, inputs=[msg, chatbot], outputs=[chatbot]).then(
                lambda: "", inputs=None, outputs=msg
            )

    # Button actions
    # The fraud and recommendation tables read globals that run_pipeline
    # rebinds, so they must be refreshed *after* it finishes. Chaining with
    # .then() guarantees that order; three independent click listeners could
    # run concurrently and show empty or stale tables.
    run_btn.click(
        fn=run_pipeline, outputs=[overview_table, download_btn]
    ).then(
        fn=get_fraud_cases, outputs=fraud_table
    ).then(
        fn=get_recommendations, outputs=reco_table
    )
    segment_filter.change(fn=filter_output, inputs=[segment_filter, fraud_filter], outputs=overview_table)
    fraud_filter.change(fn=filter_output, inputs=[segment_filter, fraud_filter], outputs=overview_table)

demo.launch(share=True)


  chatbot = gr.Chatbot(label="Groq Fraud Assistant", height=400)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c47e717ce3eede8fb0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


