# An AI-powered risk assessment and invoice auditing dashboard built with Python & Streamlit.

In [None]:
import pandas as pd
import numpy as np
import matplotlib as plt

In [None]:
# Announce the load, then read the raw bank-customer CSV into the working DataFrame.
print("dataset loading")
raw_csv_path = "/content/sample_data/BankCustomerData.csv"
df = pd.read_csv(raw_csv_path)

dataset loading


In [None]:
# Preview the first five rows to sanity-check the parsed columns.
df.head(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,term_deposit
0,58,management,married,tertiary,no,2143,yes,no,unknown,5.0,may,261.0,1.0,-1.0,0.0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5.0,may,151.0,1.0,-1.0,0.0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5.0,may,76.0,1.0,-1.0,0.0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5.0,may,92.0,1.0,-1.0,0.0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5.0,may,198.0,1.0,-1.0,0.0,unknown,no


In [None]:
# Summary statistics for the numeric columns (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,age,balance,day,duration,campaign,pdays,previous
count,37902.0,37902.0,37901.0,37901.0,37901.0,37901.0,37901.0
mean,40.774445,1271.518627,16.008654,255.777051,2.905306,27.834886,0.36904
std,9.598387,2873.266389,8.365243,260.237104,3.294127,84.587741,2.074364
min,19.0,-8019.0,1.0,0.0,1.0,-1.0,0.0
25%,33.0,52.0,8.0,100.0,1.0,-1.0,0.0
50%,39.0,406.0,17.0,175.0,2.0,-1.0,0.0
75%,48.0,1316.0,21.0,314.0,3.0,-1.0,0.0
max,95.0,98417.0,31.0,4918.0,63.0,371.0,275.0


In [None]:
# Inspect the last five rows; a truncated final record shows up here first.
df.tail(5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,term_deposit
37897,41,blue-collar,married,secondary,no,427,yes,yes,cellular,14.0,may,177.0,2.0,-1.0,0.0,unknown,no
37898,36,services,single,secondary,no,-267,yes,no,cellular,14.0,may,99.0,7.0,352.0,1.0,failure,no
37899,33,technician,married,secondary,no,318,yes,no,cellular,14.0,may,108.0,4.0,342.0,14.0,other,no
37900,32,blue-collar,married,secondary,no,169,yes,no,cellular,14.0,may,291.0,2.0,-1.0,0.0,unknown,no
37901,39,management,divorced,tertiary,no,126,no,no,cellular,,,,,,,,


In [None]:
# Column names, non-null counts, dtypes and memory usage in one listing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37902 entries, 0 to 37901
Data columns (total 17 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   age           37902 non-null  int64  
 1   job           37902 non-null  object 
 2   marital       37902 non-null  object 
 3   education     37902 non-null  object 
 4   default       37902 non-null  object 
 5   balance       37902 non-null  int64  
 6   housing       37902 non-null  object 
 7   loan          37902 non-null  object 
 8   contact       37902 non-null  object 
 9   day           37901 non-null  float64
 10  month         37901 non-null  object 
 11  duration      37901 non-null  float64
 12  campaign      37901 non-null  float64
 13  pdays         37901 non-null  float64
 14  previous      37901 non-null  float64
 15  poutcome      37901 non-null  object 
 16  term_deposit  37901 non-null  object 
dtypes: float64(5), int64(2), object(10)
memory usage: 4.9+ MB


In [None]:
# Number of missing values in each column.
df.isna().sum()

Unnamed: 0,0
age,0
job,0
marital,0
education,0
default,0
balance,0
housing,0
loan,0
contact,0
day,1


In [None]:
# --- ANALYSIS 1: BANK LIQUIDITY (the 'balance' column) ---
# Pull the column once, then derive the three headline figures from it.
balances = df['balance']
total_money_in_bank = balances.sum()
avg_customer_balance = balances.mean()
max_balance = balances.max()

NameError: name 'df' is not defined

In [None]:
# Render the liquidity figures as a small text report.
# Each label carries its own trailing padding so the output layout stays fixed.
report_rows = [
    ("Total Capital Held:      ", total_money_in_bank),
    ("Average Customer Balance: ", avg_customer_balance),
    ("Richest Customer Has:     ", max_balance),
]

print("\nüí∞ BANK LIQUIDITY REPORT")
print(40 * "=")
for label, value in report_rows:
    print(f"{label}${value:,.2f}")


üí∞ BANK LIQUIDITY REPORT
Total Capital Held:      $48,193,099.00
Average Customer Balance: $1,271.52
Richest Customer Has:     $98,417.00


**Analysis 2:** Wealth by Job (the `groupby` method)

**Code:** `df.groupby('job')['balance'].mean().sort_values(ascending=False)`

***The Simple Explanation (The "Buckets" Analogy)***

***Imagine you have a room full of about 38,000 people (one per row of the dataset). You want to know who is richer: the Doctors or the Students?***

**Group:** You ask everyone to stand in groups based on their job. All "Management" in one corner, all "Technicians" in another.

**Select:** You ignore their age or marital status; you only look at the cash in their hands (`balance`).

**Aggregate (Mean):** You calculate the average cash held by each group.

**Sort:** Finally, you list the groups from the richest average to the poorest.



In [None]:
# --- ANALYSIS 2: FINTECH INSIGHT (Wealth by Job) ---
# Which profession holds the highest average balance?
print("\nüìä WEALTH BY JOB TYPE")
print(40 * "=")

# Average balance per job, ordered from richest to poorest.
mean_balance_per_job = df['balance'].groupby(df['job']).mean()
wealth_by_job = mean_balance_per_job.sort_values(ascending=False)

# Show only the five richest professions.
print(wealth_by_job.iloc[:5])


üìä WEALTH BY JOB TYPE
job
unknown          1703.528384
management       1666.770473
self-employed    1519.083952
retired          1513.491052
entrepreneur     1446.552339
Name: balance, dtype: float64


In [None]:
# --- ANALYSIS 3: RISK CHECK ---
# Count the customers whose balance is below zero (i.e. in debt).
is_overdrawn = df['balance'] < 0
negative_balance_count = int(is_overdrawn.sum())

print("\n‚ö†Ô∏è RISK ALERT")
print(40 * "=")
print(f"Customers with Negative Balance: {negative_balance_count}")


‚ö†Ô∏è RISK ALERT
Customers with Negative Balance: 3508


# Day 02

In [None]:
# Record the row count before any cleaning so later steps can be compared against it.
print("--- ORIGINAL DATA STATS ---")
original_row_count = df.shape[0]
print(f"Original Rows: {original_row_count}")

--- ORIGINAL DATA STATS ---
Original Rows: 37902


In [None]:
# PART 1: MISSING VALUE CHECK
# ---------------------------------------------------------
# Count missing cells per column, then show only the columns
# that actually have gaps so the output stays short.
print("\n--- MISSING VALUES (The Real Check) ---")
missing_counts = df.isna().sum()
has_gaps = missing_counts.gt(0)
print(missing_counts.loc[has_gaps])


--- MISSING VALUES (The Real Check) ---
day             1
month           1
duration        1
campaign        1
pdays           1
previous        1
poutcome        1
term_deposit    1
dtype: int64


In [None]:
# PART 2: HANDLING "HIDDEN" MISSING DATA
# ---------------------------------------------------------
# Missing job information is stored as the literal string "unknown"
# rather than as an empty cell, so isnull() does not catch it.
is_unknown_job = df['job'] == 'unknown'
unknown_jobs = int(is_unknown_job.sum())
print(f"\n'Unknown' Jobs found: {unknown_jobs}")

# ACTION: relabel the placeholder as the category 'other' (rows are kept, not dropped).
df['job'] = df['job'].replace({'unknown': 'other'})
print("Fixed 'unknown' jobs -> Renamed to 'other'.")


'Unknown' Jobs found: 229
Fixed 'unknown' jobs -> Renamed to 'other'.


In [None]:
# ---------------------------------------------------------
# PART 3: FEATURE ENGINEERING (The Fintech Magic)
# ---------------------------------------------------------
# We will create a 'Financial_Risk' column.
# LOGIC:
# - If they have a Loan OR Credit Default -> 'High Risk'
# - If Balance is Negative (< 0) -> 'High Risk'
# - Else -> 'Low Risk'

def calculate_risk(row):
    """Classify one customer record as 'High Risk' or 'Low Risk'.

    `row` is any mapping-like record exposing 'loan', 'default' and 'balance'.
    A record is high risk when it has a loan, a credit default, or a
    negative balance; otherwise it is low risk.
    """
    risk_flags = (
        row['loan'] == 'yes',      # has an active loan
        row['default'] == 'yes',   # has a credit default
        row['balance'] < 0,        # account is overdrawn
    )
    return 'High Risk' if any(risk_flags) else 'Low Risk'

# Apply the risk rule to every row (axis=1 hands each row to calculate_risk).
# The progress message reports the real row count instead of a hard-coded estimate.
print(f"\nApplying Fintech Logic to {len(df):,} customers...")
df['Risk_Status'] = df.apply(calculate_risk, axis=1)


Applying Fintech Logic to 40,000+ customers...


In [None]:
# PART 4: ANALYZE THE NEW DATA
# ---------------------------------------------------------
# How many customers fall into each risk group?
print("\n--- NEW RISK ANALYSIS ---")
risk_group_sizes = df['Risk_Status'].value_counts()
print(risk_group_sizes)

# Compare the average balance of the two groups.
risk_breakdown = df['balance'].groupby(df['Risk_Status']).mean()
print("\nAverage Balance by Risk Group:")
print(risk_breakdown)


--- NEW RISK ANALYSIS ---
Risk_Status
Low Risk     28639
High Risk     9263
Name: count, dtype: int64

Average Balance by Risk Group:
Risk_Status
High Risk     480.424592
Low Risk     1527.390132
Name: balance, dtype: float64


In [None]:
# PART 5: SAVE YOUR WORK
# ---------------------------------------------------------
# Save the cleaned frame next to the raw CSV. The Day 3 automation cell and the
# Streamlit apps all read '/content/sample_data/Cleaned_Bank_Data.csv', so writing
# anywhere else makes a fresh Restart & Run All fail to find the file.
df.to_csv('/content/sample_data/Cleaned_Bank_Data.csv', index=False)
print("\n‚úÖ Success! Saved 'Cleaned_Bank_Data.csv' for Day 3.")


‚úÖ Success! Saved 'Cleaned_Bank_Data.csv' for Day 3.


# Day 03

In [None]:
import pandas as pd
import requests
import time
import os

# ==========================================
# CONFIGURATION
# ==========================================
# A webhook URL is effectively a secret, so prefer supplying it through the
# MAKE_WEBHOOK_URL environment variable. The inline placeholder remains as the
# fallback, so pasting the URL here still works as before.
WEBHOOK_URL = os.environ.get("MAKE_WEBHOOK_URL", "https://hook.looks_like_this")  # <--- or paste here

# ==========================================
# PART 1: RE-GENERATE CLEAN DATA (Day 2 Logic)
# ==========================================
# The Day 2 steps are repeated here so this cell can run in a fresh session.

print("‚úÖ Loading Data...")
df = pd.read_csv('/content/sample_data/Cleaned_Bank_Data.csv')

# Relabel the placeholder job category.
df['job'] = df['job'].replace({'unknown': 'other'})


def calculate_risk(row):
    """Return 'High Risk' for a loan, a credit default or a negative balance; else 'Low Risk'."""
    is_high_risk = (row['loan'] == 'yes') or (row['default'] == 'yes') or (row['balance'] < 0)
    return 'High Risk' if is_high_risk else 'Low Risk'


# Score every row, then report how many customers were flagged.
df['Risk_Status'] = df.apply(calculate_risk, axis=1)
high_risk_total = (df['Risk_Status'] == 'High Risk').sum()
print(f"‚úÖ Risk Engine Complete. High Risk Customers: {high_risk_total}")

    # ==========================================
    # PART 2: SEND TO MAKE.COM (Day 3 Logic)
    # ==========================================
print("\nüöÄ STARTING AUTOMATION: Sending alerts to Make.com...")

# Only the first three high-risk rows are sent, which keeps the demo batch small.
risky_people = df[df['Risk_Status'] == 'High Risk'].head(3)

for index, row in risky_people.iterrows():
    # Build the JSON payload; numpy scalars are cast to plain Python ints
    # so they serialise cleanly.
    payload = {
        "Customer_ID": int(index),
        "Job": row['job'],
        "Balance": int(row['balance']),
        "Status": "CRITICAL ALERT"
    }

    try:
        # requests never times out by default; without `timeout` a single
        # unresponsive webhook would hang the whole batch indefinitely.
        response = requests.post(WEBHOOK_URL, json=payload, timeout=10)

        if response.status_code == 200:
            print(f"   --> üü¢ Alert Sent for Customer #{index} (Balance: {row['balance']})")
        else:
            print(f"   --> üî¥ Failed to send. Status: {response.status_code}")

    except requests.exceptions.RequestException as e:
        # Covers connection failures, timeouts and malformed URLs; unrelated
        # programming errors are no longer hidden behind this message.
        print(f"   --> ‚ö†Ô∏è Connection Error: {e}")

    time.sleep(1)  # Pause between calls to be polite to the webhook endpoint

print("\n‚úÖ Batch Complete. Go check Make.com!")

‚úÖ Loading Data...
‚úÖ Risk Engine Complete. High Risk Customers: 9263

üöÄ STARTING AUTOMATION: Sending alerts to Make.com...
   --> üü¢ Alert Sent for Customer #2 (Balance: 2)
   --> üü¢ Alert Sent for Customer #6 (Balance: 447)
   --> üü¢ Alert Sent for Customer #7 (Balance: 2)

‚úÖ Batch Complete. Go check Make.com!


# Day 04

In [None]:
# Install the library to MAKE pdfs (just for setup)
!pip install reportlab -q

from reportlab.pdfgen import canvas

def create_dummy_invoice():
    """Write a one-page sample invoice to 'invoice_test.pdf' in the working directory."""
    pdf_canvas = canvas.Canvas("invoice_test.pdf")

    # (y-position, text) for every line drawn in the canvas's initial font, top to bottom.
    body_lines = [
        (750, "FINSWARM NEXUS SOLUTIONS"),
        (730, "Invoice #9921"),
        (710, "Date: 2025-11-24"),
        (650, "Bill To: John Doe"),
        (630, "Service: AI Automation Consulting"),
        (550, "Amount: $1,500.00"),
        (530, "Tax: $150.00"),
    ]
    for y_position, text in body_lines:
        pdf_canvas.drawString(100, y_position, text)

    # The total line is drawn last, in bold.
    pdf_canvas.setFont("Helvetica-Bold", 12)
    pdf_canvas.drawString(100, 500, "TOTAL DUE: $1,650.00")

    pdf_canvas.save()
    print("‚úÖ Dummy Invoice Created: 'invoice_test.pdf'")


create_dummy_invoice()

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/2.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.3/2.0 MB[0m [31m10.7 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m1.9/2.0 MB[0m [31m32.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m2.0/2.0 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[?25h‚úÖ Dummy Invoice Created: 'invoice_test.pdf'


In [None]:
# 1. Install the reader library
!pip install pdfplumber -q

import pdfplumber
import re # "Regex" - A tool to find patterns in text

print("üîç Starting PDF Extraction...")

# 2. Open the PDF and read the text of its first page
with pdfplumber.open("invoice_test.pdf") as pdf:
    first_page = pdf.pages[0]

    # A page without a text layer (e.g. a scanned image) may yield nothing;
    # normalise that to an empty string so the parsing below cannot crash.
    text_data = first_page.extract_text() or ""

print("\n--- RAW TEXT FROM PDF ---")
print(text_data)
print("-------------------------\n")

# 3. THE LOGIC: Finding the "Total Due"
# The extracted text is one big string, so scan it line by line for the
# line that carries the total.

# First dollar figure on a line, e.g. "TOTAL DUE: $1,650.00" -> "1,650.00"
dollar_amount_pattern = re.compile(r"\$\s*(\d[\d,]*(?:\.\d+)?)")

amount = None
for line in text_data.split('\n'):
    if "TOTAL DUE" in line:
        print(f"üéØ FOUND IT: {line}")

        # Use a regex instead of split('$')[1]: it ignores trailing text and
        # does not raise IndexError when the line has no '$' at all.
        match = dollar_amount_pattern.search(line)
        if match:
            amount = match.group(1)
            print(f"üí∞ Extracted Amount: {amount}")
        else:
            print("WARNING: 'TOTAL DUE' line found, but it contains no dollar amount.")

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m43.6/43.6 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m67.7/67.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m60.0/60.0 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m5.6/5.6 MB[0m [31m56.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.0/3.0 MB[0m [31m92.8 MB/s[0m eta [36m0:00:00[0m
[?25hü

In [None]:
# Challenge given by ma'am
# Challenge: modify the code to extract the "Invoice #" as well.


import pdfplumber
import re # "Regex" - A tool to find patterns in text

print("üîç Starting PDF Extraction...")

# 2. Open the PDF and read the text of its first page
with pdfplumber.open("invoice_test.pdf") as pdf:
    first_page = pdf.pages[0]

    # A page without a text layer may yield nothing; fall back to an empty string.
    text_data = first_page.extract_text() or ""

print("\n--- RAW TEXT FROM PDF ---")
print(text_data)
print("-------------------------\n")

# 3. THE LOGIC: Finding the "Invoice #"
# Scan line by line for "Invoice #<id>" and capture the id that follows the '#'.
# Matching the '#' explicitly means lines such as "Invoice Date: ..." are skipped
# instead of raising IndexError on split('#')[1].
invoice_number_pattern = re.compile(r"Invoice\s*#\s*(\S+)")

invoice_number = None
for line in text_data.split('\n'):
    match = invoice_number_pattern.search(line)
    if match:
        print(f"üéØ FOUND IT: {line}")
        invoice_number = match.group(1)
        print(f"üí∞ Extracted Invoice: {invoice_number}")

üîç Starting PDF Extraction...

--- RAW TEXT FROM PDF ---
FINSWARM NEXUS SOLUTIONS
Invoice #9921
Date: 2025-11-24
Bill To: John Doe
Service: AI Automation Consulting
Amount: $1,500.00
Tax: $150.00
TOTAL DUE: $1,650.00
-------------------------

üéØ FOUND IT: Invoice #9921
üí∞ Extracted Invoice: 9921


# Day 05

In [None]:
# 1. Write the Python code to a file named 'app.py'
%%writefile app.py

import streamlit as st
import pandas as pd
import plotly.express as px

# --- PAGE SETUP ---
st.set_page_config(page_title="FinSwarm Nexus Dashboard", layout="wide")

st.title("üè¶ FinSwarm Nexus: Risk Management Dashboard")
st.markdown("### AI-Powered Financial Analysis")

# --- LOAD DATA ---
# We use @st.cache_data so it doesn't reload every time you click a button
@st.cache_data
def load_data():
    # We will try to load the Cleaned data, or fallback to raw
    try:
        df = pd.read_csv("/content/sample_data/Cleaned_Bank_Data.csv")
        return df
    except:
        return pd.DataFrame() # Return empty if no file

df = load_data()

if df.empty:
    st.error("‚ö†Ô∏è Error: 'Cleaned_Bank_Data.csv' not found. Please upload it!")
else:
    # --- SIDEBAR FILTERS ---
    st.sidebar.header("Filter Options")
    selected_job = st.sidebar.selectbox("Select Job Type", df['job'].unique())

    # Filter the data based on selection
    filtered_df = df[df['job'] == selected_job]

    # --- KPI METRICS (The Big Numbers) ---
    col1, col2, col3 = st.columns(3)

    total_customers = len(filtered_df)
    avg_balance = filtered_df['balance'].mean()
    high_risk_count = len(filtered_df[filtered_df['Risk_Status'] == 'High Risk'])

    col1.metric("Total Customers", f"{total_customers}")
    col2.metric("Average Balance", f"${avg_balance:,.2f}")
    col3.metric("High Risk Alerts", f"{high_risk_count}", delta_color="inverse")

    st.markdown("---")

    # --- CHARTS (The Visuals) ---
    c1, c2 = st.columns(2)

    with c1:
        st.subheader("üí∞ Balance Distribution")
        # Histogram showing who has how much money
        fig_hist = px.histogram(filtered_df, x="balance", nbins=20, title="Balance Spread")
        st.plotly_chart(fig_hist, use_container_width=True)

    with c2:
        st.subheader("‚ö†Ô∏è Risk Analysis")
        # Pie chart of Risk Status
        risk_counts = filtered_df['Risk_Status'].value_counts()
        fig_pie = px.pie(values=risk_counts.values, names=risk_counts.index, title="Risk Breakdown")
        st.plotly_chart(fig_pie, use_container_width=True)

    # --- RAW DATA ---
    st.subheader("üìÑ Detailed Customer Records")
    st.dataframe(filtered_df.head(10))

Overwriting app.py


In [None]:
# 1. Install Streamlit and a tunnel tool
!pip install streamlit -q
!npm install localtunnel

# 2. Run Streamlit in the background
!streamlit run app.py &>/content/logs.txt &

# 3. Expose the port so you can click it
import urllib
print("üî• Your Dashboard is Ready!")
print("üëá IMPORTANT: Copy this IP address first:")
print(urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip("\n"))
print("\nüëá Then click this link and Paste the IP address:")
!npx localtunnel --port 8501

[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K
up to date, audited 23 packages in 1s
[1G[0K‚†á[1G[0K
[1G[0K‚†á[1G[0K3 packages are looking for funding
[1G[0K‚†á[1G[0K  run `npm fund` for details
[1G[0K‚†á[1G[0K
2 [31m[1mhigh[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K‚†á[1G[0Küî• Your Dashboard is Ready!
üëá IMPORTANT: Copy this IP address first:
34.106.194.49

üëá Then click this link and Paste the IP address:
[1G[0K‚†ô[1G[0Kyour url is: https://rare-jars-cheer.loca.lt
^C


# Day 06

In [1]:
%%writefile app.py

import streamlit as st
import pandas as pd
import plotly.express as px

# --- PAGE SETUP ---
st.set_page_config(page_title="FinSwarm Nexus Dashboard", layout="wide")

st.title("üè¶ FinSwarm Nexus: Risk Management Dashboard")
st.markdown("### AI-Powered Financial Analysis")

# --- LOAD DATA ---
@st.cache_data
def load_data():
    try:
        df = pd.read_csv("/content/sample_data/Cleaned_Bank_Data.csv")
        return df
    except:
        return pd.DataFrame()

df = load_data()

if df.empty:
    st.error("‚ö†Ô∏è Error: 'Cleaned_Bank_Data.csv' not found. Please upload it!")
else:
    # --- SIDEBAR FILTERS ---
    st.sidebar.header("Filter Options")
    selected_job = st.sidebar.selectbox("Select Job Type", df['job'].unique())

    # Filter data based on job selection
    filtered_df = df[df['job'] == selected_job]

    # ===================================================
    # üÜï NEW FEATURE: DOWNLOAD BUTTON
    # ===================================================
    st.sidebar.markdown("---") # A divider line
    st.sidebar.subheader("üì• Export Reports")

    # 1. Logic: Get ONLY the High Risk customers from the whole dataset
    high_risk_export = df[df['Risk_Status'] == 'High Risk']

    # 2. Conversion: Turn that data into a CSV string (Computers need bytes)
    csv_data = high_risk_export.to_csv(index=False).encode('utf-8')

    # 3. UI: The Button
    st.sidebar.download_button(
        label="Download High Risk List",
        data=csv_data,
        file_name="High_Risk_Customers.csv",
        mime="text/csv",
        help="Click to download a full report of all High Risk customers."
    )
    # ===================================================

    # --- KPI METRICS ---
    col1, col2, col3 = st.columns(3)
    col1.metric("Total Customers", f"{len(filtered_df)}")
    col2.metric("Average Balance", f"${filtered_df['balance'].mean():,.2f}")
    col3.metric("High Risk Alerts", f"{len(filtered_df[filtered_df['Risk_Status'] == 'High Risk'])}")

    st.markdown("---")

    # --- CHARTS ---
    c1, c2 = st.columns(2)

    with c1:
        st.subheader("üí∞ Balance Distribution")
        fig_hist = px.histogram(filtered_df, x="balance", nbins=20, title="Balance Spread")
        st.plotly_chart(fig_hist, use_container_width=True)

    with c2:
        st.subheader("‚ö†Ô∏è Risk Analysis")
        risk_counts = filtered_df['Risk_Status'].value_counts()
        fig_pie = px.pie(values=risk_counts.values, names=risk_counts.index, title="Risk Breakdown")
        st.plotly_chart(fig_pie, use_container_width=True)

    # --- RAW DATA ---
    st.subheader("üìÑ Detailed Customer Records")
    st.dataframe(filtered_df.head(10))

Writing app.py


In [3]:
# 1. Install Streamlit (Just in case)
!pip install streamlit -q

# 2. Run your App in the background
#    (output is redirected to /content/logs.txt; the trailing & backgrounds the process)
!streamlit run app.py &>/content/logs.txt &

# 3. Download & Run Cloudflare Tunnel (The "Better" Tunnel)
#    The binary is saved as ./cloudflared and then marked executable.
print("üöÄ Downloading Cloudflare Tunnel...")
!wget -q -O cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!chmod +x cloudflared

print("üîó Creating your Public Link... (Look for the URL below!)")
# This will keep running. Look for the link ending in .trycloudflare.com
# NOTE(review): port 8501 is presumably Streamlit's default -- confirm if the app is started on another port.
!./cloudflared tunnel --url http://localhost:8501

[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/10.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.3/10.2 MB[0m [31m9.9 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[90m‚ï∫[0m[90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.4/10.2 MB[0m [31m49.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[91m‚ï∏[0m [32m10.1/10.2 MB[0m [31m96.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m10.2/10.2 MB[0m [31m72.9 

# Day 07

In [4]:
%%writefile app.py

import streamlit as st
import pdfplumber
import pandas as pd
import plotly.express as px

# --- PAGE CONFIG ---
st.set_page_config(page_title="FinSwarm Nexus: AI Auditor", layout="wide")

st.title("ü§ñ FinSwarm Nexus: AI Invoice Auditor")
st.markdown("### Upload a vendor invoice to detect fraud or errors instantly.")

# --- SIDEBAR (The Navigation) ---
st.sidebar.header("üîß Control Panel")
app_mode = st.sidebar.selectbox("Choose Mode", ["Dashboard Overview", "AI Invoice Scanner"])

# ==========================================
# MODE 1: THE DASHBOARD (From Day 6)
# ==========================================
if app_mode == "Dashboard Overview":
    # (Simplified version of your previous dashboard)
    st.subheader("üìä Live Risk Monitor")

    # Fake data for demo (since we focus on the PDF part today)
    data = {'Customer': ['Alice', 'Bob', 'Charlie', 'David'],
            'Balance': [5000, -200, 12000, -50],
            'Risk': ['Low', 'High', 'Low', 'High']}
    df = pd.DataFrame(data)

    # Metrics
    c1, c2, c3 = st.columns(3)
    c1.metric("Active Customers", "4")
    c2.metric("Total Risk Alerts", "2", delta="-2", delta_color="inverse")
    c3.metric("System Status", "Online üü¢")

    # Chart
    fig = px.bar(df, x='Customer', y='Balance', color='Risk', title="Customer Balance vs Risk")
    st.plotly_chart(fig, use_container_width=True)

# ==========================================
# MODE 2: THE AI SCANNER (The Day 7 Magic)
# ==========================================
elif app_mode == "AI Invoice Scanner":
    st.subheader("üìÑ Upload Invoice for AI Analysis")

    # 1. THE UPLOADER
    uploaded_file = st.file_uploader("Drag and drop a PDF Invoice here", type="pdf")

    if uploaded_file:
        st.success("‚úÖ File Uploaded! AI is reading...")

        # 2. THE AI READER (Day 4 Logic)
        with pdfplumber.open(uploaded_file) as pdf:
            page = pdf.pages[0]
            text = page.extract_text()

        # Display the Raw Text (Optional)
        with st.expander("See Raw Text extracted by AI"):
            st.text(text)

        # 3. THE LOGIC (Find the Money)
        # We look for "Total Due" or "$"
        extracted_amount = 0.0
        invoice_id = "Unknown"

        for line in text.split('\n'):
            if "$" in line:
                st.write(f"üîé Found Money Line: *{line}*")
                # Simple logic: try to grab the number
                # (In a real app, we use Regex, but this works for simple PDFs)

            if "#" in line:
                invoice_id = line.split('#')[-1]

        # 4. THE DECISION (Risk Engine)
        st.markdown("---")
        st.subheader("ü§ñ AI Audit Verdict")

        col1, col2 = st.columns(2)
        col1.info(f"**Invoice ID Detected:** #{invoice_id}")

        # Fake logic for the demo (Real logic would check the amount against a database)
        if "High" in text or "Overdue" in text:
             col2.error("üö® RISK STATUS: HIGH (Suspicious Keywords Found)")
        else:
             col2.success("‚úÖ RISK STATUS: APPROVED (Looks clean)")

Overwriting app.py


In [5]:
# Install the packages that app.py imports.
!pip install streamlit pdfplumber plotly -q
# Download the Cloudflare tunnel binary and make it executable.
!wget -q -O cloudflared https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!chmod +x cloudflared
# Start the app in the background, sending its output to /content/logs.txt.
!streamlit run app.py &>/content/logs.txt &
# Open the public tunnel; this command keeps running until it is interrupted.
# NOTE(review): port 8501 is presumably Streamlit's default -- confirm if the app is started on another port.
!./cloudflared tunnel --url http://localhost:8501

[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m43.6/43.6 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m67.7/67.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m60.0/60.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m5.6/5.6 MB[0m [31m62.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m3.0/3.0 MB[0m [31m96.4 MB/s[0m eta [36m0:00:00[0m
[?25h[