# Step 1: Install Dependencies

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import pandas as pd
from sdv.metadata import SingleTableMetadata
from sdv.single_table import CTGANSynthesizer
from transformers import BertTokenizer, BertModel
import torch
from tqdm import tqdm
from huggingface_hub import notebook_login
#notebook_login()
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk
nltk.download('vader_lexicon')
import faiss
import numpy as np
from langchain.chains import RetrievalQA
from langchain.llms import BaseLLM
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline
from transformers import AutoModelForCausalLM,AutoTokenizer, pipeline

Mounted at /content/drive


ModuleNotFoundError: No module named 'sdv'

In [None]:
# !pip install pandas numpy sdv transformers faiss-cpu langchain streamlit scikit-learn nltk
# !pip install langchain-community
# !pip install --upgrade langchain-core langchain-community langchain-experimental sentence-transformers
# !pip install -U langchain-huggingface

# Step 2: Generate Synthetic Data

In [None]:
# --- Customer Profiles ---
customer_metadata = SingleTableMetadata()
customer_metadata.add_column('customer_id', sdtype='id')
customer_metadata.add_column('age', sdtype='numerical')
customer_metadata.add_column('gender', sdtype='categorical')
customer_metadata.add_column('location', sdtype='categorical')
customer_metadata.add_column('interests', sdtype='categorical')
customer_metadata.add_column('income', sdtype='numerical')
customer_metadata.add_column('education', sdtype='categorical')
customer_metadata.add_column('occupation', sdtype='categorical')

# Training data with sample values
customer_data = pd.DataFrame([{
    'customer_id': 1234,
    'age': 45,
    'gender': 'Male',
    'location': 'New York',
    'interests': 'Luxury Shopping and Travel',
    'income': 75000,
    'education': 'MBA',
    'occupation': 'Financial Advisor'
},{
    'customer_id': 1235,
    'age': 32,
    'gender': 'Female',
    'location': 'San Francisco',
    'interests': 'Tech Gadgets',
    'income': 125000,
    'education': 'Masters',
    'occupation': 'Engineer'
}])

# Generate synthetic customer profiles
customer_synthesizer = CTGANSynthesizer(customer_metadata)
customer_synthesizer.fit(customer_data)
customer_profiles = customer_synthesizer.sample(num_rows=1000)

# --- Social Media Data ---
social_metadata = SingleTableMetadata()
social_metadata.add_column('customer_id', sdtype='id')
social_metadata.add_column('post_id', sdtype='id')
social_metadata.add_column('platform', sdtype='categorical')
social_metadata.add_column('content', sdtype='text')
social_metadata.add_column('timestamp', sdtype='datetime')
social_metadata.add_column('sentiment_score', sdtype='numerical')
social_metadata.add_column('intent', sdtype='categorical')

social_data = pd.DataFrame([{
    'customer_id': 1234,
    'post_id': 103,
    'platform': 'LinkedIn',
    'content': 'Navigating fluctuations raw material prices!! cash flow planning is key!',
    'timestamp': '2023-06-15',
    'sentiment_score': 0.4,
    'intent': 'Financial Management Concern'
},{
    'customer_id': 1235,
    'post_id': 104,
    'platform': 'Instagram',
    'content': 'Check out my latest post about luxury travel accessories!',
    'timestamp': '2023-06-16',
    'sentiment_score': 0.9,
    'intent': 'Fashion Interest'
}])

social_synthesizer = CTGANSynthesizer(social_metadata)
social_synthesizer.fit(social_data)
social_media = social_synthesizer.sample(num_rows=5000)

# --- Transaction History ---
transaction_metadata = SingleTableMetadata()
transaction_metadata.add_column('customer_id', sdtype='id')
transaction_metadata.add_column('product_id', sdtype='id')
transaction_metadata.add_column('transaction_type', sdtype='categorical')
transaction_metadata.add_column('category', sdtype='categorical')
transaction_metadata.add_column('amount', sdtype='numerical')
transaction_metadata.add_column('purchase_date', sdtype='datetime')
transaction_metadata.add_column('payment_mode', sdtype='categorical')

transaction_data = pd.DataFrame([{
    'customer_id': 1234,
    'product_id': 398,
    'transaction_type': 'Luxury Shopping',
    'category': 'Gucci',
    'amount': 50696,
    'purchase_date': '2023-01-01',
    'payment_mode': 'Amex Platinum'
},{
    'customer_id': 1235,
    'product_id': 401,
    'transaction_type': 'Technology Investment',
    'category': 'IPhone',
    'amount': 1299,
    'purchase_date': '2023-06-15',
    'payment_mode': 'Corporate credit card'
}])

transaction_synthesizer = CTGANSynthesizer(transaction_metadata)
transaction_synthesizer.fit(transaction_data)
transactions = transaction_synthesizer.sample(num_rows=10000)

# Save to CSV
customer_profiles.to_csv("/content/drive/MyDrive/Hackathon2025/customer_profiles.csv", index=False)
social_media.to_csv("/content/drive/MyDrive/Hackathon2025/social_media.csv", index=False)
transactions.to_csv("/content/drive/MyDrive/Hackathon2025/transactions.csv", index=False)


Column Name   sdtype datetime_format
  timestamp datetime            None
Without this specification, SDV may not be able to accurately parse the data. We recommend adding datetime formats using 'update_column'.
  Column Name   sdtype datetime_format
purchase_date datetime            None
Without this specification, SDV may not be able to accurately parse the data. We recommend adding datetime formats using 'update_column'.


# Step 3: Data Preprocessing

In [None]:
# Load Data
customer_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/customer_profiles.csv")
social_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/social_media.csv")
transaction_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/transactions.csv")

# Preprocess Customer Profiles
customer_df['gender'] = customer_df['gender'].map({'Male': 0, 'Female': 1})
customer_df = pd.get_dummies(customer_df, columns=['education', 'occupation'],dtype=int)

# Preprocess Social Media Sentiment
def analyze_sentiment(text):
    sia = SentimentIntensityAnalyzer()
    return sia.polarity_scores(text)['compound']

social_df['sentiment_score'] = social_df['content'].apply(analyze_sentiment)
social_agg = social_df.groupby('customer_id')['sentiment_score'].mean().reset_index()

# Preprocess Transactions
transaction_agg = transaction_df.groupby('customer_id').agg({
    'amount': ['mean', 'sum'],
    'category': lambda x: x.mode()[0]
}).reset_index()
transaction_agg.columns = ['customer_id', 'avg_spend', 'total_spend', 'fav_category']

# Merge All Data
merged_df = pd.merge(customer_df, social_agg, on='customer_id')
merged_df = pd.merge(merged_df, transaction_agg, on='customer_id')

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
merged_df.to_csv("/content/drive/MyDrive/Hackathon2025/merged_df.csv", index=False)

In [None]:
# Load Data
customer_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/customer_profiles.csv")
social_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/social_media.csv")
transaction_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/transactions.csv")
merged_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/merged_df.csv")

# Step 4: Multi-modal feature engineering

In [None]:
# --- Optimizations ---
# 1. Use smaller model (distilbert)
# 2. Process in batches
# 3. Truncate text to 128 tokens
# 4. Use GPU if available

# Initialize
tokenizer = BertTokenizer.from_pretrained('distilbert-base-uncased')
model = BertModel.from_pretrained('distilbert-base-uncased')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Batch processing function
def get_bert_embeddings_batched(texts, batch_size=32):
    embeddings = []
    for i in tqdm(range(0, len(texts), batch_size)):
        batch = texts[i:i+batch_size]

        inputs = tokenizer(
            batch,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=128  # Reduced from 512
        ).to(device)

        with torch.no_grad():
            outputs = model(**inputs)

        batch_embeddings = outputs.last_hidden_state[:,0,:].cpu().numpy()
        embeddings.extend(batch_embeddings)

    return embeddings

# Apply to social media data
social_texts = social_df.groupby('customer_id')['content'].apply(
    lambda x: ' '.join(x)[:500]  # Truncate long texts
).reset_index()

# Process in batches (Colab-safe)
social_texts['embedding'] = get_bert_embeddings_batched(
    social_texts['content'].tolist(),
    batch_size=16  # Reduce if still crashing
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'DistilBertTokenizer'. 
The class this function is called from is 'BertTokenizer'.
You are using a model of type distilbert to instantiate a model of type bert. This is not supported for all configurations of models and can yield errors.
Some weights of BertModel were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['embeddings.

In [None]:
# Merge with other features
final_df = pd.merge(merged_df, social_texts[['customer_id', 'embedding']], on='customer_id')

In [None]:
final_df.head()

Unnamed: 0,customer_id,age,gender,location,interests,income,education_MBA,education_Masters,occupation_Engineer,occupation_Financial Advisor,sentiment_score,avg_spend,total_spend,fav_category,embedding
0,12308363,32,1,San Francisco,Tech Gadgets,125000,0,1,1,0,0.7605,11703.0,11703,Gucci,"[0.18504955, 2.1168249, 0.25780225, -0.6667464..."
1,14132394,32,1,New York,Tech Gadgets,94260,1,0,0,1,0.8519,7858.0,7858,IPhone,"[0.38649833, 2.087719, -0.070956424, -0.545133..."
2,14703890,32,1,New York,Tech Gadgets,75000,0,1,0,1,0.0,1299.0,1299,IPhone,"[0.038439203, 2.1994088, 0.23548208, -0.531982..."
3,5976098,32,1,San Francisco,Tech Gadgets,124829,1,0,1,0,-0.4939,10686.0,10686,IPhone,"[0.3470851, 2.2011657, 0.10435499, -0.6719307,..."
4,13251058,32,1,San Francisco,Tech Gadgets,125000,0,1,1,0,0.4215,1299.0,1299,IPhone,"[0.33744738, 2.093231, 0.24944262, -0.43225497..."


In [None]:
# Save the final_df
final_df.to_parquet("/content/drive/MyDrive/Hackathon2025/final_df.parquet")

In [None]:
# Load the final_df
final_df = pd.read_parquet("/content/drive/MyDrive/Hackathon2025/final_df.parquet")

#Step 5: Build RAG System

In [None]:
# ... [Keep previous imports] ...

# --- Improved FAISS Index Creation ---
# Add financial attributes to documents
documents = [
    f"""
    CUSTOMER PROFILE {row['customer_id']}:
    - Age: {row['age']}
    - Gender: {'Male' if row['gender'] == 0 else 'Female'}
    - Income: ${row['income']:,.0f}
    - Interests: {row['interests']}
    - Favorite Category: {row['fav_category']}
    - Avg Spend: ${row['avg_spend']:,.0f}
    - Sentiment: {row['sentiment_score']:.2f}
    """
    for _, row in final_df.iterrows()
]

# --- Better Embeddings Model ---
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"  # Better for financial text
)

# --- Build FAISS Index ---
faiss_index = FAISS.from_texts(documents, embeddings_model)
faiss_index.save_local("customer_index")

# --- Enhanced Prompt Template ---
from langchain.prompts import PromptTemplate

PROMPT_TEMPLATE = """You are a financial advisor analyzing customer data. Use these customer profiles:

{context}

Recommend products for this customer:

### Profile:
{question}

### Format Requirements:
1. Start with "**Recommended Financial Products for Customer [ID]:**"
2. List 3 products with bullet points
3. Each product must include:
   - Pre-approved credit limit/terms
   - Key benefits
   - "Justification:" section linking to profile data
4. Include Sentiment-Driven Content Suggestions

Respond ONLY with the recommendation, no extra text:"""

QA_PROMPT = PromptTemplate(
    template=PROMPT_TEMPLATE,
    input_variables=["context", "question"]
)


  embeddings_model = HuggingFaceEmbeddings(


In [None]:
# --- Upgrade to Better LLM ---
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    device_map="auto"
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.3,
    do_sample=True
)

# --- Enhanced Chain ---
qa_chain = RetrievalQA.from_chain_type(
    llm=HuggingFacePipeline(pipeline=pipe),
    chain_type="stuff",
    retriever=faiss_index.as_retriever(search_kwargs={"k": 5}),  # Get more context
    chain_type_kwargs={"prompt": QA_PROMPT},
    return_source_documents=True
)

# --- Test Function ---
def get_recommendation(customer_id):
    customer = final_df[final_df['customer_id'] == customer_id].iloc[0]

    query = f"""
    Customer ID: {customer_id}
    - Age: {customer['age']}
    - Gender: {'Male' if customer['gender'] == 0 else 'Female'}
    - Income: ${customer['income']:,.0f}
    - Interests: {customer['interests']}
    - Favorite Category: {customer['fav_category']}
    - Avg Spend: ${customer['avg_spend']:,.0f}
    - Sentiment: {customer['sentiment_score']:.2f}
    """

    result = qa_chain.invoke({"query": query})
    return result["result"]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
type(model)

In [None]:
print(get_recommendation(198822))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


You are a financial advisor analyzing customer data. Use these customer profiles:


    CUSTOMER PROFILE 9091918:
    - Age: 32
    - Gender: Male
    - Income: $125,000
    - Interests: Tech Gadgets
    - Favorite Category: Gucci
    - Avg Spend: $1,299
    - Sentiment: 0.91
    


    CUSTOMER PROFILE 8962039:
    - Age: 38
    - Gender: Male
    - Income: $125,000
    - Interests: Tech Gadgets
    - Favorite Category: Gucci
    - Avg Spend: $3,968
    - Sentiment: 0.10
    


    CUSTOMER PROFILE 14929039:
    - Age: 44
    - Gender: Female
    - Income: $125,000
    - Interests: Tech Gadgets
    - Favorite Category: Gucci
    - Avg Spend: $24,119
    - Sentiment: 0.87
    


    CUSTOMER PROFILE 13918195:
    - Age: 32
    - Gender: Female
    - Income: $125,000
    - Interests: Tech Gadgets
    - Favorite Category: Gucci
    - Avg Spend: $4,113
    - Sentiment: 0.42
    


    CUSTOMER PROFILE 16483485:
    - Age: 32
    - Gender: Female
    - Income: $125,000
    - Interests: Tec

In [None]:
# app.py
import streamlit as st

st.title("Hyper-Personalized Recommendations")
customer_id = st.text_input("Enter Customer ID")

if customer_id:
    recommendation = get_recommendation(int(customer_id))
    st.write("Recommended Product:", recommendation)

In [None]:
# from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS  # <-- Changed import
from transformers import AutoTokenizer, pipeline
import torch

# --- Create FAISS Index PROPERLY ---
# Convert DataFrame entries to documents
documents = [
    f"Customer {row['customer_id']}: {row['age']}-year-old {row['gender']}, Income ${row['income']}, Interests {row['interests']}"
    for _, row in final_df.iterrows()
]

# Create FAISS index with embeddings
embeddings_model = HuggingFaceEmbeddings(model_name="distilbert-base-uncased")
faiss_index = FAISS.from_embeddings(
    text_embeddings=list(zip(documents, final_df['embedding'].tolist())),
    embedding=embeddings_model
)

# Save PROPERLY as a directory
faiss_index.save_local("customer_index")  # Creates a folder with index files

# --- Build RAG Chain ---
# Load from directory
faiss_index = FAISS.load_local(
    "customer_index",  # Folder name
    embeddings_model,
    allow_dangerous_deserialization=True
)

# --- Initialize Hugging Face Pipeline ---
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = pipeline(
    "text-generation",
    model="gpt2",
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,  # Use GPU if available,
    max_length=400
)

# --- Wrap Hugging Face pipeline as a Runnable ---
llm = HuggingFacePipeline(pipeline=model)  # Wrap the Hugging Face pipeline in a Runnable

# --- Define Retriever ---
retriever = faiss_index.as_retriever(search_kwargs={"k": 3})

# --- Build the RetrievalQA chain ---
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,  # Using the wrapped Hugging Face pipeline as a Runnable
    chain_type="stuff",
    retriever=retriever
)

# --- Test Recommendation ---
def get_recommendation(customer_id):
    customer_data = final_df[final_df['customer_id'] == customer_id].iloc[0]
    query = f"""
    Customer Profile: {customer_data['age']}-year-old {customer_data['gender']},
    Income: ${customer_data['income']}, Interests: {customer_data['interests']}.\n
    Transactions: Favorite category: {customer_data['fav_category']}, Avg Spend: ${customer_data['avg_spend']}.\n
    Social Sentiment: {customer_data['sentiment_score']}.\n
    Recommend a personalized financial product:
    """
    output = qa_chain.invoke(query, max_new_tokens=50)
    return output['result']

# Example usage:
print(get_recommendation(198822))  # Replace with actual customer ID


Device set to use cpu
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Customer 11074386: 44-year-old 0, Income $75000, Interests Luxury Shopping and Travel

Customer 13705673: 32-year-old 1, Income $125000, Interests Tech Gadgets

Customer 12427786: 32-year-old 0, Income $125000, Interests Tech Gadgets

Question: 
    Customer Profile: 38-year-old 1,
    Income: $125000, Interests: Tech Gadgets.

    Transactions: Favorite category: Gucci, Avg Spend: $2895.0.

    Social Sentiment: 0.8271.

    Recommend a personalized financial product:
    
Helpful Answer: It appears you are interested in different categories of smart phones, watches, etc. This doesn't appear to be correct. It would be nice to know your question in detail so that we can correct it before answering it.

Question:    

Message sent: 38-year-old 2, Income: $100,000, Interests: $900, and Tech Gadgets.


Response:


You are a bus

# Approach-2

In [None]:
# #!pip install streamlit faiss-cpu aif360 scikit-surprise
!pip install streamlit aif360 sentence-transformers faiss-cpu torch transformers
!pip install xgboost
!pip uninstall flash-attn -y
!pip install bitsandbytes accelerate

Collecting streamlit
  Downloading streamlit-1.44.0-py3-none-any.whl.metadata (8.9 kB)
Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_

^C
^C
^C


In [None]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv‚Ä¶

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import faiss
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
from sklearn.model_selection import train_test_split
#from surprise import SVD, Dataset, Reader
from sentence_transformers import SentenceTransformer
import torch
from google.colab import drive
drive.mount('/content/drive')

pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)

from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error, mean_absolute_error

# --------------------------
# 1. Optimized Data Loading
# --------------------------
def load_data():
    # Same as original but with optimized data types
    customer_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/customer_profiles.csv")
    social_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/social_media.csv")
    transactions_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/transactions.csv")

    # Optimized aggregations
    social_agg = social_df.groupby('customer_id').agg(
        sentiment_score=('sentiment_score', 'mean'),
        content=('content', lambda x: ' '.join(x.astype(str)))
    )

    transaction_agg = transactions_df.groupby('customer_id').agg(
        avg_spend=('amount', 'mean'),
        total_spend=('amount', 'sum'),
        fav_category=('category', lambda x: x.mode()[0])
    )

    # Merge with optimized data types
    merged_df = customer_df.merge(social_agg, on='customer_id', how='left')
    merged_df = merged_df.merge(transaction_agg, on='customer_id', how='left')
    merged_df['content'] = merged_df['content'].fillna('')

    # Batch processing with GPU acceleration
    if torch.cuda.is_available():
        model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cuda')
    else:
        model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')

    merged_df['embedding'] = model.encode(
        merged_df['content'].tolist(),
        batch_size=128,
        convert_to_numpy=True,
        normalize_embeddings=True
    ).tolist()

    return merged_df

# --------------------------
# 2. Optimized RAG System
# --------------------------
def setup_rag_system():
    # 4-bit quantization only (remove flash-attention)
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True
    )

    # Modified model loading without flash attention
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        device_map="auto",
        quantization_config=quantization_config,
        torch_dtype=torch.float16
    )

    tokenizer = AutoTokenizer.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        padding_side="left"
    )
    tokenizer.pad_token = tokenizer.eos_token

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
        max_new_tokens=256,
        temperature=0.3,
        do_sample=True,
        top_p=0.9
    )

# --------------------------
# 3. Optimized Recommendation Generation
# --------------------------
def generate_recommendation(_pipe, customer_data):
    prompt = f"""<s>[INST] As a financial advisor, analyze:
    - Age: {customer_data['age']}
    - Income: ${customer_data['income']}
    - Recent Spend: ${customer_data['avg_spend']}
    - Interests: {customer_data['interests']}
    - Social Sentiment: {customer_data['sentiment_score']:.2f}

    Recommend 3 financial products and business strategies. Be concise. [/INST]"""

    # Generate with optimized parameters
    response = _pipe(
        prompt,
        num_return_sequences=1,
        repetition_penalty=1.2,
        early_stopping=True
    )[0]['generated_text']

    return response.split("[/INST]")[-1].strip()

# --------------------------
# 3. Ethical Checks
# --------------------------
def check_bias(df):
    """Check model fairness metrics with empty group handling."""
    # Create a copy to avoid modifying original data
    df = df.copy()

    # 1. Gender Encoding
    df['gender'] = df['gender'].map({'Female': 0, 'Male': 1, 'Other': -1})
    df = df[df['gender'] != -1]  # Remove "Other" gender for binary analysis

    # 2. Income Binning with Quantiles (25th/75th percentiles)
    try:
        df['income_bin'] = pd.qcut(
            df['income'],
            q=[0, 0.25, 1.0],
            labels=[0, 1]  # 0=Bottom 25%, 1=Top 75%
        ).astype(int)
    except ValueError:
        # Fallback to median if quantile binning fails
        income_median = df['income'].median()
        df['income_bin'] = (df['income'] > income_median).astype(int)

    # 3. Simulate Binary Predictions (Replace with real predictions if available)
    np.random.seed(42)
    df['prediction'] = np.random.randint(0, 2, size=len(df))

    # 4. Create Dataset
    dataset = BinaryLabelDataset(
        df=df[['gender', 'income_bin', 'prediction']],
        label_names=['prediction'],
        protected_attribute_names=['gender', 'income_bin']
    )

    # 5. Calculate Metrics with Group Checks
    metrics = {}

    # Gender Impact (Requires both genders present)
    gender_counts = df['gender'].value_counts()
    if 0 in gender_counts and 1 in gender_counts and len(gender_counts) >= 2:
        metrics['gender_impact'] = ClassificationMetric(
            dataset, dataset,
            unprivileged_groups=[{'gender': 0}],
            privileged_groups=[{'gender': 1}]
        ).disparate_impact()
    else:
        metrics['gender_impact'] = np.nan

    # Income Fairness (Requires both income groups)
    income_counts = df['income_bin'].value_counts()
    if 0 in income_counts and 1 in income_counts and len(income_counts) >= 2:
        metrics['income_fairness'] = ClassificationMetric(
            dataset, dataset,
            unprivileged_groups=[{'income_bin': 0}],
            privileged_groups=[{'income_bin': 1}]
        ).statistical_parity_difference()
    else:
        metrics['income_fairness'] = np.nan

    # 6. Add Debug Info
    print("\nBias Check Diagnostics:")
    print(f"Gender distribution:\n{gender_counts.to_dict()}")
    print(f"Income distribution:\n{income_counts.to_dict()}")

    return metrics

# --------------------------
# Benchmarking
# --------------------------
def collaborative_filtering_benchmark(df):
    # Create user-item matrix
    user_item = df.pivot_table(
        index='customer_id',
        columns='fav_category',
        values='avg_spend',
        fill_value=0
    )

    # Matrix factorization
    model = NMF(n_components=10)
    W = model.fit_transform(user_item)
    H = model.components_

    # Calculate reconstruction error
    reconstructed = np.dot(W, H)
    rmse = np.sqrt(mean_squared_error(user_item, reconstructed))

    return rmse

def run_benchmarking(df):
    # Collaborative Filtering with NMF
    cf_rmse = collaborative_filtering_benchmark(df)

    # XGBoost Regression
    X = pd.get_dummies(df.drop(['customer_id', 'embedding'], axis=1))
    y = df['avg_spend']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    from xgboost import XGBRegressor
    xgb = XGBRegressor().fit(X_train, y_train)
    xgb_mae = mean_absolute_error(y_test, xgb.predict(X_test))

    return {
        'collaborative_filtering_rmse': cf_rmse,
        'xgboost_mae': xgb_mae
    }



In [None]:
def main():
    # Load Data
    df = load_data()

    # Setup RAG System (Recommendation System)
    llm_pipe = setup_rag_system()  # This will show faster loading

    # Test with a customer ID
    customer_id = df['customer_id'].iloc[0]
    customer_record = df[df['customer_id'] == customer_id].iloc[0]

    print(f'{{"customer_id": {customer_record["customer_id"]}, "age": {customer_record["age"]} ,  "avg_spend": {customer_record["avg_spend"]} "interests": {customer_record["interests"]}}} }}')

    # Generate Recommendation - Now should be 2-5x faster
    customer_data = df[df['customer_id'] == customer_id].iloc[0]
    recs = generate_recommendation(llm_pipe, customer_data)

    # Rest remains same
    print("AI Recommendations:")
    print(recs)

    # Check Ethical Biases
    bias_metrics = check_bias(df)
    print("\nEthical Bias Metrics:")
    print(f"Gender Impact Ratio: {bias_metrics['gender_impact']:.2f}")
    print(f"Income Fairness Gap: {bias_metrics['income_fairness']:.2f}")

    # Run Benchmarking
    benchmarks = run_benchmarking(df)
    print("\nBenchmarking Results:")
    print(f"Collaborative Filtering RMSE: {benchmarks['collaborative_filtering_rmse']:.2f}")
    print(f"XGBoost MAE: {benchmarks['xgboost_mae']:.2f}")

main()

# df = load_data()

# # Setup RAG System (Recommendation System)
# llm_pipe = setup_rag_system()  # This will show faster loading
# # Test with a customer ID
# customer_id = df['customer_id'].iloc[0]
# customer_record = df[df['customer_id'] == customer_id].iloc[0]
# print(f'{{"customer_id": {customer_record["customer_id"]}, ...}}')  # Same print

# # Generate Recommendation - Now should be 2-5x faster
# customer_data = df[df['customer_id'] == customer_id].iloc[0]
# recs = generate_recommendation(llm_pipe, customer_data)

# # Rest remains same
# print("AI Recommendations:")
# print(recs)

# # Check Ethical Biases
# bias_metrics = check_bias(df)
# print("\nEthical Bias Metrics:")
# print(f"Gender Impact Ratio: {bias_metrics['gender_impact']:.2f}")
# print(f"Income Fairness Gap: {bias_metrics['income_fairness']:.2f}")
# # Run Benchmarking
# benchmarks = run_benchmarking(df)
# print("\nBenchmarking Results:")
# print(f"Collaborative Filtering RMSE: {benchmarks['collaborative_filtering_rmse']:.2f}")
# print(f"XGBoost MAE: {benchmarks['xgboost_mae']:.2f}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


{"customer_id": 12308363, "age": 32 ,  "avg_spend": 11703.0 "interests": Tech Gadgets} }
AI Recommendations:
Based on the given information, here are three financial product recommendations for this individual:

1. High-Yield Savings Account: With an income of $125,000 and recent spend of $11,703, it's important to ensure that emergency funds are in place. A high-yield savings account can help earn interest on savings while keeping them easily accessible.

2. Individual Retirement Account (IRA): Given the age of 32, retirement may seem far off but starting early is crucial for building a substantial nest egg. An IRA offers tax advantages and various investment options tailored to different risk profiles.

3. Technology Equipment Protection Plan: Considering their stated interest in tech gadgets, investing in a protection plan could provide peace of mind against potential damage or theft. This might include extended warranties, accidental damage coverage, or even insurance policies spec

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd
import numpy as np
import torch
from transformers import (
    pipeline,
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig
)
from sentence_transformers import SentenceTransformer
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
from sklearn.model_selection import train_test_split
from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error, mean_absolute_error
from xgboost import XGBRegressor
import html

# --------------------------
# Data Loading & Processing
# --------------------------
@st.cache_data
def load_data():
    customer_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/customer_profiles.csv")
    social_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/social_media.csv")
    transactions_df = pd.read_csv("/content/drive/MyDrive/Hackathon2025/transactions.csv")

    # Data processing steps
    social_agg = social_df.groupby('customer_id').agg(
        sentiment_score=('sentiment_score', 'mean'),
        content=('content', lambda x: ' '.join(x.astype(str))))

    transaction_agg = transactions_df.groupby('customer_id').agg(
        avg_spend=('amount', 'mean'),
        total_spend=('amount', 'sum'),
        fav_category=('category', lambda x: x.mode()[0]))

    merged_df = pd.merge(customer_df, social_agg, on='customer_id', how='left')
    merged_df = pd.merge(merged_df, transaction_agg, on='customer_id', how='left')
    merged_df['content'] = merged_df['content'].fillna('')

    # Embedding generation
    model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    merged_df['embedding'] = model.encode(
        merged_df['content'].tolist(),
        batch_size=128,
        convert_to_numpy=True
    ).tolist()

    return merged_df

# --------------------------
# AI Recommendation System
# --------------------------
@st.cache_resource
def load_llm():
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True)

    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        device_map="auto",
        quantization_config=quantization_config,
        torch_dtype=torch.float16)

    tokenizer = AutoTokenizer.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        padding_side="left")
    tokenizer.pad_token = tokenizer.eos_token

    return pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device_map="auto",
        max_new_tokens=256,
        temperature=0.3)

def generate_recommendation(_pipe, customer_data):
    prompt = f"""<s>[INST] As a financial advisor, analyze:
    - Age: {customer_data['age']}
    - Income: ${customer_data['income']}
    - Recent Spend: ${customer_data['avg_spend']}
    - Interests: {customer_data['interests']}
    - Social Sentiment: {customer_data['sentiment_score']:.2f}

    Recommend 3 financial products and business strategies. Be concise. [/INST]"""

    response = _pipe(
        prompt,
        num_return_sequences=1,
        repetition_penalty=1.2)[0]['generated_text']

    return response.split("[/INST]")[-1].strip()

# --------------------------
# Ethical Checks
# --------------------------
def check_bias(df):
    df = df.copy()
    df['gender'] = df['gender'].map({'Female': 0, 'Male': 1, 'Other': -1})
    df = df[df['gender'] != -1]

    try:
        df['income_bin'] = pd.qcut(df['income'], q=[0, 0.25, 1.0], labels=[0, 1]).astype(int)
    except ValueError:
        df['income_bin'] = (df['income'] > df['income'].median()).astype(int)

    np.random.seed(42)
    df['prediction'] = np.random.randint(0, 2, size=len(df))

    dataset = BinaryLabelDataset(
        df=df[['gender', 'income_bin', 'prediction']],
        label_names=['prediction'],
        protected_attribute_names=['gender', 'income_bin'])

    metrics = {}
    gender_counts = df['gender'].value_counts()
    if 0 in gender_counts and 1 in gender_counts:
        metrics['gender_impact'] = ClassificationMetric(
            dataset, dataset,
            unprivileged_groups=[{'gender': 0}],
            privileged_groups=[{'gender': 1}]).disparate_impact()
    else:
        metrics['gender_impact'] = np.nan

    income_counts = df['income_bin'].value_counts()
    if 0 in income_counts and 1 in income_counts:
        metrics['income_fairness'] = ClassificationMetric(
            dataset, dataset,
            unprivileged_groups=[{'income_bin': 0}],
            privileged_groups=[{'income_bin': 1}]).statistical_parity_difference()
    else:
        metrics['income_fairness'] = np.nan

    return metrics

# --------------------------
# Benchmarking
# --------------------------
def run_benchmarking(df):
    # Collaborative Filtering
    user_item = df.pivot_table(index='customer_id', columns='fav_category',
                              values='avg_spend', fill_value=0)
    model = NMF(n_components=10)
    W = model.fit_transform(user_item)
    reconstructed = np.dot(W, model.components_)
    cf_rmse = np.sqrt(mean_squared_error(user_item, reconstructed))

    # XGBoost
    X = pd.get_dummies(df.drop(['customer_id', 'embedding'], axis=1))
    y = df['avg_spend']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    xgb = XGBRegressor().fit(X_train, y_train)
    xgb_mae = mean_absolute_error(y_test, xgb.predict(X_test))

    return {'cf_rmse': cf_rmse, 'xgb_mae': xgb_mae}

# --------------------------
# Streamlit UI
# --------------------------
def main():
    st.set_page_config(page_title="Financial Advisor AI", layout="wide")
    st.title("üí∞ AI Financial Advisor")

    # Data Loading
    with st.spinner("Loading customer data..."):
        df = load_data()

    # Sidebar Controls
    st.sidebar.header("Customer Selection")
    customer_id = st.sidebar.selectbox("Select Customer", df['customer_id'].unique())
    customer_data = df[df['customer_id'] == customer_id].iloc[0]

    # Main Content
    col1, col2 = st.columns([1, 2])

    with col1:
        st.header("üë§ Customer Profile")
        st.json({
            "Age": customer_data['age'],
            "Income": f"${customer_data['income']:,.2f}",
            "Avg Spend": f"${customer_data['avg_spend']:,.2f}",
            "Favorite Category": customer_data['fav_category'],
            "Social Sentiment": f"{customer_data['sentiment_score']:.1f}/5.0"
        })

        if st.button("Generate Recommendations üí°"):
            with st.spinner("Analyzing financial profile..."):
                llm_pipe = load_llm()
                recs = generate_recommendation(llm_pipe, customer_data)

            st.subheader("AI Recommendations")
            safe_recs = html.escape(recs).replace('\n', '<br>')
            st.markdown(f"""
            <div style="
                background: #f8f9fa;
                padding: 20px;
                border-radius: 10px;
                margin-top: 20px;
            ">
                {safe_recs}
            </div>
            """, unsafe_allow_html=True)

    with col2:
        st.header("üìä System Analytics")

        tab1, tab2 = st.tabs(["Ethical Metrics", "Performance"])

        with tab1:
            st.subheader("ü§ñ AI Fairness Report")
            bias_metrics = check_bias(df)

            col1, col2 = st.columns(2)
            with col1:
                st.metric("Gender Impact Ratio",
                          f"{bias_metrics['gender_impact']:.2f}",
                          help="1.0 = Perfect fairness, <1.0 = Bias against women")

            with col2:
                st.metric("Income Fairness Gap",
                          f"{bias_metrics['income_fairness']:.2f}",
                          help="0 = Perfect fairness, Negative values indicate low-income bias")

        with tab2:
            st.subheader("‚öôÔ∏è System Performance")
            benchmarks = run_benchmarking(df)

            st.metric("Recommendation Accuracy (RMSE)",
                      f"{benchmarks['cf_rmse']:.2f}",
                      help="Lower values indicate better performance")

            st.metric("Spending Prediction Error (MAE)",
                      f"{benchmarks['xgb_mae']:.2f}",
                      help="Lower values indicate better predictions")

            st.progress(0.85, text="Overall System Accuracy")

if __name__ == "__main__":
    main()

Writing app.py


In [None]:
# %%writefile app.py
# import streamlit as st
# import pandas as pd
# import numpy as np
# import faiss
# from aif360.datasets import BinaryLabelDataset
# from aif360.metrics import ClassificationMetric
# from sklearn.model_selection import train_test_split
# from sentence_transformers import SentenceTransformer
# import torch
# from transformers import (
#     pipeline,
#     AutoTokenizer,
#     AutoModelForCausalLM,
#     BitsAndBytesConfig
# )
# from sklearn.decomposition import NMF
# from sklearn.metrics import mean_squared_error, mean_absolute_error
# from xgboost import XGBRegressor

# # --------------------------
# # 1. Optimized Data Loading
# # --------------------------
# @st.cache_data
# def load_data(uploaded_files):
#     # Load datasets from uploaded files
#     customer_df = pd.read_csv(uploaded_files['customer_profiles'])
#     social_df = pd.read_csv(uploaded_files['social_media'])
#     transactions_df = pd.read_csv(uploaded_files['transactions'])

#     # Optimized aggregations
#     social_agg = social_df.groupby('customer_id').agg(sentiment_score=('sentiment_score', 'mean'),
#         content=('content', lambda x: ' '.join(x.astype(str)))
#     )

#     transaction_agg = transactions_df.groupby('customer_id').agg(avg_spend=('amount', 'mean'),
#         total_spend=('amount', 'sum'),
#         fav_category=('category', lambda x: x.mode()[0])
#     )

#     # Merge with optimized data types
#     merged_df = customer_df.merge(social_agg, on='customer_id', how='left')
#     merged_df = merged_df.merge(transaction_agg, on='customer_id', how='left')
#     merged_df['content'] = merged_df['content'].fillna('')

#     # Batch processing with GPU acceleration
#     model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2', device='cuda' if torch.cuda.is_available() else 'cpu')
#     merged_df['embedding'] = model.encode(
#         merged_df['content'].tolist(),
#         batch_size=128,
#         convert_to_numpy=True,
#         normalize_embeddings=True
#     ).tolist()

#     return merged_df

# # --------------------------
# # 2. Optimized RAG System
# # --------------------------
# @st.cache_resource
# def setup_rag_system():
#     quantization_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_compute_dtype=torch.float16,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_use_double_quant=True
#     )

#     model = AutoModelForCausalLM.from_pretrained(
#         "mistralai/Mistral-7B-Instruct-v0.2",
#         device_map="auto",
#         quantization_config=quantization_config,
#         torch_dtype=torch.float16
#     )

#     tokenizer = AutoTokenizer.from_pretrained(
#         "mistralai/Mistral-7B-Instruct-v0.2",
#         padding_side="left"
#     )
#     tokenizer.pad_token = tokenizer.eos_token

#     return pipeline(
#         "text-generation",
#         model=model,
#         tokenizer=tokenizer,
#         device_map="auto",
#         max_new_tokens=256,
#         temperature=0.3,
#         do_sample=True,
#         top_p=0.9
#     )

# # --------------------------
# # 3. Optimized Recommendation Generation
# # --------------------------
# def generate_recommendation(_pipe, customer_data):
#     prompt = f"""<s>[INST] As a financial advisor, analyze:
#     - Age: {customer_data['age']}
#     - Income: ${customer_data['income']}
#     - Recent Spend: ${customer_data['avg_spend']}
#     - Interests: {customer_data['interests']}
#     - Social Sentiment: {customer_data['sentiment_score']:.2f}

#     Recommend 3 financial products and business strategies. Be concise. [/INST]"""

#     # Generate with optimized parameters
#     response = _pipe(
#         prompt,
#         num_return_sequences=1,
#         repetition_penalty=1.2,
#         early_stopping=True
#     )[0]['generated_text']

#     return response.split("[/INST]")[-1].strip()

# # --------------------------
# # Main Streamlit App
# # --------------------------
# def main():
#     st.title('Customer Insights and AI-Driven Recommendations')

#     # Upload files via Streamlit
#     uploaded_files = {}
#     uploaded_files['customer_profiles'] = st.file_uploader("Upload Customer Profiles CSV", type="csv")
#     uploaded_files['social_media'] = st.file_uploader("Upload Social Media Data CSV", type="csv")
#     uploaded_files['transactions'] = st.file_uploader("Upload Transactions Data CSV", type="csv")

#     if uploaded_files['customer_profiles'] is not None and uploaded_files['social_media'] is not None and uploaded_files['transactions'] is not None:
#         with st.spinner('Loading and processing data...'):
#             df = load_data(uploaded_files)

#         # Sidebar for selecting a customer record
#         st.sidebar.header('Select a Customer')
#         customer_id = st.sidebar.selectbox('Choose Customer ID', df['customer_id'].unique())

#         customer_data = df[df['customer_id'] == customer_id].iloc[0]
#         st.write(f"**Customer ID:** {customer_data['customer_id']}")
#         st.write(f"**Age:** {customer_data['age']}")
#         st.write(f"**Average Spend:** ${customer_data['avg_spend']}")
#         st.write(f"**Interests:** {customer_data['interests']}")

#         # Setup RAG System
#         llm_pipe = setup_rag_system()

#         # Generate Recommendation
#         st.subheader('AI Recommendations')
#         recs = generate_recommendation(llm_pipe, customer_data)
#         st.write(recs)

#         # Run Benchmarking
#         st.subheader('Benchmarking Results')
#         cf_rmse, xgb_mae = run_benchmarking(df)
#         st.write(f"Collaborative Filtering RMSE: {cf_rmse:.2f}")
#         st.write(f"XGBoost MAE: {xgb_mae:.2f}")

#         # Ethical Checks
#         st.subheader('Ethical Bias Check')
#         bias_metrics = check_bias(df)
#         st.write(f"Gender Impact Ratio: {bias_metrics['gender_impact']:.2f}")
#         st.write(f"Income Fairness Gap: {bias_metrics['income_fairness']:.2f}")

# # Run Streamlit app
# if __name__ == "__main__":
#     main()


Overwriting app.py


In [None]:
!npm install localtunnel
!streamlit run app.py &>/content/logs.txt &
!npx localtunnel --port 8501 --subdomain myfinanceapp --secure-password SecurePass123

[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K‚†ß[1G[0K‚†á[1G[0K‚†è[1G[0K‚†ã[1G[0K‚†ô[1G[0K‚†π[1G[0K‚†∏[1G[0K‚†º[1G[0K‚†¥[1G[0K‚†¶[1G[0K
added 22 packages in 3s
[1G[0K‚†¶[1G[0K
[1G[0K‚†¶[1G[0K3 packages are looking for funding
[1G[0K‚†¶[1G[0K  run `npm fund` for details
[1G[0K‚†¶[1G[0K[1G[0K‚†ô[1G[0Kyour url is: https://myfinanceapp.loca.lt
/content/node_modules/localtunnel/bin/lt.js:81
    throw err;
    ^

Error: connection refused: localtunnel.me:6167 (check your firewall settings)
    at Socket.<anonymous> [90m(/content/[39mnode_modules/[4mlocaltunnel[24m/lib/TunnelCluster.js:52:11[90m)[39m
[90m    at Socket.emit (node:events:517:28)[39m
[90m    at emitErrorNT (node:internal/streams/destroy:151:8)[39m
[90m    at emitErrorCloseNT (node:internal/streams/destroy:116:3)[39m
[90m    at process.proce

In [None]:
!curl https://loca.lt/mytunnelpassword

34.145.67.184

In [None]:
df = load_data()
#df[df['customer_id'] == 12308363]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]