<a href="https://colab.research.google.com/github/antonychackotc/final-project/blob/main/all_nlp_chatbots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Chatbot for all - [ Sentiment, Translation, Summarization, Q & A ]**

In [1]:
from google.colab import drive

# Mount Google Drive; the app written below loads its model files from
# paths under /content/drive/MyDrive.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
!pip install -q streamlit
!pip install -q localtunnel
!pip install -q pyngrok
!pip install faiss-cpu
!pip install sentence-transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m55.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: Could not find a version that satisfies the requirement localtunnel (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for localtunnel[0m[31m
[0mCollecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pack

In [3]:
%%writefile app1.py
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from transformers import MarianMTModel, MarianTokenizer, BartForConditionalGeneration, BartTokenizer
import torch

# Load models
@st.cache_resource
def load_models():
    """Load every model object the app uses and return them as one tuple.

    The function is wrapped in ``st.cache_resource`` so Streamlit can reuse the
    loaded objects instead of loading them again on each script run.

    Returns:
        tuple: ``(rf_classifier, bert_model, svd, translation_model,
        translation_tokenizer, summarization_model, summarization_tokenizer)``
        in exactly that order.
    """
    # Sentiment pipeline pieces: classifier, sentence encoder and SVD reducer.
    # NOTE(review): joblib.load unpickles the file, so only trusted files
    # should ever be placed at these paths.
    classifier = joblib.load("/content/drive/MyDrive/random_forest_model.pkl")
    sentence_encoder = SentenceTransformer('bert-base-uncased')
    svd_reducer = joblib.load("/content/drive/MyDrive/twitter_real_svd50D_model.pkl")

    # Translation model and tokenizer share one saved directory.
    mt_dir = "/content/drive/MyDrive/saved_model"
    mt_tokenizer = MarianTokenizer.from_pretrained(mt_dir)
    mt_model = MarianMTModel.from_pretrained(mt_dir)

    # Summarization model and tokenizer share one saved directory.
    bart_dir = "/content/drive/MyDrive/BART_finetuned_model"
    bart_tokenizer = BartTokenizer.from_pretrained(bart_dir)
    bart_model = BartForConditionalGeneration.from_pretrained(bart_dir)

    return (
        classifier,
        sentence_encoder,
        svd_reducer,
        mt_model,
        mt_tokenizer,
        bart_model,
        bart_tokenizer,
    )

# Unpack the loaded models into the module-level names that the helper
# functions and the tabs below refer to.
(
    rf_classifier,
    bert_model,
    svd,
    translation_model,
    translation_tokenizer,
    summarization_model,
    summarization_tokenizer,
) = load_models()

# Function to get BERT embedding
def get_bert_embedding(sentence):
    """Encode ``sentence`` with the module-level ``bert_model`` and return the result."""
    embedding = bert_model.encode(sentence)
    return embedding

# Function for translation
def translate(text):
    """Translate ``text`` with the loaded Marian model and return the decoded string.

    The input is tokenized with truncation at ``max_length=64``, so longer
    input is cut off before generation. Only the first generated sequence is
    decoded, with special tokens removed.
    """
    encoded = translation_tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=64,
    )
    generated = translation_model.generate(**encoded)
    first_sequence = generated[0]
    return translation_tokenizer.decode(first_sequence, skip_special_tokens=True)

# Function for summarization
def summarize(text):
    """Summarize ``text`` with the loaded BART model and return the decoded summary.

    The input is tokenized with truncation at ``max_length=64`` and generation
    is capped at ``max_length=16``. Only the first generated sequence is
    decoded, with special tokens removed.
    """
    encoded = summarization_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=64,
    )
    generated = summarization_model.generate(**encoded, max_length=16)
    first_sequence = generated[0]
    return summarization_tokenizer.decode(first_sequence, skip_special_tokens=True)

# Load dataset
@st.cache_data
def load_insurance_data(csv_path="updated_insurance_data.csv"):
    """Read the insurance CSV into a DataFrame.

    Streamlit re-executes this script on every widget interaction; caching the
    read avoids re-parsing the file each time. The cache is keyed on
    ``csv_path`` only, so clear the cache (or restart the app) after editing
    the file on disk.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        pandas.DataFrame: The parsed dataset.
    """
    return pd.read_csv(csv_path)


@st.cache_resource
def load_faiss_encoder(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Load the sentence encoder used to embed the knowledge base.

    Cached as a resource so the model is instantiated once rather than on
    every script rerun.

    Args:
        model_name: Model identifier passed to ``SentenceTransformer``.

    Returns:
        SentenceTransformer: The loaded encoder.
    """
    return SentenceTransformer(model_name)


df = load_insurance_data()

# Sentence encoder whose embeddings are stored in the FAISS index
faiss_model = load_faiss_encoder()

# Convert dataset into a text-based knowledge base
def create_knowledge_base(df):
    """Render every row of ``df`` as one descriptive sentence.

    Each sentence lists the row's fields as ``"Label: value"`` pairs separated
    by ``", "`` and ends with ``" days."`` after the reimbursement time.

    Args:
        df: DataFrame containing the insurance columns referenced below.

    Returns:
        list[str]: One sentence per row, in row order (empty for an empty frame).

    Raises:
        KeyError: If a referenced column is missing from ``df``.
    """
    # (label shown in the sentence, column read from the row), in output order.
    labelled_columns = (
        ("Policy ID", "Policy_ID"),
        ("Customer Age", "Customer_Age"),
        ("Gender", "Gender"),
        ("Policy Type", "Policy_Type"),
        ("Annual Income", "Annual_Income"),
        ("Vehicle Age", "Vehicle_Property_Age"),
        ("Claim History", "Claim_History"),
        ("Premium Amount", "Premium_Amount"),
        ("Claim Amount", "Claim_Amount"),
        ("Risk Score", "Risk_Score"),
        ("Fraudulent Claim", "Fraudulent_Claim"),
        ("Policy Start", "Policy_Start_Date"),
        ("Policy Expiry", "Policy_Expiry_Date"),
        ("Deductible", "Deductible_Amount"),
        ("Coverage Limit", "Coverage_Limit"),
        ("Claim Status", "Claim_Status"),
        ("Reimbursement Time", "Reimbursement_Time"),
    )

    def describe(record):
        # Join the "Label: value" pairs, then add the unit for the final field.
        fields = ", ".join(
            f"{label}: {record[column]}" for label, column in labelled_columns
        )
        return f"{fields} days."

    return [describe(record) for _, record in df.iterrows()]

# Generate text data from dataset
knowledge_base = create_knowledge_base(df)


@st.cache_resource
def build_faiss_index(texts):
    """Embed ``texts`` and build a flat L2 FAISS index over the embeddings.

    Streamlit re-executes the script on every interaction; without caching the
    whole corpus would be re-encoded and the index file rewritten on each
    click. The cache is keyed on ``texts``, so a changed knowledge base
    triggers a rebuild.

    Args:
        texts: List of knowledge-base sentences to embed.

    Returns:
        tuple: ``(embeddings, index)`` — the embedding matrix and the FAISS
        index holding it.

    Raises:
        ValueError: If ``texts`` is empty (there is nothing to index and the
            embedding dimension cannot be determined).
    """
    if len(texts) == 0:
        raise ValueError("The insurance knowledge base is empty; nothing to index.")

    # Convert to embeddings; FAISS expects a contiguous float32 matrix.
    embeddings = faiss_model.encode(texts, convert_to_numpy=True)
    embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

    # Create FAISS index
    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    flat_index.add(embeddings)

    # Save FAISS index
    faiss.write_index(flat_index, "faiss_insurance.index")
    return embeddings, flat_index


corpus_embeddings, index = build_faiss_index(knowledge_base)
embedding_dim = corpus_embeddings.shape[1]

# Streamlit UI
# Page title, then one tab per feature; the sections below pick their tab
# from `tabs` by position, so the order of the labels matters.
st.title("🛠️ Multi-Function NLP App")
tab_labels = [
    "Sentiment Prediction",
    "English to French Translation",
    "Text Summarization",
    "Insurance Q & A",
]
tabs = st.tabs(tab_labels)

# Sentiment Prediction Tab
with tabs[0]:
    st.subheader("📊 Sentiment Prediction")
    user_sentence = st.text_area("Enter your text:", "This insurance policy is very helpful.")
    predict_clicked = st.button("Predict Sentiment")
    if predict_clicked and not user_sentence.strip():
        # Blank (or whitespace-only) input: ask for text instead of predicting.
        st.warning("Please enter a sentence before predicting.")
    elif predict_clicked:
        # Pipeline: sentence embedding -> SVD reduction -> classifier.
        embedding_frame = pd.DataFrame([get_bert_embedding(user_sentence)])
        reduced_frame = pd.DataFrame(svd.transform(embedding_frame))
        predicted_class = rf_classifier.predict(reduced_frame)[0]
        class_labels = {0: "Negative", 1: "Neutral", 2: "Positive"}
        st.success(f"### Predicted Sentiment: {class_labels[predicted_class]} 🎯")

# Translation Tab
with tabs[1]:
    st.subheader("🌍 English to French Translator")
    text = st.text_area("Enter text in English:")
    translate_clicked = st.button("Translate")
    if translate_clicked and not text.strip():
        # Nothing to translate: prompt the user for input.
        st.warning("⚠️ Please enter some text.")
    elif translate_clicked:
        translation = translate(text)
        st.success(f"✅ Translation: {translation}")

# Summarization Tab
with tabs[2]:
    st.subheader("📄 Text Summarization")
    input_text = st.text_area("Enter text to summarize:")
    summarize_clicked = st.button("Generate Summary")
    if summarize_clicked and not input_text.strip():
        # Nothing to summarize: prompt the user for input.
        st.warning("Please enter text to summarize.")
    elif summarize_clicked:
        summary = summarize(input_text)
        st.subheader("Generated Summary:")
        st.write(summary)

# Knowledge Base Tab
with tabs[3]:
    st.subheader("📚 Insurance Q & A")
    st.write("FAISS index created and saved for insurance-related queries.")

    # The index used to be built but never queried, so this tab could not
    # answer anything. Embed the question with the same encoder that produced
    # the index and show the closest knowledge-base entries.
    question = st.text_input("Ask a question about the insurance policies:")
    top_k = st.slider("Number of matching policies to show:", min_value=1, max_value=10, value=3)
    if st.button("Search"):
        if not question.strip():
            st.warning("Please enter a question.")
        elif index.ntotal == 0:
            st.info("The knowledge base is empty, so there is nothing to search.")
        else:
            # FAISS expects a 2-D contiguous float32 array of query vectors.
            query_embedding = faiss_model.encode([question], convert_to_numpy=True)
            query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)
            # Never ask for more neighbours than the index holds.
            distances, neighbours = index.search(query_embedding, min(top_k, index.ntotal))
            for rank, (neighbour, distance) in enumerate(zip(neighbours[0], distances[0]), start=1):
                if neighbour < 0:
                    # FAISS fills missing results with -1.
                    continue
                st.markdown(f"**{rank}.** {knowledge_base[neighbour]}")
                st.caption(f"Squared L2 distance: {distance:.4f}")


Writing app1.py


In [4]:
from pyngrok import ngrok

# Replace 'YOUR_AUTHTOKEN' with your actual ngrok authtoken
ngrok.set_auth_token("2t49imFFYgswbiILhxmRavW6AlI_5a5SfwjpV9f29CtGGGYiu")

# Run the Streamlit app in the background
!streamlit run app1.py &>/dev/null&

# Create a public URL using ngrok
try:
    public_url = ngrok.connect(8501)
    print(f"Streamlit app is running at {public_url}")
except Exception as e:
    print(f"Error: {e}")
    print("Trying to run with localtunnel")
    !streamlit run app1.py &>/content/logs.txt & npx localtunnel --port 8501

Streamlit app is running at NgrokTunnel: "https://1b59-35-201-156-148.ngrok-free.app" -> "http://localhost:8501"


In [5]:
# Capture the output of `curl ipv4.icanhazip.com` (the runtime's public IPv4
# address as reported by that service) into `ipv4`, then display it.
# NOTE(review): presumably this is the value localtunnel asks for as the tunnel
# password when the localtunnel fallback is used — confirm.
ipv4 = !curl ipv4.icanhazip.com
ipv4

['35.201.156.148']

In [6]:
# !streamlit run app1.py  &>/content/logs.txt & npx localtunnel --port 8501