<a href="https://colab.research.google.com/github/arielcintra/smart_bot_boy/blob/main/smart_boy_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
pip install sentence-transformers flask pymongo requests beautifulsoup4



In [6]:
import csv
import io
import json
import logging
import os

import docx
import requests
import xlrd
from bs4 import BeautifulSoup
from flask import Flask, request, render_template
from pymongo import MongoClient
from sentence_transformers import SentenceTransformer, util
from werkzeug.utils import secure_filename

In [None]:
# Sentence-embedding model, loaded once and shared by every function that calls model.encode.
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

In [None]:
# MongoDB connection. Set the MONGODB_URI environment variable to point at a
# remote/hosted instance; without it the local default server is used.
client = MongoClient(os.environ.get("MONGODB_URI", "mongodb://localhost:27017/"))

In [None]:
# Handle to the 'chatbot_db' database on the MongoDB client.
db = client['chatbot_db']

In [None]:
# Collection holding one record per ingested document: written by the store_*
# functions and scanned by search_answer.
document_collection = db['documents']

In [None]:
# Upload whitelist, limited to formats extract_text_from_file has a branch for.
# 'pdf' is intentionally absent: there is no PDF extractor, so accepting it
# would store an empty document and still report success.
ALLOWED_EXTENSIONS = {'txt', 'docx', 'xlsx', 'csv'}

In [None]:
def allowed_file(filename):
    """Tell whether ``filename`` ends in an extension listed in ALLOWED_EXTENSIONS.

    The extension is whatever follows the last dot, compared case-insensitively.
    Names without a dot are rejected.
    """
    _stem, dot, suffix = filename.rpartition('.')
    return bool(dot) and suffix.lower() in ALLOWED_EXTENSIONS

In [None]:
def extract_text_from_file(file):
    """Return the plain-text content of an uploaded file.

    Args:
        file: Upload object exposing ``filename`` and ``read()``; for ``docx``
            the object itself is handed to ``docx.Document``.

    Returns:
        The extracted text, one line per paragraph/row terminated by a newline
        (``txt`` content is returned as-is). An empty string is returned when
        the extension is missing or has no extractor.

    Raises:
        UnicodeDecodeError: if a ``txt``/``csv`` payload is not valid UTF-8.
    """
    # rpartition never raises: a name without a dot simply has no extension.
    _stem, dot, suffix = file.filename.rpartition('.')
    extension = suffix.lower() if dot else ""

    if extension == 'txt':
        # utf-8-sig also accepts plain UTF-8 but drops a leading BOM.
        return file.read().decode("utf-8-sig")

    if extension == 'docx':
        doc = docx.Document(file)
        return "".join(para.text + "\n" for para in doc.paragraphs)

    if extension == 'xlsx':
        # NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; confirm the
        # installed xlrd version can actually open .xlsx input.
        workbook = xlrd.open_workbook(file_contents=file.read())
        sheet = workbook.sheet_by_index(0)  # only the first sheet is read
        return "".join(
            " ".join(str(sheet.cell(row, col).value) for col in range(sheet.ncols)) + "\n"
            for row in range(sheet.nrows)
        )

    if extension == 'csv':
        decoded = file.read().decode('utf-8-sig')
        # newline='' leaves line endings untouched so the csv parser can keep
        # newlines that occur inside quoted fields (splitlines() dropped them).
        reader = csv.reader(io.StringIO(decoded, newline=''))
        return "".join(" ".join(row) + "\n" for row in reader)

    # No extractor for this extension.
    return ""

In [None]:
def store_document(text):
    """Embed ``text`` with the shared model and save it to ``document_collection``.

    The inserted record has two fields: ``text`` (the input) and ``embedding``
    (the encoded vector converted to a plain list).
    """
    vector = model.encode(text, convert_to_tensor=True).tolist()
    record = {"text": text, "embedding": vector}
    document_collection.insert_one(record)

In [None]:
def store_link_content(url):
    """Fetch the text behind ``url`` and store it as a document.

    Args:
        url: Address passed to ``extract_text_from_link``.

    Returns:
        A human-readable status message: success when text was stored, or a
        notice that nothing usable was retrieved.
    """
    text = extract_text_from_link(url)
    # Whitespace-only results (e.g. a page of empty <p> tags) carry no content
    # worth embedding, so they are treated the same as an empty result.
    if not text or not text.strip():
        return "No content retrieved from the URL."
    # Reuse the single storage path instead of duplicating the embed+insert logic.
    store_document(text)
    return "Link content processed and stored successfully!"

In [None]:
def extract_text_from_link(url, timeout=10):
    """Download ``url`` and return the text of its ``<p>`` elements.

    Args:
        url: Page address to fetch.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The paragraph texts joined by newlines, or an empty string when the
        request fails or the response status is not 200. Failures are logged
        rather than returned, so an error message is never mistaken for page
        content by the caller.
    """
    # NOTE(review): the URL is fetched as given; if it can come from untrusted
    # users, consider restricting the hosts this server is allowed to reach.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        logging.warning("Could not fetch %s: %s", url, exc)
        return ""

    if response.status_code != 200:
        logging.warning("Fetching %s returned HTTP %s", url, response.status_code)
        return ""

    soup = BeautifulSoup(response.content, 'html.parser')
    return "\n".join(para.get_text() for para in soup.find_all('p'))

In [None]:
def search_answer(question):
    """Return the stored document text most similar to ``question``.

    The question is embedded with the shared model and compared, by cosine
    similarity, against the embedding saved with every record in
    ``document_collection``.

    Args:
        question: The user's query text.

    Returns:
        The ``text`` field of the best-scoring record, or
        "No relevant information found." when the collection is empty.
    """
    # Encoded without convert_to_tensor so the query vector is a host-side
    # array, matching the plain lists of floats read back from the database.
    question_embedding = model.encode(question)

    best_text = None
    best_score = float("-inf")
    for doc in document_collection.find():
        # doc['embedding'] already IS the vector saved at insert time; it must be
        # compared directly, not fed back into model.encode (which encodes text).
        score = float(util.pytorch_cos_sim(question_embedding, doc['embedding'])[0][0])
        # Strict '>' keeps the earliest record on ties, like a stable descending sort.
        if score > best_score:
            best_text, best_score = doc['text'], score

    if best_text is None:
        return "No relevant information found."
    return best_text

In [None]:
# Flask application object; the @app.route decorators below register their handlers on it.
app = Flask(__name__)

@app.route('/', methods=['GET'])
def index():
    """Render and return the 'index.html' template for the root URL."""
    page = render_template('index.html')
    return page

@app.route("/upload", methods=["POST"])
def upload_document():
    """Accept an uploaded file, extract its text and store it for search.

    Expects a multipart form field named ``file``.

    Returns:
        ``(message, status)``: 200 on success; 400 when the field is missing,
        the file name is not accepted by ``allowed_file``, or no text could be
        extracted from the file.
    """
    if 'file' not in request.files:
        return "No file part", 400

    file = request.files['file']
    if not (file and allowed_file(file.filename)):
        return "Invalid file format", 400

    text = extract_text_from_file(file)
    # An empty extraction would otherwise be embedded, stored and reported as a
    # success even though the document can never answer a question.
    if not text or not text.strip():
        return "No text could be extracted from the file", 400

    store_document(text)
    return "Document uploaded and processed successfully!", 200

@app.route("/upload_link", methods=["POST"])
def upload_link():
    """Ingest the page named by the ``url`` form field.

    Responds with the status message from ``store_link_content`` and 200, or
    with "Invalid URL" and 400 when the field is missing or empty.
    """
    link = request.form.get("url")
    if not link:
        return "Invalid URL", 400
    return store_link_content(link), 200


@app.route("/api/messages", methods=["POST"])
def messages():
    """Answer a chat message posted as JSON ``{"text": "..."}``.

    Returns:
        A JSON response ``{"text": <answer>}`` with status 200, or
        ``{"text": "No message received."}`` with status 400 when the body is
        missing, is not a JSON object, or carries no non-blank ``text`` string.
    """
    # silent=True gives None instead of aborting when the body is absent or is
    # not valid JSON, so malformed requests get the handler's own 400 reply.
    payload = request.get_json(silent=True)
    user_message = payload.get("text", "") if isinstance(payload, dict) else ""

    if isinstance(user_message, str) and user_message.strip():
        body, status = {"text": search_answer(user_message)}, 200
    else:
        body, status = {"text": "No message received."}, 400

    # Label the payload as JSON; a bare string return is served as text/html.
    return app.response_class(json.dumps(body), status=status, mimetype="application/json")

In [None]:
if __name__ == "__main__":
    # The Werkzeug debugger lets anyone who can reach the server run code, so it
    # is opt-in via FLASK_DEBUG instead of always on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true"}
    # The auto-reloader restarts the process, which is not expected to work from
    # inside a notebook kernel, so it stays off even when debugging.
    app.run(debug=debug_enabled, use_reloader=False, port=3978)