# Flask Chatbot with Fine-Tuning and Sentiment Analysis

This notebook demonstrates how to create a Flask chatbot using the Flan-T5-XL model, fine-tune it with the OpenOrca dataset, and integrate sentiment analysis to adjust responses.

In [1]:
!pip install flask transformers datasets tensorflow rouge-score

Collecting datasets
  Downloading datasets-2.20.0-py3-none-any.whl (547 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl (40.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 MB[0m [31m38.2 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
Collecting requests (from transformers)
  Downloading requests-2.32.3-py3-none-any.whl (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?

In [2]:

# Import necessary libraries
import tensorflow as tf
from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from datasets import load_dataset, load_metric
import numpy as np
from flask import Flask, request, jsonify


## Step 1: Prepare the Dataset

In [3]:
# Load the OpenOrca dataset
dataset = load_dataset("Open-Orca/OpenOrca")

# Select only 10000 rows
dataset = dataset['train'].select(range(10000))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/12.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.01G [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [4]:
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")

def preprocess_function(examples):
    inputs = ["question: " + example for example in examples["question"]]
    targets = [example if len(example)> 0 else "I don't know" for example in examples["response"]]
    model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding=True, return_tensors="np")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=256, truncation=True, padding=True, return_tensors="np")
    model_inputs["labels"] = labels["input_ids"].astype(np.int32)
    model_inputs["decoder_input_ids"] = labels["input_ids"].astype(np.int32)
    model_inputs["input_ids"] = model_inputs["input_ids"].astype(np.int32)
    model_inputs["attention_mask"] = model_inputs["attention_mask"].astype(np.int32)
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)
tokenized_dataset.set_format("tf")

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]



In [5]:
# Prepare the TensorFlow dataset
from transformers import DataCollatorForSeq2Seq
model = TFAutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

tf_dataset = tokenized_dataset.to_tf_dataset(
    columns=["input_ids", "attention_mask", "labels", "decoder_input_ids"],
    shuffle=True,
    batch_size=128,
    drop_remainder=True  # ensures all batches have the same size
)

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


In [6]:
for batch in tf_dataset.take(1):
    print(batch['input_ids'].shape)
    print(batch['attention_mask'].shape)
    print(batch['decoder_input_ids'].shape)
    print(batch['labels'].shape)

(128, 256)
(128, 256)
(128, 256)
(128, 256)


## Step 2: Fine-Tune the Model

In [7]:

# Freeze all layers except the final head
for layer in model.layers[:-1]:
    layer.trainable = False

In [8]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
model.summary()

Model: "tft5_for_conditional_generation"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 shared (Embedding)          multiple                  16449536  
                                                                 
 encoder (TFT5MainLayer)     multiple                  35332800  
                                                                 
 decoder (TFT5MainLayer)     multiple                  41628352  
                                                                 
 lm_head (Dense)             multiple                  16449536  
                                                                 
Total params: 76961152 (293.58 MB)
Trainable params: 16449536 (62.75 MB)
Non-trainable params: 60511616 (230.83 MB)
_________________________________________________________________


In [9]:
# Compile and train the model


model.fit(tf_dataset, epochs=2)

Epoch 1/2


Cause: for/else statement not yet supported


Cause: for/else statement not yet supported
Epoch 2/2


<tf_keras.src.callbacks.History at 0x7b5d0855f730>

In [10]:
model.save_pretrained("./flan_t5_finetuned")

## Step 3: Create Flask App

In [11]:
%%writefile flask_chatbot_app.py

from transformers import TFAutoModelForSeq2SeqLM, AutoTokenizer
from flask import Flask, request, jsonify
from transformers import pipeline

# Initialize Flask
app = Flask(__name__)

# Initialize conversation history
conversation_history = []
sentiment_analysis = pipeline("sentiment-analysis")
model = TFAutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")

def adjust_prompt_based_on_sentiment(prompt):
    sentiment = sentiment_analysis(prompt)[0]
    if sentiment['label'] == 'NEGATIVE':
        prompt = f"User seems upset. Respond in a calming manner: {prompt}"
    return prompt

@app.route('/chat', methods=['POST'])
def chat():
    user_input = request.json.get('message')

    # Add user input to conversation history
    conversation_history.append({"role": "User", "message": user_input})

    # Adjust prompt based on sentiment
    adjusted_input = adjust_prompt_based_on_sentiment(user_input)

    # Prepare the full conversation as the prompt
    full_conversation = ""
    for turn in conversation_history:
        full_conversation += f"{turn['role']}: {turn['message']} "

    # Generate response using the fine-tuned model
    inputs = tokenizer(full_conversation, return_tensors="tf", max_length=1024, truncation=True, padding=True)
    outputs = model.generate(**inputs, max_new_tokens=250)
    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Add assistant response to conversation history
    conversation_history.append({"role": "Assistant", "message": response_text})

    return jsonify({"response": response_text})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)


Writing flask_chatbot_app.py


# Step 4: Test the Chatbot

In [12]:

import subprocess

# Stop any running Flask app
subprocess.run(['pkill', '-f', 'flask_chatbot_app.py'])

CompletedProcess(args=['pkill', '-f', 'flask_chatbot_app.py'], returncode=1)

In [13]:
!nohup python flask_chatbot_app.py &

nohup: appending output to 'nohup.out'


In [16]:
!sudo lsof -i -P -n | grep LISTEN

node         7 root   21u  IPv6  14022      0t0  TCP *:8080 (LISTEN)
kernel_ma   14 root    6u  IPv4  19000      0t0  TCP 172.28.0.12:6000 (LISTEN)
colab-fil   80 root    3u  IPv4  15973      0t0  TCP 127.0.0.1:3453 (LISTEN)
jupyter-n  136 root    7u  IPv4  24112      0t0  TCP 172.28.0.12:9000 (LISTEN)
pt_main_t  387 root   21u  IPv4  25924      0t0  TCP 127.0.0.1:40337 (LISTEN)
python3    424 root    3u  IPv4  33876      0t0  TCP 127.0.0.1:42517 (LISTEN)
python3    424 root    5u  IPv4  33877      0t0  TCP 127.0.0.1:51813 (LISTEN)
pt_main_t 4423 root   33u  IPv4 149593      0t0  TCP *:5000 (LISTEN)


In [17]:

import requests

# Define the URL of the Flask app
url = 'http://0.0.0.0:5000/chat'

# Send a request to the Flask app
response = requests.post(url, json={'message': 'Hello, how are you?'})
print(response.json())


{'response': "I'm fine"}


In [18]:
response = requests.post(url, json={'message': 'I was wondering how can I go to Eiffel Tower from the airport using the train and subway?'})
print(response.json())

{'response': 'The train is the CN line and the subway is the SNCF line.'}


## Step 5: Evaluation with ROUGE Score

In [19]:

def evaluate_model(tokenized_dataset, model):
    rouge = load_metric('rouge', trust_remote_code=True)

    # Prepare evaluation data
    eval_data = tokenized_dataset.shuffle(seed=42).select(range(5))

    # Generate predictions
    predictions = []
    references = []
    for example in eval_data:
        question = example['question'].numpy().decode("utf-8")
        response = example['response'].numpy().decode("utf-8")
        inputs = tokenizer(question, return_tensors="tf", max_length=1024, truncation=True, padding=True)
        outputs = model.generate(**inputs, max_new_tokens=250)
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(prediction)
        references.append(response)

    # Compute ROUGE score
    results = rouge.compute(predictions=predictions, references=references)
    return results

# Evaluate the model
rouge_scores = evaluate_model(tokenized_dataset, model)
print(rouge_scores)


  rouge = load_metric('rouge', trust_remote_code=True)


Downloading builder script:   0%|          | 0.00/2.17k [00:00<?, ?B/s]

{'rouge1': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.16666666666666666, recall=0.0012460120524636653, fmeasure=0.002468899020983898), high=Score(precision=0.36666666666666664, recall=0.0028589152782701167, fmeasure=0.005656150016999834)), 'rouge2': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.0, recall=0.0, fmeasure=0.0), high=Score(precision=0.0, recall=0.0, fmeasure=0.0)), 'rougeL': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.16666666666666666, recall=0.0012460120524636653, fmeasure=0.002468899020983898), high=Score(precision=0.36666666666666664, recall=0.0028589152782701167, fmeasure=0.005656150016999834)), 'rougeLsum': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.16666666666666666, recall=0.0012460120524636653, fmeasure=0.002468899020983898), high=Score(precision=0.36666666666666664, recall=0.002858915278