In [1]:
!pip install flask transformers requests



In [2]:
%%writefile flask_chatbot_app.py
from flask import Flask, request, jsonify
from transformers import pipeline, TFAutoModelForSeq2SeqLM, AutoTokenizer
import tensorflow as tf
import tf_keras
import numpy as np

app = Flask(__name__)

# Load the Hugging Face models once, at import time, so every request reuses them.

# Seq2seq model (and its tokenizer) used to generate the chatbot replies.
model_name = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForSeq2SeqLM.from_pretrained(model_name)

# Name the sentiment checkpoint explicitly instead of relying on the pipeline's
# implicit default: generate_response() branches on this model's 'NEGATIVE'
# label, so a silently changed default would alter the prompt selection.
sentiment_model_name = 'distilbert/distilbert-base-uncased-finetuned-sst-2-english'
sentiment_model = pipeline("sentiment-analysis", model=sentiment_model_name)

def prune_layer(layer, pruning_factor=0.2):
    """Zero out the smallest-magnitude kernel entries of a Dense layer, in place.

    Args:
        layer: Any Keras layer. Only ``tf_keras.layers.Dense`` instances are
            modified; every other layer type is left untouched.
        pruning_factor: Fraction (0..1) of kernel entries to zero. It is used
            as a percentile of the absolute kernel values, so entries whose
            magnitude falls below that percentile are set to 0.

    Returns:
        The same ``layer`` object when it is a Dense layer, otherwise ``None``.
    """
    if not isinstance(layer, tf_keras.layers.Dense):
        return None

    weights = layer.get_weights()
    if not weights:
        # A Dense layer that has not been built yet has nothing to prune.
        return layer

    # The kernel is the first weight; anything after it (e.g. a bias) must be
    # handed back to set_weights unchanged or Keras rejects the update.
    kernel, *other_weights = weights
    magnitudes = np.abs(kernel)
    threshold = np.percentile(magnitudes, pruning_factor * 100)
    keep_mask = (magnitudes >= threshold).astype(kernel.dtype)
    layer.set_weights([kernel * keep_mask, *other_weights])
    return layer

def prune_model(model, pruning_factor=0.2):
    """Prune the model's layers in place and return the same model object.

    Every layer yielded by the encoder is pruned with twice ``pruning_factor``,
    every layer yielded by the decoder with ``pruning_factor`` itself, and the
    last entry of ``model.layers`` with half of it.
    """
    # (attribute holding the layer stack, factor applied to its layers)
    stack_factors = (
        ("encoder", pruning_factor * 2),
        ("decoder", pruning_factor),
    )
    for stack_name, stack_factor in stack_factors:
        for sublayer in getattr(model, stack_name)._flatten_layers():
            prune_layer(sublayer, stack_factor)

    # The final top-level layer gets the gentlest pruning.
    prune_layer(model.layers[-1], pruning_factor / 2)
    return model

def get_zero_and_nonzero_params(model):
    """Count zero-valued and non-zero scalar parameters over all model weights.

    Args:
        model: Any object exposing ``get_weights()`` that returns an iterable
            of array-like weights.

    Returns:
        A ``(zero_count, nonzero_count)`` tuple of integers. The two counts
        add up to the total number of scalar entries across all weights.
    """
    total_zero_params = 0
    total_nonzero_params = 0
    for weight in model.get_weights():
        values = np.asarray(weight)
        nonzero_count = int(np.count_nonzero(values))
        # values.size is the product of all dimensions, so weights of any rank
        # (including 0-D scalars and 3-D+ tensors) are counted correctly.
        total_zero_params += values.size - nonzero_count
        total_nonzero_params += nonzero_count
    return total_zero_params, total_nonzero_params

# Sparsity of the freshly loaded model, measured before any pruning.
zero_params, total_nonzero_params = get_zero_and_nonzero_params(model)
print(
    f"Total number of zero parameters: {zero_params}, "
    f"and nonzero parameters: {total_nonzero_params}"
)

# Prune, then measure again. prune_model returns the object it was given, so
# `pruned_model` and `model` refer to the same (now pruned) model.
pruning_factor = 0.25
pruned_model = prune_model(model, pruning_factor)
zero_params_pruned, total_nonzero_params_pruned = get_zero_and_nonzero_params(pruned_model)
print(
    f"Total number of zero parameters after pruning: {zero_params_pruned}, "
    f"and nonzero parameters after pruning: {total_nonzero_params_pruned}"
)

@app.route('/')
def index():
    """Return the before/after-pruning parameter counts as a JSON object."""
    original_summary = f"Total number of zero parameters: {zero_params}, and nonzero parameters: {total_nonzero_params}"
    pruned_summary = f"Total number of zero parameters after pruning: {zero_params_pruned}, and nonzero parameters after pruning: {total_nonzero_params_pruned}"
    return jsonify({
        'Original model': original_summary,
        'Pruned model': pruned_summary,
    })

@app.route('/chatbot', methods=['POST'])
def chatbot():
    """Answer a chat message posted as JSON: ``{"message": "<text>"}``.

    Returns:
        ``{"response": <generated text>}`` on success, or a 400 response with
        ``{"response": "Please provide a message."}`` when the body is not a
        JSON object or carries no non-empty string under ``message``.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # letting the framework abort the request with its own error page.
    payload = request.get_json(silent=True)
    user_input = payload.get('message') if isinstance(payload, dict) else None
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({'response': 'Please provide a message.'}), 400

    # Truncate over-long messages for the classifier, just as
    # generate_response() truncates the prompt for the generator.
    sentiment = sentiment_model(user_input, truncation=True)[0]
    response = generate_response(user_input, sentiment)
    return jsonify({'response': response})

def generate_response(user_input, sentiment):
    """Generate a reply to ``user_input`` with the pruned seq2seq model.

    Args:
        user_input: The user's message text.
        sentiment: Mapping with a ``'label'`` entry; a value of ``'NEGATIVE'``
            selects the empathetic prompt, anything else the neutral one.

    Returns:
        The decoded text of the first generated sequence.
    """
    is_negative = sentiment['label'] == 'NEGATIVE'
    prompt = (
        f"The user is upset, respond with empathy and support: {user_input}"
        if is_negative
        else f"Respond to the following query: {user_input}"
    )

    # The prompt is truncated to at most 512 tokens before generation.
    encode_options = dict(truncation=True, padding=True, max_length=512, return_tensors="tf")
    input_ids = tokenizer.encode(prompt, **encode_options)

    generated = pruned_model.generate(input_ids, max_length=250, num_beams=5, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)


if __name__ == '__main__':
    import os

    # The server listens on every interface, so the interactive debugger must
    # not be on by default; opt in explicitly with FLASK_DEBUG=1 when needed.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true')

    # The reloader runs the script in a second process, which would load and
    # prune the model a second time; keep it off.
    app.run(debug=debug_enabled, use_reloader=False, port=5000, host='0.0.0.0')


Writing flask_chatbot_app.py


In [3]:

import subprocess

# Best-effort cleanup: signal any process whose command line mentions the app
# script (-f matches against the full command line) before starting a new one.
pkill_command = ['pkill', '-f', 'flask_chatbot_app.py']
subprocess.run(pkill_command)

CompletedProcess(args=['pkill', '-f', 'flask_chatbot_app.py'], returncode=1)

In [4]:
# Start the Flask app as a background process (trailing &); nohup redirects
# its output to nohup.out.
!nohup python flask_chatbot_app.py &

nohup: appending output to 'nohup.out'


In [7]:
# List network sockets (numeric hosts and ports) and keep only the lines
# containing LISTEN, to check that the app is accepting connections on port 5000.
!sudo lsof -i -P -n | grep LISTEN

node         7 root   21u  IPv6  19195      0t0  TCP *:8080 (LISTEN)
kernel_ma   14 root    3u  IPv4  19135      0t0  TCP 172.28.0.12:6000 (LISTEN)
colab-fil   80 root    3u  IPv4  20873      0t0  TCP 127.0.0.1:3453 (LISTEN)
jupyter-n  136 root    7u  IPv4  20138      0t0  TCP 172.28.0.12:9000 (LISTEN)
python3    341 root   21u  IPv4  14245      0t0  TCP 127.0.0.1:36021 (LISTEN)
python3    378 root    3u  IPv4  33760      0t0  TCP 127.0.0.1:36161 (LISTEN)
python3    378 root    5u  IPv4  33761      0t0  TCP 127.0.0.1:36349 (LISTEN)
pt_main_t 2122 root   33u  IPv4  77725      0t0  TCP *:5000 (LISTEN)
pt_main_t 2433 root   33u  IPv4  77725      0t0  TCP *:5000 (LISTEN)
pt_main_t 2433 root   42u  IPv4  77725      0t0  TCP *:5000 (LISTEN)


In [8]:

import requests

# Define the URL of the Flask app
url = 'http://127.0.0.1:5000/chatbot'

# Send a request to the Flask app. The timeout keeps the cell from hanging
# forever if the server is not up; it is generous because generation is slow.
response = requests.post(url, json={'message': 'Hello, how are you?'}, timeout=300)
# Surface HTTP errors directly instead of failing later while parsing the body.
response.raise_for_status()
print(response.json())


{'response': 'I am fine.'}


In [9]:
# Ask a factual question; bound the wait and fail loudly on an HTTP error.
response = requests.post(url, json={'message': 'What is the capital of France?'}, timeout=300)
response.raise_for_status()
print(response.json())

{'response': 'paris'}


In [10]:
# Fetch the pruning statistics from the index route. 0.0.0.0 is a bind
# address, not a portable destination, so connect via loopback like the
# chatbot requests do.
response = requests.get('http://127.0.0.1:5000/', timeout=30)
response.raise_for_status()
print(response.json())

{'Original model': 'Total number of zero parameters: 0, and nonzero parameters: 783150080', 'Pruned model': 'Total number of zero parameters after pruning: 260489203, and nonzero parameters after pruning: 522660877'}
