In [None]:
!pip install pyngrok
import ast
import json
import re

from flask import Flask, request, jsonify
from pyngrok import ngrok

# **Initialize the LLM**

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_name = "google/flan-t5-xxl"

# The tokenizer comes from the same checkpoint as the weights.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested as float16 and device placement is left to
# `device_map="auto"`.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)


# **Flask server**

In [None]:
# Flask application object; the @app.route decorators below register their handlers on it.
app = Flask(__name__)
@app.route('/def', methods=['Get'])
def c():
    """Answer a request on ``/def`` with the fixed text ``Hello``."""
    greeting = 'Hello'
    return greeting
@app.route('/contacts', methods=['POST'])
def contacts():
    """Parse a Contacts-screen request into action/name/phone fields.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and parses the model's
    ``action: ..., name: ..., phone: ...`` answer.

    Returns:
        A JSON string with the keys ``action``, ``name`` and ``phone``; a
        field the model did not produce is the string ``"null"``. When the
        request body is not a JSON object, a JSON error payload with HTTP
        status 400 is returned instead.
    """
    data = request.get_json()
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    # Coerced to str because the text is used in a substring test below.
    contacts_text = str(data.get('text','No text found'))
    print(data)
    contacts_prompt = f"""
    You are an assistant inside the **Contacts** screen of a mobile app.
    
    Your task is to understand what the user wants and return their request as this exact format:
    
    action: <action>, name: <name or null>, phone: <phone or null>
    
    ### Valid actions:
    - add
    - view
    - call
    - dial
    
    ### Examples:
    
    User: "Add Sarah, her number is 6912345678."
    Output: action: add, name: Sarah, phone: 6912345678
    
    User: "Call my brother Tom."
    Output: action: call, name: Tom, phone: null
    
    User: "Open the dialer please."
    Output: action: dial, name: null, phone: null
    
    User: "Show me all my contacts."
    Output: action: view, name: null, phone: null
    
    User: "Make a call now."
    Output: action: call, name: null, phone: null
    
    User: "Call John."
    Output: action: call, name: John, phone: null
    
    User: "Dial the number pad."
    Output: action: dial, name: null, phone: null
    
    ### Now respond to this user request:
    "{contacts_text}"
    Output: 
    """

    inputs = tokenizer(contacts_prompt, return_tensors="pt").to("cuda")

    # No `temperature` argument: sampling is not enabled, so beam search
    # ignores it and the value 0.0 only produced a configuration warning.
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=2
    )

    contacts_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(contacts_result)
    contacts_list = [part.strip() for part in contacts_result.split(",")]
    print(contacts_list)
    # Keep only well-formed "key: value" parts; a part without ": " would make
    # dict() raise ValueError and fail the whole request.
    contacts_result_dict = dict(
        item.split(": ", 1) for item in contacts_list if ": " in item
    )
    for key in ["action", "name", "phone"]:
        contacts_result_dict.setdefault(key, "null")
    # Drop spaces and dashes so the phone value is one unbroken token.
    contacts_result_dict["phone"] = re.sub(r"[ -]", "", contacts_result_dict["phone"])
    # Discard a name that does not literally occur in the user's text.
    if contacts_result_dict["name"] not in contacts_text:
        contacts_result_dict["name"] = "null"
    json_str = json.dumps(contacts_result_dict)
    print(json_str)

    return json_str
@app.route('/add-edit-contact', methods=['POST'])
def addcontact():
    """Parse an Add Contact request into name/phone/emergency fields.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and parses the model's
    ``name: ..., phone: ..., emergency: ...`` answer.

    Returns:
        A JSON string with the keys ``name``, ``phone`` and ``emergency``; a
        field the model did not produce is the string ``"null"``. When the
        request body is not a JSON object, a JSON error payload with HTTP
        status 400 is returned instead.
    """
    data = request.get_json()
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    # Coerced to str because `.lower()` is called on the text below.
    addcontact_text = str(data.get('text','No text found'))
    print(data)
    # The format line names the third field `emergency`, matching the examples
    # and the key the parser below looks for.
    addcontact_prompt = f"""
    You are an assistant inside the **Add Contact** screen of a mobile app.
    
    Your task is to understand the name and the phone of the contact that the user wants to add and if it is an emergency contact:
    
    name: <name or null>, phone: <phone or null>, emergency: <true/false or null>
    
    If you cant find a name or phone or emergency you can set them to null.
    
    ### Examples:
    
    User: "Add Sarah, her number is 6912345678."
    Output: name: Sarah, phone: 6912345678, emergency: null
    
    User: "6912342232."
    Output: name: null, phone: 6912342232, emergency: null
    
    User: "The name is Mark"
    Output: name: Mark, phone: null, emergency: null

    User: "The name is Mark and it is an emergency contact"
    Output: name: Mark, phone: null, emergency: true
    
    ### Now respond to this user request:
    "{addcontact_text}"
    Output: 
    """
    inputs = tokenizer(addcontact_prompt, return_tensors="pt").to("cuda")

    # No `temperature` argument: sampling is not enabled, so beam search
    # ignores it and the value 0.0 only produced a configuration warning.
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=2
    )

    addcontact_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(addcontact_result)
    addcontact_list = [part.strip() for part in addcontact_result.split(",")]
    print(addcontact_list)
    # Keep only well-formed "key: value" parts; a part without ": " would make
    # dict() raise ValueError and fail the whole request.
    addcontact_result_dict = dict(
        item.split(": ", 1) for item in addcontact_list if ": " in item
    )
    for key in ["name", "phone","emergency"]:
        addcontact_result_dict.setdefault(key, "null")
    # Drop spaces and dashes so the phone value is one unbroken token.
    addcontact_result_dict["phone"] = re.sub(r"[ -]", "", addcontact_result_dict["phone"])
    # Only trust the emergency flag when the user actually said "emergency".
    if "emergency" not in addcontact_text.lower():
        addcontact_result_dict["emergency"] = "null"
    json_str = json.dumps(addcontact_result_dict)
    print(json_str)
    return json_str

@app.route('/viewcontact', methods=['POST'])
def viewcontact():
    """Detect the action requested on the View Contact screen.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and parses the model's ``action: ...`` answer.

    Returns:
        A JSON string containing at least the key ``action``; it is the
        string ``"null"`` when the model's answer carried no action. When the
        request body is not a JSON object, a JSON error payload with HTTP
        status 400 is returned instead.
    """
    viewdata = request.get_json()
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(viewdata, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    viewcontact_text = viewdata.get('text','No text found')
    print(viewdata)
    # The "null" examples are written without quotes so they agree with the
    # format declared in the prompt.
    viewcontact_prompt = f"""
    You are inside the **View Contact** screen of a mobile app.
    
    The user speaks a command, and your task is to detect the intended action.
    
    Valid actions:
    - call
    - delete
    - edit
    
    Respond with exactly one of these actions, depending on what the user clearly said.
    If the user didn’t clearly ask to call, delete, or edit the contact, respond with:
    action: null
    
    ### Format:
    action: <call/delete/edit/null>
    
    ### Examples:
    
    User: "Can you call this person?"
    Output: action: call
    
    User: "I want to remove this contact"
    Output: action: delete
    
    User: "Make changes to the contact"
    Output: action: edit
    
    User: "I need to update the number"
    Output: action: edit
    
    User: "Just checking the contact"
    Output: action: null
    
    User: "Who is this?"
    Output: action: null
    
    User: "Delete it now!"
    Output: action: delete
    
    ### Now respond to this user request:
    "{viewcontact_text}"
    Output:
    """

    inputs = tokenizer(viewcontact_prompt, return_tensors="pt").to("cuda")

    # No `temperature` argument: sampling is not enabled, so beam search
    # ignores it and the value 0.0 only produced a configuration warning.
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=2
    )

    view_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(view_result)
    viewcontact_list = [part.strip() for part in view_result.split(",")]
    print(viewcontact_list)
    # Skip parts without ": " (they would make the unpacking fail) and strip
    # stray double quotes so a quoted answer yields the bare value.
    viewcontact_result_dict = {
        key: value.strip('"')
        for key, value in (
            item.split(": ", 1) for item in viewcontact_list if ": " in item
        )
    }
    # Always answer with an `action` key, even when the model produced none.
    viewcontact_result_dict.setdefault("action", "null")
    json_str = json.dumps(viewcontact_result_dict)
    print(json_str)
    return json_str

@app.route('/soscall', methods=['POST'])
def soscall():
    """Identify which emergency service an SOS-screen request asks for.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and wraps the model's answer in a JSON object.

    Returns:
        A JSON string ``{"service": <answer>}`` where the answer is the
        model's output, trimmed and lower-cased. When the request body is not
        a JSON object, a JSON error payload with HTTP status 400 is returned
        instead.
    """
    sosdata = request.get_json()
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(sosdata, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400
    sos_text = sosdata.get('text','No text found')
    print(sosdata)
    sos_prompt = f"""
    You are inside the **SOS Emergency Call** screen of a mobile app.
    
    The user gives a voice command. Your task is to identify **which emergency service** they want to contact.
    
    Valid services:
    - ambulance
    - police
    - firedepartment
    
    If the user clearly asks for one of these services, respond with just the service name.
    If it's unclear or unrelated, respond with:
    null
    
    ### Examples:
    
    User: "Call an ambulance now!"
    Output: ambulance
    
    User: "I need the police"
    Output: police
    
    User: "There’s a fire in the building"
    Output: firedepartment
    
    User: "Emergency, call the fire department!"
    Output: firedepartment
    
    User: "Help me"
    Output: null
    
    User: "What's this app?"
    Output: null
    
    ### Now respond to this user request:
    "{sos_text}"
    Output:
    """

    inputs = tokenizer(sos_prompt, return_tensors="pt").to("cuda")

    # No `temperature` argument: sampling is not enabled, so beam search
    # ignores it and the value 0.0 only produced a configuration warning.
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=5,
        early_stopping=True,
        no_repeat_ngram_size=2
    )

    # Trim and lower-case so surrounding whitespace or capitalisation in the
    # model's answer does not change the reported service name.
    sos_result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip().lower()
    print(sos_result)
    sos_result_dict = {"service":sos_result}
    json_str = json.dumps(sos_result_dict)
    print(json_str)
    return json_str

@app.route('/location', methods=['POST'])
def location():
    """Parse a Location-screen request into action and address fields.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and parses the model's comma-separated
    ``key: value`` answer.

    Returns:
        A JSON response with the keys ``action``, ``name``, ``address``,
        ``zip`` and ``city`` (the string ``"null"`` for any the model did not
        produce). A JSON error payload is returned with status 400 when the
        body is not a JSON object, or 500 when the model call fails.
    """
    data = request.get_json()
    print("[REQUEST DATA]", data)
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    location_text = data.get('text', 'No text found')
    print("[LOCATION TEXT]", location_text)

    location_prompt = f"""
    You are an assistant inside the **Location** screen of a mobile app.
    
    Your task is to understand what the user wants to do and return the request in this exact format:
    
    action: <action>, name: <value or null>, address: <value or null>, zip: <value or null>, city: <value or null>
    
    DO NOT combine name with address or any other field.
    
    ### Valid actions:
    - add
    - view
    - search
    - menu
    
    ### Examples:
    
    User: "Can you add Eleni Pharmacy? It is on Papanastasiou 30 in Thessaloniki. ZIP is 54639."
    Output: action: add, name: Eleni Pharmacy, address: Papanastasiou 30, zip: 54639, city: Thessaloniki
    
    User: "Where is Eleni Pharmacy?"
    Output: action: search, name: Eleni Pharmacy, address: null, zip: null, city: null
    
    User: "I want to see about Eleni Pharmacy."
    Output: action: view, name: Eleni Pharmacy, address: null, zip: null, city: null
    
    User: "I want to return to the menu"
    Output: action: menu, name: null, address: null, zip: null, city: null
    
    ### Now respond to this user request:
    "{location_text}"
    Output:
    """

    # Boundary handler: any failure while tokenizing or generating is reported
    # to the client as a JSON 500 instead of an unhandled exception.
    try:
        inputs = tokenizer(location_prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_length=150,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        location_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("[MODEL OUTPUT]", location_result)

    except Exception as e:
        print("[MODEL ERROR]", e)
        return jsonify({"error": str(e)}), 500

    # Parsing cannot raise: only parts that contain ": " are split, so every
    # split yields exactly one key and one value.
    location_list = [part.strip() for part in location_result.split(",")]
    print("[SPLIT OUTPUT]", location_list)
    valid_parts = [item for item in location_list if ": " in item]
    print("[VALID PARTS]", valid_parts)
    location_result_dict = dict(item.split(": ", 1) for item in valid_parts)
    print("[DICT OUTPUT]", location_result_dict)

    for key in ["action", "name", "address", "zip", "city"]:
        location_result_dict.setdefault(key, "null")

    return jsonify(location_result_dict)


############## Add Location ##############
###########################################################################################################################

@app.route('/addlocation', methods=['POST'])
def addlocation():
    """Parse an add-location request into action and address fields.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    whose valid actions are ``add`` and ``menu``, and parses the model's
    comma-separated ``key: value`` answer.

    Returns:
        A JSON response with the keys ``action``, ``name``, ``address``,
        ``zip`` and ``city`` (the string ``"null"`` for any the model did not
        produce). A JSON error payload is returned with status 400 when the
        body is not a JSON object, or 500 when the model call fails.
    """
    data = request.get_json()
    print("[REQUEST DATA]", data)
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    addlocation_text = data.get('text', 'No text found')
    print("[LOCATION TEXT]", addlocation_text)

    addlocation_prompt = f"""
    You are an assistant inside the **Location** screen of a mobile app.
    
    Your task is to understand what the user wants to do and return the request in this exact format:
    
    action: <action>, name: <value or null>, address: <value or null>, zip: <value or null>, city: <value or null>
    
    DO NOT combine name with address or any other field.
    
    ### Valid actions:
    - add
    - menu
    
    ### Examples:
    
    User: "Can you add Eleni Pharmacy? It is on Papanastasiou 30 in Thessaloniki. ZIP is 54639."
    Output: action: add, name: Eleni Pharmacy, address: Papanastasiou 30, zip: 54639, city: Thessaloniki
    
    User: "I want to return to the menu"
    Output: action: menu, name: null, address: null, zip: null, city: null
    
    ### Now respond to this user request:
    "{addlocation_text}"
    Output:
    """

    # Boundary handler: any failure while tokenizing or generating is reported
    # to the client as a JSON 500 instead of an unhandled exception.
    try:
        inputs = tokenizer(addlocation_prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_length=150,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        addlocation_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("[MODEL OUTPUT]", addlocation_result)

    except Exception as e:
        print("[MODEL ERROR]", e)
        return jsonify({"error": str(e)}), 500

    # Parsing cannot raise: only parts that contain ": " are split, so every
    # split yields exactly one key and one value.
    addlocation_list = [part.strip() for part in addlocation_result.split(",")]
    print("[SPLIT OUTPUT]", addlocation_list)
    valid_parts = [item for item in addlocation_list if ": " in item]
    print("[VALID PARTS]", valid_parts)
    addlocation_result_dict = dict(item.split(": ", 1) for item in valid_parts)
    print("[DICT OUTPUT]", addlocation_result_dict)

    for key in ["action", "name", "address", "zip", "city"]:
        addlocation_result_dict.setdefault(key, "null")

    return jsonify(addlocation_result_dict)

############## Edit Location ##############
###########################################################################################################################

@app.route('/editlocation', methods=['POST'])
def editlocation():
    """Parse an edit-location request into action and address fields.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    whose valid actions are ``edit`` and ``menu``, and parses the model's
    comma-separated ``key: value`` answer.

    Returns:
        A JSON response with the keys ``action``, ``name``, ``address``,
        ``zip`` and ``city`` (the string ``"null"`` for any the model did not
        produce). A JSON error payload is returned with status 400 when the
        body is not a JSON object, or 500 when the model call fails.
    """
    data = request.get_json()
    print("[REQUEST DATA]", data)
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    editlocation_text = data.get('text', 'No text found')
    print("[LOCATION TEXT]", editlocation_text)

    editlocation_prompt = f"""
    You are an assistant inside the **Location** screen of a mobile app.
    
    Your task is to understand what the user wants to do and return the request in this exact format:
    
    action: <action>, name: <value or null>, address: <value or null>, zip: <value or null>, city: <value or null>
    
    DO NOT combine name with address or any other field.
    
    ### Valid actions:
    - edit
    - menu
    
    ### Examples:
    
    User: "Can you update Eleni Pharmacy? The new address is Venizelou 10 in Thessaloniki and the ZIP is 54640."
    Output: action: edit, name: Eleni Pharmacy, address: Venizelou 10, zip: 54640, city: Thessaloniki
    
    User: "I want to return to the menu"
    Output: action: menu, name: null, address: null, zip: null, city: null
    
    ### Now respond to this user request:
    "{editlocation_text}"
    Output:
    """

    # Boundary handler: any failure while tokenizing or generating is reported
    # to the client as a JSON 500 instead of an unhandled exception.
    try:
        inputs = tokenizer(editlocation_prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_length=150,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        editlocation_result = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("[MODEL OUTPUT]", editlocation_result)

    except Exception as e:
        print("[MODEL ERROR]", e)
        return jsonify({"error": str(e)}), 500

    # Parsing cannot raise: only parts that contain ": " are split, so every
    # split yields exactly one key and one value.
    editlocation_list = [part.strip() for part in editlocation_result.split(",")]
    print("[SPLIT OUTPUT]", editlocation_list)
    valid_parts = [item for item in editlocation_list if ": " in item]
    print("[VALID PARTS]", valid_parts)
    editlocation_result_dict = dict(item.split(": ", 1) for item in valid_parts)
    print("[DICT OUTPUT]", editlocation_result_dict)

    for key in ["action", "name", "address", "zip", "city"]:
        editlocation_result_dict.setdefault(key, "null")

    return jsonify(editlocation_result_dict)

############## View Location ##############
###########################################################################################################################

@app.route('/viewlocation', methods=['POST'])
def viewlocation():
    """Classify a View Location request as edit, delete, goto or menu.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and validates the model's one-word answer.

    Returns:
        ``{"action": <edit|delete|goto|menu>}`` as a JSON response. A JSON
        error payload is returned with status 400 when the body is not a JSON
        object or the model's answer is not a valid action, or 500 when the
        model call fails.
    """
    data = request.get_json()
    print("[REQUEST DATA]", data)
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_request = data.get('text', 'No text found')
    print("[LOCATION TEXT]", user_request)

    # The allowed-answers line lists `menu` as well, so it agrees with the
    # valid actions above it and with the validation at the end.
    prompth = f"""
    You are an assistant that determines what the user wants to do with a saved location in a mobile app.
    
    Valid actions are:
    - edit: The user wants to change or update a location’s details (name, address, zip, or city).
    - delete: The user wants to remove or delete a saved location.
    - goto: The user wants to navigate to or get directions to a saved location.
    - menu: The user wants to return to the menu.
    
    Your task is to read the user’s request and respond with ONLY ONE of the following words:
    edit, delete, goto, menu
    
    Examples:
    
    User: "Can you update the address of Maria's Pharmacy?"
    Output: edit
    
    User: "Remove Maria's Pharmacy from my saved places."
    Output: delete
    
    User: "Take me to Maria's Pharmacy."
    Output: goto
    
    User: "Navigate to my doctor’s office."
    Output: goto
    
    User: "Change the ZIP code for Maria's Pharmacy."
    Output: edit
    
    User: "Delete the gas station location."
    Output: delete

    User: "I want to return to the menu"
    Output: menu
    
    Now decide what action the user wants to perform:
    
    User: "{user_request}"
    
    Output:
    """

    # Boundary handler: any failure while tokenizing or generating is reported
    # to the client as a JSON 500 instead of an unhandled exception.
    try:
        inputs = tokenizer(prompth, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_length=20,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        action_result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print("[MODEL OUTPUT]", action_result)

    except Exception as e:
        print("[MODEL ERROR]", e)
        return jsonify({"error": str(e)}), 500

    # Clean up the output just in case
    action_result = action_result.lower()
    if action_result not in ["edit", "delete", "goto", "menu"]:
        return jsonify({"error": "Invalid action detected", "raw": action_result}), 400

    return jsonify({"action": action_result})

############## MainMenuScreen ##############
###########################################################################################################################

@app.route('/mainscreen', methods=['POST'])
def mainscreen():
    """Map a main-menu voice command to the name of a screen.

    Reads the ``text`` entry of the JSON request body, embeds it in a prompt
    with worked examples, and validates the model's one-word answer.

    Returns:
        ``{"action": <screen name>}`` as a JSON response. A JSON error payload
        is returned with status 400 when the body is not a JSON object or the
        model's answer is not a known screen, or 500 when the model call
        fails.
    """
    data = request.get_json()
    print("[REQUEST DATA]", data)
    # A body such as `null` or `[]` parses fine but has no `.get`; answer 400
    # instead of crashing with an AttributeError.
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    user_request = data.get('text', 'No text found')
    print("[MAINSCREEN TEXT]", user_request)

    prompt = f"""
    You are an assistant that maps a user's voice command to a screen name in a mobile app.
    
    The valid screens are:
    - sos
    - return_home
    - locations
    - contacts
    - pills
    - profile
    - other (for anything else that doesn't match the screens above)
    
    Your task is to respond with only one of these screen names based on what the user said.
    
    Examples:
    
    User: "I need help right now!"  
    Output: sos
    
    User: "Take me back to my home."  
    Output: return_home
    
    User: "I want to go to the pharmacy."  
    Output: locations
    
    User: "Show me the nearest hospital."  
    Output: locations
    
    User: "I need to take my pills."  
    Output: pills
    
    User: "I want to call my daughter."  
    Output: contacts
    
    User: "Change my name."  
    Output: profile
    
    User: "Tell me the weather."  
    Output: other
    
    Now decide what screen the user wants to go to:
    
    User: "{user_request}"
    
    Output:
    """

    # Boundary handler: any failure while tokenizing or generating is reported
    # to the client as a JSON 500 instead of an unhandled exception.
    try:
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_length=20,
            num_beams=5,
            early_stopping=True,
            no_repeat_ngram_size=2
        )
        screen_result = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        print("[MODEL OUTPUT]", screen_result)

    except Exception as e:
        print("[MODEL ERROR]", e)
        return jsonify({"error": str(e)}), 500

    # Case-insensitive match against the accepted screen names.
    screen_result = screen_result.lower()
    valid_screens = ["sos", "return_home", "locations", "contacts", "pills", "profile", "menu", "other"]
    if screen_result not in valid_screens:
        return jsonify({"error": "Invalid screen detected", "raw": screen_result}), 400

    return jsonify({"action": screen_result})

if __name__ == '__main__':
    # Function-scope import keeps this entry point self-contained.
    import os

    # The ngrok auth token is a credential and must not be stored in source;
    # it is read from the NGROK_AUTHTOKEN environment variable instead. Any
    # token that was ever committed should be treated as leaked and revoked.
    ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
    if ngrok_token:
        ngrok.set_auth_token(ngrok_token)
    else:
        print(" * NGROK_AUTHTOKEN is not set; relying on the existing ngrok configuration")
    # Open a public tunnel to the local port the Flask server listens on.
    public_url = ngrok.connect(5000)
    print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:5000\"")
    app.run(port=5000)