In [14]:
from duckduckgo_search import DDGS
import re
import os
import pyttsx3

from groq import Groq
if os.getenv("GROQ_API_KEY") is None:
    os.environ["GROQ_API_KEY"] = ''

    
from llama_index.llms.ollama import Ollama
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.core import StorageContext, load_index_from_storage

#from llama_index.embeddings.openai import OpenAIEmbedding
import os

import whisper
from langdetect import detect
from pytube import YouTube


import tempfile

#_________________________________________________________________________________________________________________

#Youtube Search Video RAG with Vector DB

def startfile(fn):
    """Open a file with the system ``open`` command.

    The path is passed as a separate argument instead of being interpolated
    into a shell string, so file names containing spaces or shell
    metacharacters are opened correctly and cannot inject extra commands.

    Args:
      fn: Path of the file to open.
    """
    import subprocess  # local import keeps this helper self-contained

    try:
        subprocess.run(["open", fn], check=False)
    except OSError as e:
        # Stay best-effort, like the previous os.system call: a missing
        # `open` command must not abort the surrounding pipeline.
        print(f"Could not open {fn}: {e}")

def create_and_open_txt(text, filename):
    """Write ``text`` to ``filename`` and open the file with ``startfile``.

    The file is written as UTF-8 so that text containing non-ASCII
    characters does not fail on platforms whose default encoding cannot
    represent them.

    Args:
      text: The string to store.
      filename: Path of the text file to create (overwritten if it exists).
    """
    with open(filename, "w", encoding="utf-8") as file:
        file.write(text)
    startfile(filename)


def delete_audio_file(file_path):
    """Remove ``file_path`` and print whether the removal succeeded.

    Args:
      file_path: Path of the file to delete.

    Returns:
      None. An ``OSError`` raised by the removal is reported on stdout
      instead of being propagated.
    """
    try:
        os.remove(file_path)
    except OSError as e:
        print(f"Error deleting audio file: {e}")
    else:
        print(f"Audio file {file_path} deleted successfully.")

    
def youtube_audio_text(url, path):
    """Download a YouTube video's audio, transcribe it and save the transcript.

    The audio-only stream is saved as ``audio.mp3`` inside ``path``,
    transcribed with the Whisper "base" model, and then deleted. The
    transcript is written to ``output_Text.txt`` in the same directory and
    opened via ``create_and_open_txt``.

    Args:
      url: URL of the YouTube video.
      path: Directory used for the downloaded audio and the transcript.

    Raises:
      ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {url}")

    filename = "audio.mp3"
    audio_stream.download(output_path=path, filename=filename)

    # Build paths with os.path.join so the function is not tied to the
    # Windows "\\" separator.
    audio_path = os.path.join(path, filename)
    print(f"Audio downloaded to {audio_path}")

    model = whisper.load_model("base")
    try:
        result = model.transcribe(audio_path)
    finally:
        # Remove the downloaded audio even when transcription fails.
        delete_audio_file(audio_path)
    transcribed_text = result["text"]

    create_and_open_txt(transcribed_text, os.path.join(path, "output_Text.txt"))


def Youtube_RAG(model, path):
    """Index the documents found in ``path`` and ask the LLM for a summary.

    Args:
      model: Name of the Ollama model assigned to ``Settings.llm``.
      path: Directory whose files are loaded with ``SimpleDirectoryReader``.

    Returns:
      The object returned by ``query_engine.query("Summary of the Document")``;
      it is also printed.
    """
    # NOTE(review): `llm` is not referenced again in this function; the model
    # actually configured for querying is the `Settings.llm` assignment below.
    llm = Ollama(model="mistral", request_timeout=30.0)

    #inp = input("enter 1 - Hugging Face Embedding \n2 - OpenAI Embedding")
    # The embedding choice is fixed to option 1 (Hugging Face embedding).
    inp = 1
    if inp == 1:
        #
        Settings.embed_model = HuggingFaceEmbedding(model_name="flax-sentence-embeddings/all_datasets_v4_MiniLM-L6")


    #Settings.llm = Ollama(model="mistral", request_timeout=200.0)
    Settings.llm = Ollama(model=model, request_timeout=200.0)
    
    documents = SimpleDirectoryReader(path).load_data()
    sen_split=TokenTextSplitter()
    pipeline = IngestionPipeline(
        transformations=[sen_split]
    )
    # NOTE(review): `nodes` is not used afterwards; the index below is built
    # from `documents` with the same splitter. Confirm whether this run is
    # needed for a side effect before removing it.
    nodes=pipeline.run(show_progress=True,documents=documents, in_place=True)
    index = VectorStoreIndex.from_documents(
        documents, transformations=[sen_split]
    )
    # Persist the index to ./indexDB and immediately load it back from there.
    index.storage_context.persist(persist_dir="./indexDB")
    storage_context = StorageContext.from_defaults(persist_dir="./indexDB")
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    response = query_engine.query("Summary of the Document")
    print(response)
    return response
    
#     while True:
#         #
#         user_input = input("Enter 'stop' to end: ")
#         if user_input.lower() == 'stop':
#             print("Stopping the loop.")
#             break
#         else:
#             response = query_engine.query(user_input)
#             print()
#             print(response)
        

def youtube(url):
    """Transcribe a YouTube video and return an LLM summary of the transcript.

    A fresh temporary directory receives the transcript produced by
    ``youtube_audio_text``; ``Youtube_RAG`` then indexes that directory.

    Args:
      url: URL of the YouTube video.

    Returns:
      Whatever ``Youtube_RAG`` returns for the selected model.
    """
    workdir = tempfile.mkdtemp()
    youtube_audio_text(url, workdir)

    # The model choice is fixed to option 1; option 2 is kept for reference.
    model_id = 1
    model_names = {1: "mistral", 2: "codellama:13b"}
    if model_id in model_names:
        response = Youtube_RAG(model_names[model_id], workdir)
    return response

#_________________________________________________________________________________________________________________
    
    
# Groq is currently not used: the daily token quota was exhausted.
def O_LLM_GroQ(query):
    """Send ``query`` as a single user message to Groq and return the reply.

    The API key is read from the ``GROQ_API_KEY`` environment variable and
    the request uses the ``mixtral-8x7b-32768`` model with temperature 0.

    Args:
      query: Prompt text sent as the user message.

    Returns:
      The message content of the first completion choice.
    """
    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    user_message = {"role": "user", "content": query}
    completion = groq_client.chat.completions.create(
        messages=[user_message],
        model="mixtral-8x7b-32768",
        temperature=0,
    )
    return completion.choices[0].message.content


import google.generativeai as genai

#Gemini 1.0 pro, good not great need to test on 1.5 pro in google hackathon
def O_LLM(query):
    """Send ``query`` to the Gemini ``gemini-pro`` model and return its text.

    The API key is taken from the ``GEMINI_API_KEY`` environment variable,
    falling back to ``GOOGLE_API_KEY``, instead of being hard-coded in the
    source. When neither is set an empty key is used, as before.

    Args:
      query: Prompt text to send to the model.

    Returns:
      The ``text`` attribute of the generation response.
    """
    api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY") or ""
    # Configure the client before building the model so the key is in place.
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(query)
    return response.text



def extract_thoughts_actions_output(text):
    """Collect the numbered Thought/Action/Output entries found in ``text``.

    Each entry is the remainder of the line that follows a label such as
    ``Thought 1:``, ``Action 2:`` or ``Output 3:``.

    NOTE: a second function with this same name is defined further down in
    this file and replaces this one once the module has been executed.

    Args:
      text: Text to scan.

    Returns:
      A tuple ``(thoughts, actions, outputs)`` of three lists of strings.
    """
    patterns = (
        r'Thought \d+:\s*(.*)',
        r'Action \d+:\s*(.*)',
        r'Output \d+:\s*(.*)',
    )
    thoughts, actions, outputs = (re.findall(p, text) for p in patterns)
    return thoughts, actions, outputs

def extract_info(texts):
    """
    This function extracts tools and inputs from a list of text strings.

    Entries that do not start with a tool name or that contain no
    bracketed input are skipped instead of raising ``IndexError``. When the
    same tool appears more than once, the last input wins.

    Args:
      texts: A list of strings containing instructions with tools and inputs in brackets.

    Returns:
      A dictionary where keys are tools (e.g., "Search", "Summarize", "Calculate") 
      and values are corresponding inputs (e.g., "funding received by Mistral Ai from investors").
    """
    tools = {}
    for text in texts:
        # The tool is the leading word; the input is the first [...] group.
        tool_match = re.match(r'\w+', text)
        input_match = re.search(r'\[(.*?)\]', text)
        if tool_match is None or input_match is None:
            continue  # malformed entry: nothing usable to record
        tools[tool_match.group(0)] = input_match.group(1)
    return tools



def duck_go(Keyword):
    """Search DuckDuckGo for ``Keyword`` and join the result snippets.

    Args:
      Keyword: Text to search for.

    Returns:
      The ``body`` field of up to 10 results, joined with single spaces.
    """
    hits = DDGS().text(Keyword, max_results=10)
    return ' '.join([hit['body'] for hit in hits])

def Calculate(expression):
    """Evaluate an arithmetic expression and return the result as text.

    Previously this function only printed its argument and returned
    ``None``, so the calculator tool never produced an observation. The
    expression is parsed with ``ast`` and only numeric literals,
    parentheses, unary ``+``/``-`` and the operators ``+ - * / // % **``
    are evaluated, so arbitrary code in the expression is rejected.

    Args:
      expression: Arithmetic expression, e.g. ``"(1 + 2) * 3"``.

    Returns:
      The result converted with ``str``, or a string starting with
      ``"Error: "`` when the expression is invalid or cannot be evaluated.
    """
    import ast
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

    def evaluate(node):
        if isinstance(node, ast.Expression):
            return evaluate(node.body)
        if isinstance(node, ast.Constant):
            value = node.value
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                return value
        elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            left = evaluate(node.left)
            right = evaluate(node.right)
            # Keep exponentiation bounded so one expression cannot stall the run.
            if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                raise ValueError("exponent too large")
            return binary_ops[type(node.op)](left, right)
        elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        raise ValueError("unsupported expression")

    print(f"Calculating: {expression}")
    try:
        tree = ast.parse(str(expression).strip(), mode="eval")
        return str(evaluate(tree))
    except (SyntaxError, ValueError, TypeError, ArithmeticError) as e:
        return f"Error: {e}"

    
import subprocess

def execute_python(code):
    """Run ``code`` in a separate Python process and capture its output.

    The child process uses the interpreter that is running this program
    (``sys.executable``) rather than whatever ``python`` happens to be on
    PATH, which may be missing or a different installation.

    Args:
      code: Python source passed to the interpreter via ``-c``.

    Returns:
      A tuple ``(text, err)``: on success ``text`` is the captured stdout
      and ``err`` is 0; on a non-zero exit status ``text`` is the captured
      stderr and ``err`` is 1.
    """
    import sys

    # NOTE(security): the code is executed without any sandboxing; callers
    # pass model-generated code here, so treat the input as untrusted.
    interpreter = sys.executable or "python"
    completed = subprocess.run([interpreter, "-c", code], capture_output=True, text=True)
    if completed.returncode != 0:
        print("Error Found")
        return completed.stderr, 1
    return completed.stdout, 0

#------------------------------------------------------------------------------------------------
def extract_text(input_string, option):
    """Return the content of the first fenced block in ``input_string``.

    Args:
      input_string: Text that may contain a triple-backtick fenced block.
      option: 1 to look for a fence opened with the ``python`` tag; any
        other value to look for a fence without requiring a tag.

    Returns:
      The text between the fences with surrounding whitespace stripped, or
      ``None`` when no matching fence pair is found.
    """
    fence_pattern = r'\```python(.*?)\```' if option == 1 else r'\```(.*?)\```'
    found = re.search(fence_pattern, input_string, re.DOTALL)
    return found.group(1).strip() if found else None
#------------------------------------------------------------------------------------------------
def check_substring(main_string, substring):
    """Report whether ``substring`` occurs in ``main_string``, ignoring case.

    Args:
      main_string: Text to search in.
      substring: Text to look for.

    Returns:
      True if the lower-cased ``substring`` is contained in the lower-cased
      ``main_string``, otherwise False.
    """
    return substring.lower() in main_string.lower()
#------------------------------------------------------------------------------------------------
# Number of consecutive failed executions in the current repair chain.
Error_Counter = 0
# Upper bound on repair attempts so a model that keeps returning broken code
# cannot recurse forever.
MAX_CODE_RETRIES = 3


def _extract_code(answer):
    """Return the code inside the first fenced block of ``answer``, else ``answer``."""
    if check_substring(answer, "```python"):
        extracted = extract_text(answer, 1)
    elif check_substring(answer, "```"):
        extracted = extract_text(answer, 0)
    else:
        print("```python ,NOT FOUND")
        extracted = None
    # Fall back to the raw answer when nothing usable sits between the fences.
    return extracted or answer


def code_processing(answer):
    """Extract Python code from an LLM answer, run it and return its output.

    When execution fails, the code and the error text are sent back to the
    LLM through ``Code_error_recur`` and the repaired answer is processed
    again, up to ``MAX_CODE_RETRIES`` times.

    Args:
      answer: LLM reply, either raw code or code inside a fenced block.

    Returns:
      The captured stdout of the executed code, or the last error text when
      every repair attempt failed.
    """
    global Error_Counter
    print("\n\n")
    code = _extract_code(answer)
    print("Code Extracted: ", code)
    result, err = execute_python(code)
    if err == 0:
        print("Returning Result to Prompt: ", result)
        Error_Counter = 0
        return result
    # Return the retry result so the caller receives the repaired output.
    return Code_error_recur(code, result)


def Code_error_recur(code_to_execute, result):
    """Ask the LLM to fix failing code and process its new answer.

    Args:
      code_to_execute: The code that failed.
      result: The error text produced by the failed execution.

    Returns:
      The result of ``code_processing`` on the repaired answer, or
      ``result`` unchanged once ``MAX_CODE_RETRIES`` has been exceeded.
    """
    # `global` is required: without it the increment raises UnboundLocalError.
    global Error_Counter
    Error_Counter = Error_Counter + 1
    if Error_Counter > MAX_CODE_RETRIES:
        Error_Counter = 0
        return result
    Error_query = "Code: " + code_to_execute + "\n Error: " + result + "\n Dont add any comments and always write code under these tags '```python' and '```' " 
    resp = O_LLM(Error_query)
    return code_processing(resp)

    

def calculate(expression):
    """Evaluate ``expression`` with ``eval`` and return the outcome.

    Args:
      expression: Python expression to evaluate.

    Returns:
      The evaluated value, or the string ``"Error: <message>"`` when
      evaluation raises an exception.
    """
    # NOTE(review): eval() runs arbitrary Python; do not pass untrusted text.
    try:
        value = eval(expression)
    except Exception as e:
        return f"Error: {str(e)}"
    else:
        return value
    
    

def Voice(voice_response):
    """Speak ``voice_response`` aloud with pyttsx3.

    Args:
      voice_response: Text to speak.

    Returns:
      The string ``"Speaking completed"`` after ``runAndWait`` returns.
    """
    tts = pyttsx3.init()
    # Speech rate and volume (volume is on a 0-1 scale).
    for prop, setting in (('rate', 190), ('volume', 0.9)):
        tts.setProperty(prop, setting)
    tts.say(voice_response)
    tts.runAndWait()
    return "Speaking completed"

    
#Afterwards Update match with elif

def handle_request(data, thought):
    """Run the first supported tool found in ``data`` and return its output.

    Tools are checked in this order: Search, Calculator (also accepted as
    Calculate), Python, Voice, Youtube, Human.

    Args:
      data: Mapping from tool name to the tool's input text.
      thought: The thought being worked on; used as the summarization goal
        for search results.

    Returns:
      The output of the selected tool, or ``None`` (after printing a
      message) when ``data`` contains no supported tool.
    """
    if "Search" in data:
        output = duck_go(data["Search"])
        prompt = f"Consider yourself as a professional web researcher and Summarize the text based on your task assignmed by boss in plain text with space formating: {thought}\n\n\n Text: {output}"
        return O_LLM(prompt)

    # The prompts advertise the tool as "Calculator"; the old code tested that
    # key but then read data["Calculate"], which raised KeyError.
    for calc_key in ("Calculator", "Calculate"):
        if calc_key in data:
            return Calculate(data[calc_key])

    if "Python" in data:
        return code_processing(data["Python"])
    if "Voice" in data:
        return Voice(data["Voice"])
    if "Youtube" in data:
        return youtube(data["Youtube"])
    if "Human" in data:
        return input(data["Human"])

    print("Invalid key. Please use 'Search' or 'Calculate'")
    return None

        
def convert_list_to_dict(data):
    """Turn ``Tool[input]`` strings into a ``{tool: input}`` dictionary.

    The key is the text before the first ``[`` (stripped); the value is the
    text after it up to the last ``]`` (stripped). Items without ``[`` and
    items whose value is empty are skipped. Later duplicates overwrite
    earlier ones.

    Args:
      data: Iterable of strings such as ``"Search[some query]"``.

    Returns:
      Dictionary mapping tool names to their input text.
    """
    parsed = {}
    for entry in data:
        name, bracket, remainder = entry.partition('[')
        if not bracket:
            continue  # no '[' present: nothing to split on
        payload = remainder.rsplit(']', 1)[0].strip()
        if payload:
            parsed[name.strip()] = payload
    return parsed

# extract_thoughts_actions_output is no longer used; the newer extract_actions function below works better.
def extract_thoughts_actions_output(text):
    """Print ``text`` and extract its thoughts, actions and outputs.

    Matching is case-insensitive. A thought is the rest of the line after
    ``Thought N:``. An action is everything after ``Action N:`` up to the
    next ``Thought N`` or the end of the text, stripped. An output is the
    rest of the line after ``output:``.

    Args:
      text: Text to scan.

    Returns:
      A tuple ``(thoughts, actions, outputs)`` of three lists of strings.
    """
    print(text)
    flags = re.IGNORECASE

    thoughts = re.findall(r'Thought \d+:\s*(.*)', text, flags)
    raw_actions = re.findall(r'Action \d+:\s*([\s\S]*?)(?=(?:Thought \d+|$))', text, flags)
    print("actio_match:", raw_actions)
    actions = [chunk.strip() for chunk in raw_actions]
    outputs = re.findall(r'output:\s*(.*)', text, flags)

    return thoughts, actions, outputs

def extract_actions(text):
    """Find every ``Tool[input]`` that follows an ``Action`` label in ``text``.

    The label may be written ``Action`` or ``action`` and may be followed by
    digits, colons and whitespace.

    Args:
      text: Text to scan.

    Returns:
      List of matched ``Tool[input]`` strings, in order of appearance.
    """
    action_re = re.compile(r'[Aa]ction(?:[:\s\d]+)?\s*([^\[\]]+\[[^\[\]]+\])')
    return action_re.findall(text)

def token_count(text):
    """Count the whitespace-separated words in ``text``.

    Args:
      text: String to measure.

    Returns:
      Number of items produced by ``text.split()``.
    """
    return len(text.split())


def actions_perform(resp,thought):
    """Extract the actions from an LLM reply and run the matching tool.

    Args:
      resp: LLM reply expected to contain ``Action: Tool[input]`` entries.
      thought: The thought the actions belong to; forwarded to
        ``handle_request``.

    Returns:
      Whatever ``handle_request`` returns for the extracted tools.
    """
    actions_list = extract_actions(resp)
    actions_tools_dic = convert_list_to_dict(actions_list)
    print("Action Tools Found: (List) ",actions_tools_dic)
    # The former unused lookup of the global `thoughts_list` was removed: it
    # made this function fail whenever that global was missing or empty.
    return handle_request(actions_tools_dic, thought)


def summary_context(text, max_tokens=180):
    """Summarize ``text`` with the LLM when it exceeds ``max_tokens`` words.

    Args:
      text: The accumulated context to keep within bounds.
      max_tokens: Word count above which the text is summarized. Defaults
        to 180, the previously hard-coded threshold.

    Returns:
      ``text`` unchanged when it has at most ``max_tokens`` words,
      otherwise the summary returned by ``O_LLM``.
    """
    context_len = token_count(text)
    print("Token Length: ",context_len)
    if context_len > max_tokens:
        summm_prompt = f"""
        Your a Editor working in a company AGNOS, your task is to summarize the text given by your manager. You have to perform this job carefully as the company development is dependent on your work. 
        Now summarize this text without loosing any important information, which may include, numbers, values, names, strategies, list or nested lists or any other. 
        You can delete any matter if it doesn't belongs to the context your working.
        You cannot rewrite the summary once writen so carefully do the work. All the best.

        Text:
        {text}
        """
        text = O_LLM(summm_prompt)
    return text


In [15]:
# The task the agent pipeline works on; it is interpolated into the prompts below.
Task = "Perform a detailed analysis of recent earth quake in taiwan its effects, damages, etc"



# One worked example (a task followed by numbered thoughts) that shows the
# model the expected "Thought N: ..." layout for the planning step.
Example_prompt_thoughts = """
Consider yourself a manager at a company called AGNOS Business Solutions, and break down this complex task from your boss (for company clients) into multiple simple tasks as thoughts for your assistant to complete. Don't respond to any other tools except these, as they are new and cannot be used other than these: 
Tools available to use: Search[Text or URL to search in the internet]

Task: I want a detailed analysis report of Competators of Luxury shoe market for investors to launch my new shoe brand

Thought 1: First I need to find, which companies work in luxury shoe market in internet
Thought 2: Second, Make a list of all the companies
Thought 3: Third, Now search which products, Number of products, revenue, SWOT analysis of each company listed
Thought 4: Fourth, Now With all the companies information write a important summary
Thought 5: With all the information, I need to find where can we build new shoe brand without much competation 

"""





# Planning prompt: the example above followed by the actual task and the
# instruction to write thoughts only.
Task_promp_thoughts = f"""{Example_prompt_thoughts}
Task: {Task}

Now write simple multiple Thoughts for this Task and use only tools mentioned. Write Thoughts for this task below and Dont write any actions its not your work to perform.
"""


# Show the assembled planning prompt.
print(Task_promp_thoughts)



# Worked example for the action step: it lists the available tools and shows
# how a thought is answered with "Action: Tool[input]" lines.
Example_prompt_Actions = """
You were an assistant to the manager at AGNOS business solutions Company which have many clients; previously, he gave you tasks and multiple thoughts, which you performed perfectly. Now he gave you the most important task and thoughts. You need to respond to the thoughts carefully and correctly, as your promotion is in his hands. 

He said to use only these Tools: Search[Text to search in the internet or URL], Calculator[Expression or numbers to calculate]
Task: I want a detailed analysis report of Competators of Luxury shoe market for investors to launch my new shoe brand
Thought 1: First I need to find, which companies work in luxury shoe market
Observation: "The most expensive shoe brand in the world is reportedly Stuart Weitzman, who designed a pair of shoes valued at $3 million. Jimmy Choo shoes range in price from $395 to $4,595. Alexander McQueen shoes start at a price point of $620. Valentino's shoe collection starts at a price point of $845.Feb 7, 2024" 
Thought 2: Second, Make a list of all the companies
1. Stuart Weitzman
2. Jimmy Choo shoes
3. Alexander McQueen
4. Valentino shoe
Thought 3: Third, Now search which products, Number of products. 
Action: Search[Stuart Weitzman shoes all products]   
Action: Search[Jimmy Choo shoes all products]
Action: Search[Alexander McQueen shoes all products]
Action: Search[Valentino shoes all products]
"""



Consider yourself a manager at a company called AGNOS Business Solutions, and break down this complex task from your boss (for company clients) into multiple simple tasks as thoughts for your assistant to complete. Don't respond to any other tools except these, as they are new and cannot be used other than these: 
Tools available to use: Search[Text or URL to search in the internet]

Task: I want a detailed analysis report of Competators of Luxury shoe market for investors to launch my new shoe brand

Thought 1: First I need to find, which companies work in luxury shoe market in internet
Thought 2: Second, Make a list of all the companies
Thought 3: Third, Now search which products, Number of products, revenue, SWOT analysis of each company listed
Thought 4: Fourth, Now With all the companies information write a important summary
Thought 5: With all the information, I need to find where can we build new shoe brand without much competation 


Task: Perform a detailed analysis of recent

In [16]:
import markdown2
import pdfkit

def Markdown_pdf(markdown_text, output_path=r'C:\Users\akhil\Downloads\Final_report_test_news.pdf'):
    """Render Markdown text to a PDF file.

    Args:
      markdown_text: Markdown source of the report.
      output_path: Destination of the PDF. Defaults to the location that
        was previously hard-coded, so existing callers are unaffected.
    """
    html_text = markdown2.markdown(markdown_text)

    # Convert HTML to PDF
    pdfkit.from_string(html_text, output_path)
    
def To_do_list(text):
    """Extract the text of every ``Thought N: ...`` line from ``text``.

    Matching is case-insensitive and needs exactly one whitespace character
    between the colon and the thought text.

    Args:
      text: LLM reply containing numbered thoughts.

    Returns:
      List of thought strings without their ``Thought N:`` prefix.
    """
    thought_re = re.compile(r'(?i)(?<=thought\s)\d+:\s(.+)')
    return [found.group(1) for found in thought_re.finditer(text)]

# Step 1: ask the LLM to break the task into thoughts and turn them into a list.
thoughts_resp = O_LLM(Task_promp_thoughts)
print(thoughts_resp)

thoughts_list = To_do_list(thoughts_resp)
# A final thought is always appended, so the list is never empty.
thoughts_list.append(f"With all the information give me a report for the task: {Task}")
print(thoughts_list)


#------------------------------------------------------------------------------------------------------

# Running context for the action step; grows with each thought and observation.
Action_prompt = f"""
Task: {Task}
Thought : {thoughts_list[0]}
"""

Action_disclaimer = "Write an simple Action for this Thought with correct syntax"

Markdown_prompt_editor = """
Your a Editorial Manager in AGNOS Business solutions. Name 'Tillu', where you need to provide a report to your client regrading their request. 
Here is the final draft of the report, try to build some hidden insights from this and write it in final report, write this draft into beautiful markdown, if already in markdown, try to make it better and clear.
And in final write your opinion in paragraph. Make the report better and bigger.
"""

# Step 2: for each thought, ask for an action, run it and record the observation.
observation = " "
for i, thought in enumerate(thoughts_list):
    print(f"------------ITEARATION {i}------------------")
    if i > 0:
        # Feed the previous observation back in, compress the context when it
        # has grown too long, then append the next thought.
        Action_prompt = Action_prompt + "Observation: " + observation
        Action_prompt = summary_context(Action_prompt)
        Action_prompt = f"{Action_prompt}\n Thought : {thought} "

    Action_prompt_full = f"{Example_prompt_Actions}\n {Action_prompt} \n {Action_disclaimer} "
    print("********************FULL-PROMPT-START********************")
    print(Action_prompt_full)
    print("********************FULL-PROMPT-END********************")
    print("\n\n")
    Action_resp = O_LLM(Action_prompt_full)
    print(Action_resp)
    try:
        observation = actions_perform(Action_resp, thought)
        print(observation)
    except Exception as e:
        # Best effort: a failing tool must not abort the remaining thoughts.
        print(e)
        observation = " "

    # Tools may return None or non-string objects; normalize here so the
    # observation is kept instead of being lost to a TypeError on concatenation.
    observation = " " if observation is None else str(observation)

# Step 3: turn the accumulated draft and the last observation into the final report.
Markdown_prompt_full = f"{Markdown_prompt_editor} \n Draft: {Action_prompt} \n {observation}"
final_report = O_LLM(Markdown_prompt_full)
Markdown_pdf(final_report)
    

Thought 1: First I need to find, recent earthquake in taiwan in internet
Thought 2: Second, Make a list of the basic information regarding the earthquake such as time, location, and magnitude
Thought 3: Third, Now search how many people died,injured, building collapsed due to the earthquake
Thought 4: Fourth, Now With all the companies information write a important summary of the event
Thought 5: With all the information, I need to find what are the after effects of earthquake
['First I need to find, recent earthquake in taiwan in internet', 'Second, Make a list of the basic information regarding the earthquake such as time, location, and magnitude', 'Third, Now search how many people died,injured, building collapsed due to the earthquake', 'Fourth, Now With all the companies information write a important summary of the event', 'With all the information, I need to find what are the after effects of earthquake', 'With all the information give me a report for the task: Perform a detailed