# Importing Libraries

In [2]:
%%capture
!pip install -q pypandoc pdflatex python-pptx
!pip install -q auto-gptq optimum
!sudo apt install texlive
import warnings
warnings.filterwarnings('ignore')

from pypandoc.pandoc_download import download_pandoc
download_pandoc()
import torch
import asyncio
import ctypes, gc
from torch.cuda.amp import autocast
libc = ctypes.CDLL('libc.so.6')


In [3]:
#clear memory before generating response
def clear_memory():
    libc.malloc_trim(0)
    torch.cuda.empty_cache()
    gc.collect()
clear_memory()

In [4]:
#using qwen 2.5 pre-trained model to generate responses
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "/kaggle/input/qwen2.5/transformers/7b-instruct-gptq-int8/1"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
).eval()

tokenizer = AutoTokenizer.from_pretrained(model_name)

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
#function for generating response to user query
def generate_response(messages):
    clear_memory()
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with torch.no_grad(), autocast():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=1024
        )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

In [6]:
#Defining the document type prompt to model to understsnd the users requirment by providing examples

DOC_TYPE_CLASS_PROMPT = """
As per the user's query can you categorize the type of documents that is required by the user.
 - Report
 - Letter
 - Email 
 - Summary
 - Presentation
 - Resume
 - Proposal
 - Agreement

The above given are just example of some of the types of documents. 
But you can use your knowledge base also to provide the type of the document that is desired by the user.

Just output the type of document that is desired and no other explanation is required. Think before you 
reply ground it with the user's query whether it is making some sense or not.
"""

In [7]:
def get_document_type(query):
    messages = [
        {"role": "system", "content": DOC_TYPE_CLASS_PROMPT},
        {"role": "user", "content": query}
    ]
    
    return generate_response(messages)

In [8]:
FILE_NAME_SUGGESTION_PROMPT = """
Based on the content provided to you, suggest a suitable and meaningful 
filename that accurately represents the content of the document.

Guidelines:
- Keep it concise yet descriptive.
- Use relevant keywords from the query.
- Avoid special characters except underscores (_) or hyphens (-).
- Follow common naming conventions (e.g., snake_case, camelCase, or Title-Case).
- If applicable, include a date or version number for clarity.
- Make sure that the filename is not too long.
- Do not include extensions of the file type.

Just output the suggested filename and no other explanation is required. 
Ensure it aligns logically with the user's request.
"""

def get_suggested_filename(query):
    messages = [
        {"role": "system", "content": FILE_NAME_SUGGESTION_PROMPT},
        {"role": "user", "content": query}
    ]
    
    return generate_response(messages)

In [9]:
get_suggested_filename("""
Dear [HR Manager's Name],

I hope this message finds you well. I am writing to formally request a leave of absence for three consecutive days, specifically from March 15 to March 17, 2023. During this period, I will be attending a personal matter that requires my immediate attention.

I have ensured that all my ongoing tasks will be completed or handed over to my colleagues before my departure to minimize any disruption to the team’s workflow. I will also be available via email and phone during my leave if any urgent matters arise.

Thank you for considering my request. Please let me know if there is any additional information needed or if you require further details.

Best regards,

[Your Full Name]
[Your Position]
[Your Contact Information]

""")

'Leave_Request_March_2023.pdf'

In [10]:
#Defining some example of queries which can be asked by user
queries = [
    "Can you write a report on the topic Digital marketing?",
    
    "Can you help me write a termination letter for Adyant Pandey working in the Data Science department for the policy breach?",
    
    "I have a review tommorow and I haven't created a presentation yet can you help me generate it on SOTA models in AI?",
    
    "I need a draft of a confidentiality agreement for our new business partner.",
    
    "Write a proposal for a new software development project for our tech team.",

    "Can you draft a research report on renewable energy adoption in developing countries?",

    "Draft an email to invite investors to our upcoming product launch event.",

    "Can you generate a professional resume for a AI Engineer with 3 years of experience?"
]

In [None]:
for query in queries:
    doc_type = get_document_type(query)
    print("="*100)
    print(f"Query: {query}")
    print(f"Document Type: {doc_type}")
    print("="*100)
    print()

In [11]:
#prompt template for ppt generation
PPT_GEN_PROMPT = """
You are supposed to convert the user's given content in the python list of dictionaries.

[
{"title": "Some title1", "content": "Some content1"},
{"title": "Some title2", "content": "Some content2"},
{"title": "Some title3", "content": "Some content3"},
{"title": "Some title4", "content": "Some content4"}
]

For example:
[
    {"title": "SOTA Models in AI", "content": "Exploring State-of-the-Art AI Models\nYour Name\nApril 2025"},
    {"title": "Introduction", "content": (
        "In the rapidly evolving field of Artificial Intelligence (AI), SOTA (State-of-the-Art) models "
        "represent the cutting-edge advancements that push the boundaries of AI capabilities in various domains."
    )},
    {"title": "What Are SOTA Models?", "content": (
        "SOTA models refer to AI models that achieve the highest level of performance or accuracy "
        "on a specific task or benchmark. These models are typically built using the most advanced techniques, "
        "including deep learning, transformer networks, reinforcement learning, etc."
    )},
    {"title": "Examples of SOTA Models", "content": (
        "1. GPT-3/4 (Language Models): Cutting-edge models for natural language understanding and generation.\n"
        "2. BERT (NLP): Transformer-based model for various NLP tasks like text classification, question answering.\n"
        "3. ResNet (Computer Vision): Deep convolutional networks for image classification and recognition.\n"
        "4. AlphaFold (Protein Folding): Predicting protein structures with unparalleled accuracy."
    )},
    {"title": "Applications of SOTA Models", "content": (
        "1. Healthcare: AI models like AlphaFold assist in drug discovery and protein structure prediction.\n"
        "2. Autonomous Vehicles: SOTA models are used in self-driving cars for object detection and path planning.\n"
        "3. Customer Support: GPT-3 and other NLP models help automate customer service via chatbots.\n"
        "4. Content Creation: Language models like GPT are used to create content, write articles, and generate code."
    )},
    {"title": "Challenges with SOTA Models", "content": (
        "1. Computational Cost: Training SOTA models require massive computational resources.\n"
        "2. Data Bias: SOTA models can inherit biases from the data they are trained on.\n"
        "3. Explainability: Many SOTA models, especially deep learning models, are seen as black boxes.\n"
        "4. Ethical Concerns: Misuse of AI technologies can lead to privacy violations, misinformation, etc."
    )},
    {"title": "Future Directions", "content": (
        "1. Efficiency: Future SOTA models will focus on energy-efficient training and inference.\n"
        "2. Generalization: We may see models that generalize better across different tasks and domains.\n"
        "3. Ethical AI: There will be greater focus on building responsible and ethical AI models.\n"
        "4. Multimodal Models: Combining text, image, and video understanding to create more versatile models."
    )},
    {"title": "Conclusion", "content": (
        "SOTA models in AI are pushing the boundaries of what is possible in AI research and application. "
        "Despite challenges such as resource constraints, bias, and explainability issues, the future looks bright for "
        "AI innovations that can transform industries and improve human life."
    )}
]


Make sure you output only this so, that I can run it using the exec() command in python and get the content in the 
desired variable. Thsi is very critical as if you will make mistake my whole programme will halt because of error.
"""

def get_ppt_content(content):
    messages = [
        {"role": "system", "content": PPT_GEN_PROMPT},
        {"role": "user", "content": content}
    ]
    
    return generate_response(messages)

In [12]:
#prompt template for AI resume generation
RESUME_GEN_PROMPT = """
You are an AI-powered document generator specializing in professional resumes. Your task is to generate a well-structured, ATS-friendly resume in a formal document format (.docx or PDF) based on the given user input. The resume should be tailored for a IT Tech job specialist and follow industry standards.

### **Instructions:**
- Generate the resume in a **formal document format (.docx or PDF)**.
- Ensure **proper formatting**, including bold headers, bullet points, and consistent spacing.
- Keep the resume **concise (1-2 pages)** and optimized for **Applicant Tracking Systems (ATS)**.
- Use industry-relevant keywords and maintain a **clear structure**.
- Do **not** use JSON, LaTeX, or Markdown formatting. The output must be readable in a document format.

### **Resume Structure:**

#### **1. Header**
- Full Name
- Location (City, State)
- Contact Information (Phone, Email, LinkedIn, Portfolio)

#### **2. Professional Summary**
- A brief, impactful summary highlighting the candidate's experience, key skills, and career goals.

#### **3. Education**
- List degrees, universities, and graduation years.

#### **4. Work Experience**
- Job title, company name, and duration.
- Key achievements and responsibilities, written in bullet points.

#### **5. Projects**
- Highlight relevant projects, including problem statements, methodologies, and outcomes.

#### **6. Skills**
- List technical skills such as programming languages, data visualization tools, and databases.

#### **7. Certifications** (if applicable)
- Relevant industry certifications.

#### **8. Achievements/Publications** (if applicable)
- Any awards, research papers, or notable contributions.

### **Example Resume for Data Analyst:**

**John Doe**  
New York, NY | john.pandey@email.com | (123) 456-7890 | linkedin.com/in/johnpandey  

**Professional Summary:**  
Results-driven Data Analyst with 2+ years of experience in data processing, visualization, and statistical modeling. Proficient in Python, SQL, and Power BI, with a strong ability to derive actionable insights from complex datasets.

**Education:**  
- M.Sc. in Data Science, XYZ University, 2022  
- B.Sc. in Mathematics & Statistics, ABC University, 2020  

**Work Experience:**  
**Data Analyst | TechCorp Inc. | Jan 2022 - Present**  
- Analyzed large datasets to identify trends, improving business decision-making by 20%.  
- Developed interactive dashboards using Power BI, reducing reporting time by 40%.  
- Optimized SQL queries, improving database performance by 30%.  

**Skills:**  
- **Programming:** Python, SQL, R  
- **Visualization:** Power BI, Tableau  
- **Databases:** PostgreSQL, MongoDB  
- **Machine Learning:** Scikit-learn, TensorFlow  

**Projects:**  
- Developed a customer segmentation model using clustering algorithms, increasing customer engagement by 25%.  
- Automated data ETL pipelines, reducing manual effort by 50%.
_ Developed a dashboard of pizza data analysis power bi, excel and SQL queries.

**Certifications:**  
- Google Data Analytics Professional Certificate  
- Tableau Desktop Specialist  

**Achievements:**  
- Published research on "Optimizing Predictive Models for Business Intelligence" in XYZ Journal.  

### **Output Format:**
- Generate the resume in a **DOCS or PDF format** without extra formatting symbols.
- Ensure the output is **professional, structured, and ready to use.**
"""


def get_ppt_content(content):
    messages = [
        {"role": "system", "content": RESUME_GEN_PROMPT},
        {"role": "user", "content": content}
    ]
    
    return generate_response(messages)


In [13]:
#function for conversion of ai generated to pdf or docs
def convert_to_pdf_or_doc(content, doc_type="docx"):
    filename = get_suggested_filename(content)
    output_filename = f"{filename}.{doc_type}"
    pypandoc.convert_text(content, to=doc_type, format='md',
                          outputfile=output_filename)
    print(f"File saved as {output_filename}")

In [14]:
import math
import time
import pypandoc
from pptx import Presentation
def convert_to_ppt(ppt_content):
    ppt_content = get_ppt_content(ppt_content)
    filename = get_suggested_filename(ppt_content)
    
    slides_content = []  # Ensure slides_content is defined
    
    try:
        res = ppt_content.replace("```python", "").replace("```", "").strip()
        # Replace typographic quotes/apostrophes with standard ones
        res = res.replace("’", "'").replace("‘", "'").replace("“", "\"").replace("”", "\"")
        
        if not res.startswith("slides_content = "):
            res = "slides_content = " + res
        
        local_scope = {}
        exec(res, {}, local_scope)
        slides_content = local_scope.get("slides_content", [])
        
        if not isinstance(slides_content, list) or not all(isinstance(slide, dict) for slide in slides_content):
            raise ValueError("Invalid slide content format.")
    
    except Exception as e:
        print("PPT Conversion Failed:", str(e))
        return
        
    prs = Presentation()
    
    for slide_info in slides_content:
        slide_layout = prs.slide_layouts[1]
        slide = prs.slides.add_slide(slide_layout)
    
        title = slide.shapes.title
        body = slide.shapes.placeholders[1]
    
        title.text = slide_info.get("title", "Untitled Slide")
        body.text = slide_info.get("content", "No content provided.")
    
    ppt_filename = f'{filename}.pptx'
    prs.save(ppt_filename)
    
    print(f"PowerPoint saved successfully as {ppt_filename}!")


In [17]:
#Prompt template for documenet generation
DOC_GEN_PROMPT = """
You are an expert in the {doc_type} type document generation task. 
Help user to generate a high quality document and analyze the document before 
giving the output for the same because these documents are very crucial to the user 
and will be served as a medium of communication between two parties.

While generating the prompt make sure you follow {doc_tone} tone.

Only the relevant content is required because it will be passed directly
so, you are not supposed to generate any text that may indicate that this has 
been generated from an AI Assistant.
"""

def prepare_document(query, doc_type, tone="Formal"):
    doc_type = doc_type.lower()
    CURR_DOC_GEN_PROMPT = DOC_GEN_PROMPT.format(doc_type=doc_type,
                                               doc_tone=tone)
    
    messages = [
        {"role": "system", "content": CURR_DOC_GEN_PROMPT},
        {"role": "user", "content": query}
    ]

    response = generate_response(messages)
    if doc_type=="presentation":
        convert_to_ppt(response)        
    else:
        convert_to_pdf_or_doc(response)
        
    return response


In [18]:
#function for converting generated text to markdwon for readability
import textwrap
from IPython.display import display, Markdown

def to_markdown(text):
    text = text.replace('•', '  *')  # Convert bullet points to Markdown format
    return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

In [None]:
queries = [
    "Can you write a report on the topic Digital marketing?",
    "Can you help me write a termination letter for Adyant Pandey working in the data science department for the policy breach?"
]

In [None]:
#function for passing multiple queries at a time

for query in queries:
    doc_type = get_document_type(query)
    response = prepare_document(query, doc_type)
    markdown_output = f"""
# Document Generation Result

**Query:**  
> {query}

**Document Type:**  
> {doc_type}

**Generated Document:**  
{response}
    """
    
    display(to_markdown(markdown_output))
    if doc_type.lower() == "presentation":
        convert_to_ppt(response)
        print("PPT generated successfully.")
    

In [None]:
"""for query in queries1:
    doc_type = get_document_type(query)
    response = prepare_document(query, doc_type)
    print("="*100)
    print(f"Query: {query}")
    print(f"Document Type: {doc_type}")
    print(f"Document: {response}")
    print("="*100)
    print()"""""

# Creating API Endpoint

In [16]:
!pip install -q Flask pyngrok

In [17]:
from pyngrok import ngrok
!ngrok authtoken 2vD7PkNnDdhF5GC2bXjcOrx0uin_54fxPGkc69hJrg9P98umQ
tunnel = ngrok.connect(8501, "http", "us")
public_url = tunnel.public_url
print(public_url)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml                                
https://5277-107-167-177-75.ngrok-free.app


In [18]:
# function for generate single query response
from flask import Flask, request, jsonify

app=Flask(__name__)
@app.route('/predict', methods=['POST'])
def process_query():
    query=request.json['query']
    print(query)
    doc_type = get_document_type(query)  # Determine document type
    response = prepare_document(query, doc_type)  # Generate document
    
    markdown_output = f"""
# Document Generation Result

**Query:**  
> {query}

**Document Type:**  
> {doc_type}

**Generated Document:**  
{response}
    """
    
    display(to_markdown(markdown_output))

    if doc_type.lower() == "presentation":
        convert_to_ppt(response)

    return jsonify({'response': response})
if __name__ == '__main__':
    app.run(host='127.0.0.1', port=8501)

 * Serving Flask app '__main__'
 * Debug mode: off
write a agreement letter for the IT Employee to sign one year bond with 50000 penalty in 100 words only.
File saved as Employee_Contract_Agreement_Company_Name_Date-MM-DD-yyyy_V1.docx


> 
> # Document Generation Result
> 
> **Query:**  
> > write a agreement letter for the IT Employee to sign one year bond with 50000 penalty in 100 words only.
> 
> **Document Type:**  
> > Agreement
> 
> This agreement is made on [Date] between [Employee Name] (hereinafter referred to as "Employee") and [Company Name] (hereinafter referred to as "Company").
> 
> Employee agrees to be bound by a one-year employment contract with the Company, effective from [Start Date] to [End Date]. In the event of premature termination of this contract by the Employee, a penalty of $50,000 shall be imposed.
> 
> By signing below, Employee acknowledges and agrees to the terms outlined herein.
> 
> Employee Signature: ___________________________
> 
> Date: ___________________________
> 
> Company Signature: ___________________________
> 
> Date: ___________________________
> 
> **Generated Document:**  
> This agreement is made on [date] between [Employee Name] (hereinafter referred to as "Employee") and [Company Name] (hereinafter referred to as "Company").
> 
> Employee agrees to be bound by a one-year employment contract with the Company, effective from [Start Date] to [End Date]. In the event of premature termination of this contract by the Employee, a penalty of $50,000 shall be imposed.
> 
> By signing below, Employee acknowledges and agrees to the terms outlined herein.
> 
> Employee Signature: ___________________________
> 
> Date: ___________________________
> 
> Company Signature: ___________________________
> 
> Date: ___________________________
>     

In [19]:
def process_query(query):
    doc_type = get_document_type(query)  # Determine document type
    response = prepare_document(query, doc_type)  # Generate document
    
    markdown_output = f"""
# Document Generation Result

**Query:**  
> {query}

**Document Type:**  
> {doc_type}

**Generated Document:**  
{response}
    """
    
    display(to_markdown(markdown_output))

    if doc_type.lower() == "presentation":
        convert_to_ppt(response)

In [20]:
query35 = "Generate PSUR for Levofloxacin"
process_query(query35)

File saved as PSUR_Levofloxacin_Safety_Update_[Start_Date]-[End_Date].docx


> 
> # Document Generation Result
> 
> **Query:**  
> > Generate PSUR for Levofloxacin
> 
> **Document Type:**  
> > Report
> 
> **Generated Document:**  
> **Post-Approval Safety Update Report (PSUR) for Levofloxacin**
> 
> **1. Introduction**
> 
> This Post-Approval Safety Update Report (PSUR) for Levofloxacin is prepared in accordance with regulatory requirements to provide an overview of the post-marketing safety data collected during the specified period. This report aims to identify and evaluate any new or emerging safety issues associated with the use of Levofloxacin.
> 
> **2. Scope and Period Covered**
> 
> This PSUR covers the period from [Start Date] to [End Date]. The data includes both spontaneous reports and data from other sources such as clinical trials, literature reviews, and observational studies.
> 
> **3. Summary of Safety Data**
> 
> **3.1 Adverse Drug Reactions (ADRs)**
> 
> - **Gastrointestinal Disorders**: Reports of nausea, vomiting, and diarrhea have been observed. These are generally mild to moderate in severity.
> - **Musculoskeletal and Connective Tissue Disorders**: A small number of cases of tendonitis and tendinopathy have been reported, particularly in patients undergoing corticosteroid therapy or who are over 60 years old.
> - **Nervous System Disorders**: Dizziness, headache, and peripheral neuropathy have been noted. In rare cases, more severe neurological events such as seizures have been reported.
> - **Renal and Urinary Disorders**: Cases of acute kidney injury and nephrotoxicity have been observed, primarily in patients with pre-existing renal conditions.
> - **Hepatobiliary Disorders**: There have been isolated reports of hepatic function abnormalities, including elevated liver enzymes and hepatitis.
> 
> **3.2 Serious Adverse Events (SAEs)**
> 
> - **Severe Allergic Reactions**: Including anaphylaxis and angioedema have been reported, necessitating immediate medical intervention.
> - **Serious Infections**: In rare instances, serious infections such as Clostridium difficile-associated diarrhea have been linked to Levofloxacin use.
> - **Suicide Attempts**: Although rare, there have been isolated reports of suicidal ideation and behavior, especially in psychiatric patients.
> 
> **4. Trends and Patterns**
> 
> - **Tendon Disorders**: An increased incidence of tendon disorders, especially in elderly patients and those on concomitant corticosteroid therapy, has been observed.
> - **Liver Function Abnormalities**: A trend towards higher rates of liver enzyme elevations, particularly in patients with underlying liver disease.
> - **Kidney Function Issues**: Persistent reports of acute kidney injury, suggesting a potential nephrotoxic effect.
> 
> **5. Regulatory Actions**
> 
> - **Warnings and Precautions**: The manufacturer has issued updated warnings regarding the risk of tendon disorders, particularly in elderly patients and those on corticosteroids.
> - **Patient Information Sheets**: Revised patient information sheets have been distributed to healthcare providers and patients to highlight the importance of monitoring for adverse effects.
> 
> **6. Recommendations**
> 
> - **Patient Monitoring**: Healthcare providers are advised to closely monitor patients for signs of adverse reactions, especially gastrointestinal, musculoskeletal, nervous system, renal, and hepatic disorders.
> - **Precautionary Measures**: Caution should be exercised when prescribing Levofloxacin to elderly patients and those with pre-existing liver or kidney conditions.
> - **Follow-Up Studies**: Further studies are recommended to explore the long-term effects of Levofloxacin on various organ systems.
> 
> **7. Conclusion**
> 
> The post-marketing surveillance for Levofloxacin continues to identify new safety concerns. It is essential to remain vigilant and implement appropriate measures to mitigate risks and ensure patient safety.
> 
> **8. References**
> 
> - [List of references and sources used in compiling this report]
> 
> **9. Attachments**
> 
> - [List of any additional attachments, such as tables, figures, or supplementary data]
> 
> ---
> 
> **Prepared by:**
> [Name]
> [Title]
> [Contact Information]
> 
> **Reviewed by:**
> [Name]
> [Title]
> [Contact Information]
> 
> **Approved by:**
> [Name]
> [Title]
> [Contact Information]
> 
> ---
> 
> This document is intended for internal use and may be shared with regulatory authorities as required. Please ensure all information is accurate and up-to-date before final submission.
>     

In [None]:
query4 = "Can you generate a professional resume for a AI Engineer with 3 years of experience in a company name Wipro?"
process_query(query4)