**The objective of the project was to develop a platform where a user can provide a PDF and perform the following operations:**


**1. Generate an overview of the PDF in the form of a course outline.**

**2. Generate questions from the PDF as multiple-choice (MCQ), short-answer, and fill-in-the-blank questions.**

**3. Chat with the PDF in the user's preferred language.**


# **1. Installing Packages**

In [None]:
!pip install google-generativeai
!pip install transformers
!pip install tika
!pip install PyPDF2
!pip install reportlab

# **2. AnantaLearn**

In [None]:
import PyPDF2
import google.generativeai as genai
from io import BytesIO
import requests
import multiprocessing
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import google.generativeai as genai
from google.colab import userdata
import warnings
from reportlab.lib.pagesizes import letter
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet
import markdown
import time

# Suppress every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Configure the Google Generative AI API
# The key is read through google.colab's `userdata` under the name
# 'GOOGLE_API_KEY' and handed to the genai client once, at import time.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages, in page order, joined without any separator.
        A page that yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, "rb") as file:
        pdf_reader = PyPDF2.PdfReader(file)
        # Extract while the file is still open. ``or ""`` keeps the join safe
        # should a page's extraction yield None instead of a string.
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)

def split_text_into_chunks(text, max_chunk_size=2000):
    """Group the sentences of *text* into chunks of bounded length.

    Sentences are delimited by ". ". Consecutive sentences are packed into
    the same chunk (re-joined with ". ") for as long as the chunk stays
    within *max_chunk_size* characters, separator included.

    Args:
        text: The text to split.
        max_chunk_size: Maximum length of a chunk, in characters.

    Returns:
        A list of non-empty chunk strings. A single sentence longer than
        *max_chunk_size* is not broken up; it becomes one oversized chunk.
        Empty input yields an empty list.
    """
    separator = ". "
    chunks = []
    current_chunk = ""

    for sentence in text.split(separator):
        if not current_chunk:
            # Nothing buffered yet: start a chunk with this sentence, never
            # flushing an empty chunk even if the sentence is oversized.
            current_chunk = sentence
            continue

        # Count the full separator so the joined chunk cannot exceed the limit.
        joined_length = len(current_chunk) + len(separator) + len(sentence)
        if joined_length > max_chunk_size:
            chunks.append(current_chunk)
            current_chunk = sentence
        else:
            current_chunk += separator + sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

def generate_content(prompt, max_tokens=1024, retries=3, wait=5):
    """Send *prompt* to the 'gemini-pro' model, retrying when an attempt fails.

    Args:
        prompt: Prompt text passed to the model's ``generate_content``.
        max_tokens: Kept for interface compatibility; it is not forwarded to
            the model.
        retries: Maximum number of attempts.
        wait: Seconds to sleep between two attempts.

    Returns:
        The stripped response text of the first successful attempt. If no
        attempt succeeds (including when *retries* is less than 1) a
        "Failed to generate content after N attempts." message is returned
        instead, so the result is always a string.
    """
    model = genai.GenerativeModel('gemini-pro')
    for attempt in range(retries):
        try:
            response = model.generate_content(prompt)
            if not response.parts:
                raise ValueError("No valid parts in response.")
            return response.text.strip()
        except Exception as e:
            # Deliberately broad: any failure of the call or of reading the
            # response counts as one failed attempt.
            print(f"Error: {e}")
            if attempt < retries - 1:
                print(f"Retrying in {wait} seconds...")
                time.sleep(wait)
    # Reached when every attempt failed, or when no attempt was allowed.
    return f"Failed to generate content after {retries} attempts."

def translate_text(text, language_name):
    """Translate *text* into *language_name* with a single 'gemini-pro' call.

    Args:
        text: The text to translate.
        language_name: Name of the target language, inserted into the prompt.

    Returns:
        The model's response text with surrounding whitespace stripped.

    Raises:
        ValueError: If the response contains no parts.
    """
    translator = genai.GenerativeModel('gemini-pro')
    reply = translator.generate_content(
        f"Translate the following text to {language_name}: {text}"
    )
    # Guard clause: an empty response is an error, not an empty translation.
    if not reply.parts:
        raise ValueError("No valid parts in response.")
    return reply.text.strip()

# Prompt templates keyed by task type. ``{text}`` is the only placeholder and
# is filled in by create_prompts().
_PROMPT_TEMPLATES = {
    "mcq": (
        "Read the following text carefully and generate multiple-choice questions. Each question should include:\n"
        "1. A clear and concise question based on the text.\n"
        "2. Four answer options (A, B, C, D), with one correct answer clearly indicated.\n"
        "3. The questions should cover key concepts, definitions, critical points, and significant details discussed in the text.\n"
        "4. Ensure the options are plausible and relevant to the content.\n\n"
        "Text:\n{text}\n\nMCQ:"
    ),
    "fill_in_the_blank": (
        "Read the following text thoroughly and generate fill-in-the-blank questions. Each question should include:\n"
        "1. A sentence from the text with one key term or concept replaced by a blank.\n"
        "2. The correct term or concept that completes the sentence accurately.\n"
        "3. Focus on important information, such as key terms, dates, names, and concepts that are critical to understanding the text.\n\n"
        "Text:\n{text}\n\nFill in the blank:"
    ),
    "short_answer": (
        "Read the following text attentively and generate short answer questions. Each question should include:\n"
        "1. A clear and specific question that requires a brief response.\n"
        "2. The response should address key points, explanations, or definitions provided in the text.\n"
        "3. Ensure the questions encourage critical thinking and comprehension of the material, focusing on important details and concepts.\n\n"
        "Text:\n{text}\n\nShort answer question:"
    ),
    "course": (
        "Read the following text and generate a comprehensive, structured curriculum content. The content should include:\n"
        "1. Learning objectives and outcomes.\n"
        "2. Topic-wise breakdown with detailed descriptions.\n"
        "3. Key concepts, definitions, and explanations.\n"
        "4. Examples, illustrations, and case studies.\n"
        "5. Assessment and evaluation criteria.\n\n"
        "Text:\n{text}\n\nCurriculum Content:"
    ),
}


def create_prompts(text, task_type):
    """Build the model prompt for one task over *text*.

    Only the template selected by *task_type* is rendered, so a call does not
    create throwaway copies of *text* for the other task types.

    Args:
        text: Source text to embed in the prompt.
        task_type: One of "mcq", "fill_in_the_blank", "short_answer" or
            "course".

    Returns:
        The rendered prompt, or an empty string for an unknown *task_type*.
    """
    template = _PROMPT_TEMPLATES.get(task_type)
    if template is None:
        return ""
    # Only the template is parsed for placeholders; braces inside *text* are
    # inserted verbatim.
    return template.format(text=text)

def get_user_input(prompt):
    """Show *prompt* on the console and return the line the user types.

    Thin wrapper around the built-in ``input``; every interactive question in
    this file goes through it.
    """
    return input(prompt)

def _choose_language(languages):
    """Prompt until the user names a supported language; return its listed name."""
    # Case-insensitive, whitespace-tolerant lookup: "urdu" and " Urdu " both
    # resolve to "Urdu".
    by_key = {name.lower(): name for name in languages}
    while True:
        choice = get_user_input("Choose a language for the output: ")
        language_name = by_key.get(choice.strip().lower())
        if language_name is not None:
            return language_name
        print("Invalid choice. Please choose a valid language.")


def _ask_question_count():
    """Ask how many questions to show; return 5, 10 or 15, or None if invalid."""
    raw = get_user_input("Enter the number of questions to generate (5, 10, 15): ")
    try:
        count = int(raw)
    except ValueError:
        # Non-numeric input is reported like any other invalid count instead
        # of aborting the whole session.
        return None
    return count if count in (5, 10, 15) else None


def _generate_questions(text_chunks, language_name):
    """Run the question-generation sub-menu until the user declines another round."""
    valid_types = ('mcq', 'fill_in_the_blank', 'short_answer')
    previous_question_type = None
    while True:
        question_type = get_user_input("Choose the type of questions to generate (mcq, fill_in_the_blank, short_answer): ").strip().lower()
        if question_type not in valid_types:
            print("Invalid choice. Please choose either 'mcq', 'fill_in_the_blank', or 'short_answer'.")
            continue
        if question_type == previous_question_type:
            print(f"You've already generated {question_type} questions. Please choose a different type.")
            continue

        num_questions = _ask_question_count()
        if num_questions is None:
            print("Invalid number of questions. Please choose either 5, 10, or 15.")
            continue

        # One response is produced per chunk and at most num_questions
        # responses are displayed, so only that many chunks are sent to the
        # model (each chunk costs one generation and one translation call).
        questions = []
        for chunk in text_chunks[:num_questions]:
            prompt = create_prompts(chunk, question_type)
            question = generate_content(prompt)
            questions.append(translate_text(question, language_name))

        print(f"{question_type.upper()} Questions:")
        for question in questions:
            print(question)
            print()

        another_round = get_user_input("Do you want to generate a different type of questions? (yes/no): ").strip().lower()
        if another_round != 'yes':
            break
        previous_question_type = question_type


def _chat_with_pdf(pdf_text, language_name):
    """Answer user questions about *pdf_text* until the user stops asking."""
    while True:
        user_query = get_user_input("Ask a question about the content of the PDF: ")
        # The indentation inside this literal is part of the prompt text sent
        # to the model, so it is kept exactly as written.
        prompt = f"""
                You are an advanced AI assistant that has access to the content of a provided PDF document. Your task is to answer user questions based on the information contained within the PDF. Please follow these instructions carefully:

                1. **Understanding the PDF Content**:
                   - You have full access to the text of the PDF.
                   - Read and analyze the content to extract relevant information in order to answer the questions accurately.

                2. **Responding to User Queries**:
                   - When a user asks a question, first clarify the main topic or keywords within the question.
                   - Search for relevant information in the PDF text that corresponds to the user's query.

                3. **Answer Format**:
                   - Provide clear, concise, and accurate answers.
                   - If the answer cannot be found, politely inform the user that the information is unavailable.
                   - Ensure that all information provided is based on the PDF content only.

                4. **Language and Clarity**:
                   - After formulating your answer, translate it into the specified output language chosen by the user (e.g., Hindi, Spanish, etc.).
                   - Ensure that the translation maintains the meaning and clarity of the original response.

                4. **Additional Information**:
                  - If appropriate, provide additional context, examples, or details that may help the user understand the answer better.
                  - Include references to specific sections, figures, or pages in the PDF if relevant, to guide the user to more detailed information.

                5. **User Engagement**:
                  - Encourage the user to ask follow-up questions or seek clarification on your answers.
                  - Maintain a friendly and helpful tone throughout the interaction.

                **User Query**: {user_query}
                **PDF Content**: {pdf_text}
                """
        answer = generate_content(prompt)
        translated_answer = translate_text(answer, language_name)
        print("AI:", translated_answer)

        another_question = get_user_input("Do you want to ask another question? (yes/no): ").strip().lower()
        if another_question != 'yes':
            break


def chatbot():
    """Run the interactive console session over one PDF.

    The user is asked for a PDF path and an output language, then repeatedly
    offered a menu: (1) generate a course outline from the whole text,
    (2) generate questions from the text chunks, (3) ask free-form questions
    about the PDF, (4) exit. Every model answer is passed through
    ``translate_text`` with the chosen language before it is printed.
    """
    pdf_path = get_user_input("Enter the path to your PDF file: ")
    pdf_text = extract_text_from_pdf(pdf_path)

    languages = [
        "Arabic", "Czech", "German", "English", "Spanish", "Estonian", "Finnish", "French", "Gujarati",
        "Hindi", "Italian", "Japanese", "Kazakh", "Korean", "Lithuanian", "Latvian", "Burmese", "Nepali",
        "Dutch", "Romanian", "Russian", "Sinhala", "Turkish", "Vietnamese", "Chinese", "Afrikaans",
        "Azerbaijani", "Bengali", "Persian", "Hebrew", "Croatian", "Indonesian", "Georgian", "Khmer",
        "Macedonian", "Malayalam", "Mongolian", "Marathi", "Polish", "Pashto", "Portuguese", "Swedish",
        "Swahili", "Tamil", "Telugu", "Thai", "Tagalog", "Ukrainian", "Urdu", "Xhosa", "Galician",
        "Slovene"
    ]

    print("Available languages:")
    for language in languages:
        print(language)

    language_name = _choose_language(languages)

    text_chunks = split_text_into_chunks(pdf_text)

    while True:
        main_choice = get_user_input("Choose an option: (1) Generate Course, (2) Generate Questions, (3) Chat with PDF, (4) Exit: ").strip().lower()
        if main_choice not in ('1', '2', '3', '4'):
            print("Invalid choice. Please choose either '1', '2', '3', or '4'.")
            continue

        if main_choice == '1':
            prompt = create_prompts(pdf_text, "course")
            course_outline = generate_content(prompt)
            translated_course_outline = translate_text(course_outline, language_name)
            print("Course Outline and Lesson Plan:")
            print(translated_course_outline)

        elif main_choice == '2':
            _generate_questions(text_chunks, language_name)

        elif main_choice == '3':  # Option 3: Chat with PDF
            _chat_with_pdf(pdf_text, language_name)

        else:  # main_choice == '4'
            print("Exiting the program. Goodbye!")
            break  # Leaves the main menu loop and ends the session.

# Start the chatbot
# The guard launches the interactive session only when this code runs as the
# main module, not when it is imported.
if __name__ == "__main__":
    chatbot()


Enter the path to your PDF file: /content/Profile.pdf
Available languages:
Arabic
Czech
German
English
Spanish
Estonian
Finnish
French
Gujarati
Hindi
Italian
Japanese
Kazakh
Korean
Lithuanian
Latvian
Burmese
Nepali
Dutch
Romanian
Russian
Sinhala
Turkish
Vietnamese
Chinese
Afrikaans
Azerbaijani
Bengali
Persian
Hebrew
Croatian
Indonesian
Georgian
Khmer
Macedonian
Malayalam
Mongolian
Marathi
Polish
Pashto
Portuguese
Swedish
Swahili
Tamil
Telugu
Thai
Tagalog
Ukrainian
Urdu
Xhosa
Galician
Slovene
Choose a language for the output: Urdu
Choose an option: (1) Generate Course, (2) Generate Questions, (3) Chat with PDF, (4) Exit: 3
Ask a question about the content of the PDF: Give the experience.
AI: سوال پی ڈی ایف میں درج اتکرش راج کے تجربے کے بارے میں پوچھتا ہے۔ اس سوال کا جواب دینے کے لیے، میں نے پی ڈی ایف کے تجربے کے حصے سے متعلق معلومات نکالی ہیں اور اسے ذیل میں سمری شکل میں پیش کیا ہے:

**تجربہ**

113 انڈسٹریز میں * **جنرل اے آئی انجینئر انٹرن** (جون 2024 - موجودہ)
اومڈینا میں * **جونیئر م