**DYNAMIC QUIZ GENERATION USING POS TAGGING INCLUDING ANSWERS:**

In [7]:
import nltk
import numpy as np
from pptx import Presentation

class SubjectiveTest:
    """Build subjective questions, with answers, from a block of text.

    Keywords are the chunks matched by ``self.grammar`` over POS-tagged
    sentences; a keyword's answer is the text of the sentences it was found
    in.
    """

    def __init__(self, data, noOfQues):
        """Store the source text and the number of questions wanted.

        Args:
            data: Text to mine for keywords and answers.
            noOfQues: Number of questions to generate; converted with
                ``int()`` when the test is generated.
        """
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by "
        ]
        # Chunk rules over POS tags; lines without a label continue the
        # CHUNK rule set.
        self.grammar = r"""
            CHUNK: {<NN>+<IN|DT>*<NN>+}
            {<NN>+<IN|DT>*<NNP>+}
            {<NNP>+<NNS>*}
        """
        self.summary = data
        self.noOfQues = noOfQues

    @staticmethod
    def word_tokenizer(sequence):
        """Return the word tokens of every sentence in ``sequence``, in order."""
        return [
            word
            for sent in nltk.sent_tokenize(sequence)
            for word in nltk.word_tokenize(sent)
        ]

    @staticmethod
    def create_vector(answer_tokens, tokens):
        """Return a 0/1 array marking which ``tokens`` occur in ``answer_tokens``.

        Declared static so it can be called on the class or on an instance.
        """
        return np.array([1 if tok in answer_tokens else 0 for tok in tokens])

    @staticmethod
    def cosine_similarity_score(vector1, vector2):
        """Return the cosine similarity of two vectors as a percentage.

        Returns ``0.0`` when either vector has zero length, instead of
        dividing by zero.
        """
        norm1 = np.sqrt(np.sum(np.square(vector1)))
        norm2 = np.sqrt(np.sum(np.square(vector2)))
        if norm1 == 0 or norm2 == 0:
            return 0.0
        return (np.dot(vector1, vector2) / (norm1 * norm2)) * 100

    def _collect_keyword_answers(self):
        """Map each upper-cased chunk keyword to the sentences mentioning it."""
        chunker = nltk.RegexpParser(self.grammar)
        keyword_answers = {}
        for sentence in nltk.sent_tokenize(self.summary):
            tokens = nltk.word_tokenize(sentence)
            tree = chunker.parse(nltk.pos_tag(tokens))
            seen_here = set()  # keywords already handled for this sentence
            for subtree in tree.subtrees():
                if subtree.label() != "CHUNK":
                    continue
                keyword = " ".join(leaf[0] for leaf in subtree).upper()
                if keyword in seen_here:
                    # Do not append the same sentence to its own answer twice.
                    continue
                seen_here.add(keyword)
                if keyword in keyword_answers:
                    keyword_answers[keyword] += " " + sentence
                elif len(tokens) > 20:
                    # Only sentences longer than 20 tokens may introduce a keyword.
                    keyword_answers[keyword] = sentence
        return keyword_answers

    def _select_questions(self, keyword_answers, count):
        """Phrase up to ``count`` distinct keywords as questions.

        Returns:
            ``(questions, answers)`` as two parallel lists. Fewer than
            ``count`` entries are returned when there are not enough
            keywords.
        """
        keywords = list(keyword_answers)
        count = max(0, min(count, len(keywords)))
        questions = []
        answers = []
        # A permutation samples without replacement, so no question repeats
        # and the loop always terminates.
        for index in np.random.permutation(len(keywords))[:count]:
            keyword = keywords[index]
            pattern = self.question_pattern[index % len(self.question_pattern)]
            questions.append(pattern + keyword + ".")
            answers.append(keyword_answers[keyword])
        return questions, answers

    def generate_test(self):
        """Generate the test from ``self.summary``.

        Returns:
            ``(questions, answers)``: two parallel lists of strings holding
            at most ``int(self.noOfQues)`` distinct questions in random
            order; both are empty when no keyword is found.
        """
        keyword_answers = self._collect_keyword_answers()
        return self._select_questions(keyword_answers, int(self.noOfQues))

def extract_text_from_ppt(ppt_file):
    """Return the text of every shape exposing ``text`` in a presentation.

    Shapes are visited slide by slide; each shape's text is followed by a
    newline and the combined string is stripped of surrounding whitespace.

    Args:
        ppt_file: Value passed straight to ``Presentation``.
    """
    deck = Presentation(ppt_file)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces).strip()

# Pull the lecture text out of the slide deck
ppt_file = "/content/Module2.pptx"
lecture_text = extract_text_from_ppt(ppt_file)

# Build the generator for the requested number of questions
noOfQues = 5  # how many questions to produce
subjective_generator = SubjectiveTest(lecture_text, noOfQues)

subjective_questions, subjective_answers = subjective_generator.generate_test()

# Show each question followed by its answer and a blank separator line
print("Subjective Questions:")
for number, question in enumerate(subjective_questions, start=1):
    print(f"Question {number}: {question}")
    print(f"Answer {number}: {subjective_answers[number - 1]}")
    print()


Subjective Questions:
Question 1: Define DATA PROCESSING LAYER.
Answer 1: Stitch, Apache Kafta, Blendo

The data processing layer optimize the data to facilitate more efficient analysis, and provide a compute engine to run the queries.

Question 2: Define CLUSTER MANAGEMENT.
Answer 2: Big Data Technologies
Cloudera is a commercial Hadoop distribution that includes enterprise-grade features such as Cloudera Manager for cluster management, integrated security, and data governance.

Question 3: Define HADOOP ECOSYSTEM.
Answer 3: It includes:
	– MapReduce – offline computing engine
	– HDFS – Hadoop distributed file system
	– HBase– online data access

Hadoop Ecosystem: Internal software architecture
Hadoop Ecosystem Projects includes:
Hadoop Common utilities
Avro: A data serialization system with scripting languages.

Question 4: Define VIDEOS.
Answer 4: Flexibility: can deal with any kind of dataset like structured(MySql Data), Semi-Structured(XML, JSON), Un-structured (Images and Videos)

**DYNAMIC QUIZ GENERATION USING POS TAGGING**

In [6]:
import nltk
import numpy as np
from pptx import Presentation

class SubjectiveTest:
    """Build subjective questions from a block of text.

    Keywords are the chunks matched by ``self.grammar`` over POS-tagged
    sentences; each selected keyword is appended to a question prefix.
    """

    def __init__(self, data, noOfQues):
        """Store the source text and the number of questions wanted.

        Args:
            data: Text to mine for keywords.
            noOfQues: Number of questions to generate; converted with
                ``int()`` when the test is generated.
        """
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by "
        ]
        # Chunk rules over POS tags; lines without a label continue the
        # CHUNK rule set.
        self.grammar = r"""
            CHUNK: {<NN>+<IN|DT>*<NN>+}
            {<NN>+<IN|DT>*<NNP>+}
            {<NNP>+<NNS>*}
        """
        self.summary = data
        self.noOfQues = noOfQues

    @staticmethod
    def word_tokenizer(sequence):
        """Return the word tokens of every sentence in ``sequence``, in order."""
        return [
            word
            for sent in nltk.sent_tokenize(sequence)
            for word in nltk.word_tokenize(sent)
        ]

    def _collect_keywords(self):
        """Return the distinct upper-cased chunk keywords, in first-seen order."""
        chunker = nltk.RegexpParser(self.grammar)
        keywords = {}  # dict used as an insertion-ordered set
        for sentence in nltk.sent_tokenize(self.summary):
            tokens = nltk.word_tokenize(sentence)
            if len(tokens) <= 20:
                # Only sentences longer than 20 tokens may introduce a
                # keyword, so shorter ones need no tagging or parsing.
                continue
            tree = chunker.parse(nltk.pos_tag(tokens))
            for subtree in tree.subtrees():
                if subtree.label() == "CHUNK":
                    keyword = " ".join(leaf[0] for leaf in subtree).upper()
                    keywords.setdefault(keyword, None)
        return list(keywords)

    def _build_questions(self, keywords, count):
        """Phrase up to ``count`` distinct ``keywords`` as questions.

        Fewer than ``count`` questions are returned when there are not
        enough keywords; an empty keyword list yields an empty result.
        """
        count = max(0, min(count, len(keywords)))
        questions = []
        # A permutation samples without replacement, so no question repeats.
        for index in np.random.permutation(len(keywords))[:count]:
            pattern = self.question_pattern[index % len(self.question_pattern)]
            questions.append(pattern + keywords[index] + ".")
        return questions

    def generate_test(self):
        """Generate the test from ``self.summary``.

        Returns:
            A list of at most ``int(self.noOfQues)`` distinct question
            strings in random order; empty when no keyword is found.
        """
        return self._build_questions(self._collect_keywords(), int(self.noOfQues))

def extract_text_from_ppt(ppt_file):
    """Return the text of every shape exposing ``text`` in a presentation.

    Shapes are visited slide by slide; each shape's text is followed by a
    newline and the combined string is stripped of surrounding whitespace.

    Args:
        ppt_file: Value passed straight to ``Presentation``.
    """
    deck = Presentation(ppt_file)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces).strip()

# Pull the lecture text out of the slide deck
ppt_file = "/content/Module2.pptx"
lecture_text = extract_text_from_ppt(ppt_file)

# Build the generator for the requested number of questions
noOfQues = 5  # how many questions to produce
subjective_generator = SubjectiveTest(lecture_text, noOfQues)

subjective_questions = subjective_generator.generate_test()

# Show each question followed by a blank separator line
print("Subjective Questions:")
for number, question in enumerate(subjective_questions, start=1):
    print(f"Question {number}: {question}")
    print()


Subjective Questions:
Question 1: What do you mean by APACHE HADOOP.

Question 2: Define YARN.

Question 3: Write a short note on DISTRIBUTION FOR HADOOP.

Question 4: Explain in detail PRODUCTION USE.

Question 5: Explain in detail BIG DATA.



In [30]:
import nltk
import numpy as np
from pptx import Presentation

class SubjectiveTest:
    """Build subjective questions from a block of text.

    Keywords are the chunks matched by ``self.grammar`` over POS-tagged
    sentences; each selected keyword is appended to a question prefix.
    """

    def __init__(self, data, noOfQues):
        """Store the source text and the number of questions wanted.

        Args:
            data: Text to mine for keywords.
            noOfQues: Number of questions to generate; converted with
                ``int()`` when the test is generated.
        """
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by "
        ]
        # Chunk rules over POS tags; lines without a label continue the
        # CHUNK rule set.
        self.grammar = r"""
            CHUNK: {<NN>+<IN|DT>*<NN>+}
            {<NN>+<IN|DT>*<NNP>+}
            {<NNP>+<NNS>*}
        """
        self.summary = data
        self.noOfQues = noOfQues

    @staticmethod
    def word_tokenizer(sequence):
        """Return the word tokens of every sentence in ``sequence``, in order."""
        return [
            word
            for sent in nltk.sent_tokenize(sequence)
            for word in nltk.word_tokenize(sent)
        ]

    def _collect_keywords(self):
        """Return the distinct upper-cased chunk keywords, in first-seen order."""
        chunker = nltk.RegexpParser(self.grammar)
        keywords = {}  # dict used as an insertion-ordered set
        for sentence in nltk.sent_tokenize(self.summary):
            tokens = nltk.word_tokenize(sentence)
            if len(tokens) <= 20:
                # Only sentences longer than 20 tokens may introduce a
                # keyword, so shorter ones need no tagging or parsing.
                continue
            tree = chunker.parse(nltk.pos_tag(tokens))
            for subtree in tree.subtrees():
                if subtree.label() == "CHUNK":
                    keyword = " ".join(leaf[0] for leaf in subtree).upper()
                    keywords.setdefault(keyword, None)
        return list(keywords)

    def _build_questions(self, keywords, count):
        """Phrase up to ``count`` distinct ``keywords`` as questions.

        Fewer than ``count`` questions are returned when there are not
        enough keywords; an empty keyword list yields an empty result.
        """
        count = max(0, min(count, len(keywords)))
        questions = []
        # A permutation samples without replacement, so no question repeats.
        for index in np.random.permutation(len(keywords))[:count]:
            pattern = self.question_pattern[index % len(self.question_pattern)]
            questions.append(pattern + keywords[index] + ".")
        return questions

    def generate_test(self):
        """Generate the test from ``self.summary``.

        Returns:
            A list of at most ``int(self.noOfQues)`` distinct question
            strings in random order; empty when no keyword is found.
        """
        return self._build_questions(self._collect_keywords(), int(self.noOfQues))

def extract_text_from_ppt(ppt_file):
    """Return the text of every shape exposing ``text`` in a presentation.

    Shapes are visited slide by slide; each shape's text is followed by a
    newline and the combined string is stripped of surrounding whitespace.

    Args:
        ppt_file: Value passed straight to ``Presentation``.
    """
    deck = Presentation(ppt_file)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces).strip()

# Pull the lecture text out of the slide deck
ppt_file = "/content/PPT 2=Lecture 2 Types and Evolution of Money.pptx"
lecture_text = extract_text_from_ppt(ppt_file)

# Build the generator for the requested number of questions
noOfQues = 5  # how many questions to produce
subjective_generator = SubjectiveTest(lecture_text, noOfQues)

subjective_questions = subjective_generator.generate_test()

# Show each question followed by a blank separator line
print("Subjective Questions:")
for number, question in enumerate(subjective_questions, start=1):
    print(f"Question {number}: {question}")
    print()


Subjective Questions:
Question 1: Define LEGAL.

Question 2: Explain in detail LEGAL TENDER MONEY.

Question 3: Define GRESHAM.

Question 4: What do you mean by PAYMENT MESSAGE.

Question 5: Write a short note on MONEY E-MONEY.



In [10]:
import nltk
import numpy as np
from pptx import Presentation

class SubjectiveTest:
    """Build subjective questions from a block of text.

    Keywords are the chunks matched by ``self.grammar`` over POS-tagged
    sentences; each selected keyword is appended to a question prefix.
    """

    def __init__(self, data):
        """Store the source text.

        Args:
            data: Text to mine for keywords.
        """
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by "
        ]
        # Chunk rules over POS tags; lines without a label continue the
        # CHUNK rule set.
        self.grammar = r"""
            CHUNK: {<NN>+<IN|DT>*<NN>+}
            {<NN>+<IN|DT>*<NNP>+}
            {<NNP>+<NNS>*}
        """
        self.summary = data

    @staticmethod
    def word_tokenizer(sequence):
        """Return the word tokens of every sentence in ``sequence``, in order."""
        return [
            word
            for sent in nltk.sent_tokenize(sequence)
            for word in nltk.word_tokenize(sent)
        ]

    def _collect_keywords(self):
        """Return the distinct upper-cased chunk keywords, in first-seen order."""
        chunker = nltk.RegexpParser(self.grammar)
        keywords = {}  # dict used as an insertion-ordered set
        for sentence in nltk.sent_tokenize(self.summary):
            tokens = nltk.word_tokenize(sentence)
            if len(tokens) <= 20:
                # Only sentences longer than 20 tokens may introduce a
                # keyword, so shorter ones need no tagging or parsing.
                continue
            tree = chunker.parse(nltk.pos_tag(tokens))
            for subtree in tree.subtrees():
                if subtree.label() == "CHUNK":
                    keyword = " ".join(leaf[0] for leaf in subtree).upper()
                    keywords.setdefault(keyword, None)
        return list(keywords)

    def _build_questions(self, keywords, count):
        """Phrase up to ``count`` distinct ``keywords`` as questions.

        Fewer than ``count`` questions are returned when there are not
        enough keywords; an empty keyword list yields an empty result.
        """
        count = max(0, min(count, len(keywords)))
        questions = []
        # A permutation samples without replacement, so no question repeats.
        for index in np.random.permutation(len(keywords))[:count]:
            pattern = self.question_pattern[index % len(self.question_pattern)]
            questions.append(pattern + keywords[index] + ".")
        return questions

    def generate_test(self, noOfQues=5):
        """Generate the test from ``self.summary``.

        Args:
            noOfQues: Number of questions wanted; converted with ``int()``.

        Returns:
            A list of at most ``int(noOfQues)`` distinct question strings in
            random order; empty when no keyword is found.
        """
        return self._build_questions(self._collect_keywords(), int(noOfQues))

def extract_text_from_ppt(ppt_file):
    """Return the text of every shape exposing ``text`` in a presentation.

    Shapes are visited slide by slide; each shape's text is followed by a
    newline and the combined string is stripped of surrounding whitespace.

    Args:
        ppt_file: Value passed straight to ``Presentation``.
    """
    deck = Presentation(ppt_file)
    pieces = [
        shape.text + "\n"
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "".join(pieces).strip()

# Pull the lecture text out of the slide deck
ppt_file = "/content/Module2.pptx"
lecture_text = extract_text_from_ppt(ppt_file)

# Ask the user how many questions to produce, then build the generator
noOfQues = int(input("Enter the number of questions to generate: "))
subjective_generator = SubjectiveTest(lecture_text)

subjective_questions = subjective_generator.generate_test(noOfQues)

# Show each question followed by a blank separator line
print("Subjective Questions:")
for number, question in enumerate(subjective_questions, start=1):
    print(f"Question {number}: {question}")
    print()


Enter the number of questions to generate: 7
Subjective Questions:
Question 1: What do you mean by APACHE KAFTA.

Question 2: What do you mean by BI TOOLS.

Question 3: Explain in detail BIG DATA.

Question 4: Explain in detail PRODUCTION USE.

Question 5: Write a short note on – MAPREDUCE –.

Question 6: Explain in detail OPEN SOURCE.

Question 7: Explain in detail SEMI-STRUCTURED.

