In [None]:
from sentence_transformers import SentenceTransformer, CrossEncoder, util
import torch
from nltk import sent_tokenize
import nltk
import re
import spacy
import pytextrank
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("textrank")
nltk.download('punkt')

def semanticsearch(para, topic, k=5):

    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """

    # Separates the sentences in the given para
    passage = sent_tokenize(para)

    # Loads the Bi-Encoder Model 
    bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
    bi_encoder.max_seq_length = 256     #Truncate long passages to 256 tokens

    # Loads the Cross-Encoder Reranker
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    # embedding the paragraph and topic
    corpus_embeddings = bi_encoder.encode(passage, convert_to_tensor=True, show_progress_bar=True)
    question_embedding = bi_encoder.encode(topic, convert_to_tensor=True)
    
    # enables gpu if available
    if torch.cuda.is_available():
        question_embedding = question_embedding.cuda()

    # Select 2 * k sentences from the para using the bi-encoder
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=k*2)
    hits = hits[0]  

    # Reranks the selected sentences and helps select k sentences
    cross_inp = [[topic, passage[hit['corpus_id']]] for hit in hits]
    cross_scores = cross_encoder.predict(cross_inp)

    for idx in range(len(cross_scores)):
        hits[idx]['cross-score'] = cross_scores[idx]

    results = []
    hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
    for hit in hits[0:min(k,len(passage))]:
        results.append(passage[hit['corpus_id']].lower())
    
    return results



def find_keyphrase(para):

    """
    Selects the best keyword from the text

    Parameters
    ----------
    sent : string
        Sentence

    Returns
    -------
    word : string
        the best keyword from the text
          
    """

    # To avoid alpha and numeric values as the keywords
    doc = nlp(para)
    i = 0
    while i < len(doc._.phrases):
        word = doc._.phrases[0].text
        word = re.sub(r'[^\w\s]', ' ', word)
        temp = re.sub(' ', '', word)
        if temp.isnumeric():
            return word
        elif temp.isalpha():
            return word
        i += 1
    
    return False

def sent_ans_extractor(para, title , k=5):
    
    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.
    Selects the best keyword from the text

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """
    new_sents = []
    words = []

    sents = semanticsearch(para, title, k)
    for i in sents:
        word = find_keyphrase(i)
        if word == False:
            continue
        new_sents.append(i)
        words.append(word)
    return new_sents, words

In [None]:
para = """Neural networks, also known as artificial neural networks (ANNs) or simulated neural networks (SNNs), are a subset of machine learning and are at the heart of deep learning algorithms. Their name and structure are inspired by the human brain, mimicking the way that biological neurons signal to one another.

Artificial neural networks (ANNs) are comprised of a node layers, containing an input layer, one or more hidden layers, and an output layer. Each node, or artificial neuron, connects to another and has an associated weight and threshold. If the output of any individual node is above the specified threshold value, that node is activated, sending data to the next layer of the network. Otherwise, no data is passed along to the next layer of the network.

Neural networks rely on training data to learn and improve their accuracy over time. However, once these learning algorithms are fine-tuned for accuracy, they are powerful tools in computer science and artificial intelligence, allowing us to classify and cluster data at a high velocity. Tasks in speech recognition or image recognition can take minutes versus hours when compared to the manual identification by human experts. One of the most well-known neural networks is Google’s search algorithm.

Once an input layer is determined, weights are assigned. These weights help determine the importance of any given variable, with larger ones contributing more significantly to the output compared to other inputs. All inputs are then multiplied by their respective weights and then summed. Afterward, the output is passed through an activation function, which determines the output. If that output exceeds a given threshold, it “fires” (or activates) the node, passing data to the next layer in the network. This results in the output of one node becoming in the input of the next node. This process of passing data from one layer to the next layer defines this neural network as a feedforward network.

If we use the activation function from the beginning of this section, we can determine that the output of this node would be 1, since 6 is greater than 0. In this instance, you would go surfing; but if we adjust the weights or the threshold, we can achieve different outcomes from the model. When we observe one decision, like in the above example, we can see how a neural network could make increasingly complex decisions depending on the output of previous decisions or layers.

In the example above, we used perceptrons to illustrate some of the mathematics at play here, but neural networks leverage sigmoid neurons, which are distinguished by having values between 0 and 1. Since neural networks behave similarly to decision trees, cascading data from one node to another, having x values between 0 and 1 will reduce the impact of any given change of a single variable on the output of any given node, and subsequently, the output of the neural network.

Neural networks can be classified into different types, which are used for different purposes. While this isn’t a comprehensive list of types, the below would be representative of the most common types of neural networks that you’ll come across for its common use cases:

The perceptron is the oldest neural network, created by Frank Rosenblatt in 1958.

Feedforward neural networks, or multi-layer perceptrons (MLPs), are what we’ve primarily been focusing on within this article. They are comprised of an input layer, a hidden layer or layers, and an output layer. While these neural networks are also commonly referred to as MLPs, it’s important to note that they are actually comprised of sigmoid neurons, not perceptrons, as most real-world problems are nonlinear. Data usually is fed into these models to train them, and they are the foundation for computer vision, natural language processing, and other neural networks.

Convolutional neural networks (CNNs) are similar to feedforward networks, but they’re usually utilized for image recognition, pattern recognition, and/or computer vision. These networks harness principles from linear algebra, particularly matrix multiplication, to identify patterns within an image.

Recurrent neural networks (RNNs) are identified by their feedback loops. These learning algorithms are primarily leveraged when using time-series data to make predictions about future outcomes, such as stock market predictions or sales forecasting.

Deep Learning and neural networks tend to be used interchangeably in conversation, which can be confusing. As a result, it’s worth noting that the “deep” in deep learning is just referring to the depth of layers in a neural network. A neural network that consists of more than three layers—which would be inclusive of the inputs and the output—can be considered a deep learning algorithm. A neural network that only has two or three layers is just a basic neural network.

"""
title = 'what are neural networks'

In [None]:
sent_ans_extractor(para, title , k=5)

In [None]:
### WORKING VERSION ###

from sentence_transformers import SentenceTransformer, CrossEncoder, util
import torch
from nltk import sent_tokenize
import nltk
import re
import spacy
import pytextrank
from transformers import T5Tokenizer, T5ForConditionalGeneration
from QuestionGenerator import QuestionGenerator
import random
from collections import OrderedDict
from sense2vec import Sense2Vec
import requests
from bs4 import BeautifulSoup
from nltk.corpus import wordnet as wn
import random
import pandas as pd 
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("textrank")
nltk.download('punkt')

def semanticsearch(para, topic, k=5):

    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """

    # Separates the sentences in the given para
    passage = sent_tokenize(para)

    # Loads the Bi-Encoder Model 
    bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
    bi_encoder.max_seq_length = 256     #Truncate long passages to 256 tokens

    # Loads the Cross-Encoder Reranker
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    # embedding the paragraph and topic
    corpus_embeddings = bi_encoder.encode(passage, convert_to_tensor=True, show_progress_bar=True)
    question_embedding = bi_encoder.encode(topic, convert_to_tensor=True)
    
    # enables gpu if available
    if torch.cuda.is_available():
        question_embedding = question_embedding.cuda()

    # Select 2 * k sentences from the para using the bi-encoder
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=k*2)
    hits = hits[0]  

    # Reranks the selected sentences and helps select k sentences
    cross_inp = [[topic, passage[hit['corpus_id']]] for hit in hits]
    cross_scores = cross_encoder.predict(cross_inp)

    for idx in range(len(cross_scores)):
        hits[idx]['cross-score'] = cross_scores[idx]

    results = []
    hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
    for hit in hits[0:min(k,len(passage))]:
        results.append(passage[hit['corpus_id']].lower())
    
    return results



def find_keyphrase(para):

    """
    Selects the best keyword from the text

    Parameters
    ----------
    sent : string
        Sentence

    Returns
    -------
    word : string
        the best keyword from the text
          
    """

    # To avoid alpha and numeric values as the keywords
    doc = nlp(para)
    i = 0
    while i < len(doc._.phrases):
        word = doc._.phrases[0].text
        word = re.sub(r'[^\w\s]', ' ', word)
        temp = re.sub(' ', '', word)
        if temp.isnumeric():
            return word
        elif temp.isalpha():
            return word
        i += 1
    
    return False

def sent_ans_extractor(para, topic , k=5):
    
    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.
    Selects the best keyword from the text

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """
    new_sents = []
    words = []

    sents = semanticsearch(para, topic, k)
    for i in sents:
        word = find_keyphrase(i)
        if word == False:
            continue
        new_sents.append(i)
        words.append(word)
    return new_sents, words

def question_generator(sentence, answer):
    
    """
    Takes the sentence and the answer as the input
    to generate a question

    Parameters
    ----------
    sentece : string
        Text for the sentence
    answer : string
        Text for the answer

    Returns
    -------
    question : string
        Text for the string
          
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    my_model_params = { "MODEL_DIR": "./outputs/final/", 
                    "MAX_SOURCE_TEXT_LENGTH": 75
                    } 

    # encode text
    tokenizer = T5Tokenizer.from_pretrained(my_model_params["MODEL_DIR"])
    tokenizer.add_special_tokens({'additional_special_tokens': ['<answer>', '<context>']})

    # using T5 with language model layer
    model = T5ForConditionalGeneration.from_pretrained(my_model_params["MODEL_DIR"])
    model = model.to(device)  
    
    # prepare input
    qg_input = f"<answer> {answer} <context> {sentence}"

    # generate question
    qg =  QuestionGenerator(model, tokenizer, device, max_input_length=75, max_output_length=25)
    question = qg.generate(source_text=qg_input)
    return question


def find_related_word_online(word):

    """
    Takes word/phrase as an input and generates similar words/phrases 
    aka distractors using webscrapping from relatedwords.org website

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for

    Returns
    -------
    words : list of strings
        List of distractors for the given input
          
    """
    r = requests.get("https://relatedwords.org/relatedto/" + word)
    soup = BeautifulSoup(r.content, 'html5lib') # If this line causes an error, run 'pip install html5lib' or install html5lib
    sent = soup.prettify()[soup.prettify().find('"terms"'):]
    words = []
    count = 0
    while count != 3:
        ind1 = sent.find('"word":')+8
        ind2 = sent[ind1:].find('"')+ind1
        words.append(sent[ind1:ind2])
        sent = sent[ind2:]
        count+=1
    return words

def sense2vec_get_words(word,s2v):
    
    """
    Takes word/phrase as an input and generates similar words/phrases 
    aka distractors using sense2vec

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for
    s2v : Module instance from Sense2Vec class
        Module instance from Sense2Vec class to generate distractor

    Returns
    -------
    distractors : list of strings
        List of distractors for the given input
          
    """
    output = []
    word = word.lower()
    word = word.replace(" ", "_")

    sense = s2v.get_best_sense(word)

    if sense == None:
        return find_related_word_online(word)

    most_similar = s2v.most_similar(sense, n=20)
 
    for each_word in most_similar:
        append_word = each_word[0].split("|")[0].replace("_", " ").lower()
        if append_word.lower() != word:
            if sense.split("|")[1] == each_word[0].split("|")[1]:
                output.append(append_word.title().lower())

    out = list(OrderedDict.fromkeys(output))
    return out

def get_distractors(word):
    
    """
    Takes word/phrase as an input and generates similar words/phrases aka distractors

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for

    Returns
    -------
    distractors : list of strings
        List of distractors for the given input
          
    """
    distractors = []
    if word.isnumeric():
        if len(word) == 4:
            # if 4-digit number --> assume it's a year --> add/subtract random number btw 1-10
            randomlist = random.sample(range(-10, 10), 3)
            for num in randomlist:
                distractors.append(str(int(word) + num))
            return distractors
        else:
            # else if other number --> add/subtract random number --> don't change +ve or -ve
            randomlist = random.sample(range(-1000, 1000), 3)
            for num in randomlist:
                distractors.append(str(int(word) + num))
            return distractors

    else:
        word = word.lower()
        s2v = Sense2Vec().from_disk('s2v_old')
        distractors = sense2vec_get_words(word, s2v)
        return distractors

def distractor_generator(answer):

    """
    Takes word.phrase as an input and generates similar words/phrases aka distractors

    Parameters
    ----------
    answer : string
        input words/phrases to generate distractors for

    Returns
    -------
    all_distractors : list of strings
        List of distractors for the given input
          
    """
    all_distractors = []
    dis = {}
    for word in answer.split(" "):
        distractor = get_distractors(word)
        dis[word] = distractor
    
    while len(all_distractors) < 3:
        distr = ""
        for word in dis:
            rand_idx = int(random.random() * len(dis[word]))
            distr += dis[word][rand_idx] + " "
        if distr not in all_distractors:
            all_distractors.append(distr[:-1])
    return all_distractors

def MCQ_generator(para, topic , k=5):

    """
    Takes paragraph and it's topic as its input.
    Generates questions, correct and incorrect answers for MCQs

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """
    sents, correct_ans = sent_ans_extractor(para, topic , k=5) # extracts the sentences and keywords
    questions = []
    all_ans = []
    for i,j in zip(sents, correct_ans):
        ques = question_generator(i, j) # generates the question
        questions.append(ques)

        temp = distractor_generator(j) # generates the distractors
        temp.append(j)

        random.shuffle(temp)
        all_ans.append(temp)

        # print('sentence :',i)
        # print('question :',ques)
        # print('all answer :',temp)
        # print('correct answer:',j)
        # print('\n')

    return questions, all_ans, correct_ans

In [53]:
from sentence_transformers import SentenceTransformer, CrossEncoder, util
import torch
from nltk import sent_tokenize
import nltk
import re
import spacy
import pytextrank
from transformers import T5Tokenizer, T5ForConditionalGeneration
from QuestionGenerator import QuestionGenerator
import random
from collections import OrderedDict
from sense2vec import Sense2Vec
import requests
from bs4 import BeautifulSoup
from nltk.corpus import wordnet as wn
import random
import pandas as pd 
nlp = spacy.load("en_core_web_lg")
nlp.add_pipe("textrank")
nltk.download('punkt')

def semanticsearch(para, topic, k=5):

    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """

    # Separates the sentences in the given para
    passage = sent_tokenize(para)

    # Loads the Bi-Encoder Model 
    bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
    bi_encoder.max_seq_length = 256     #Truncate long passages to 256 tokens

    # Loads the Cross-Encoder Reranker
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    # embedding the paragraph and topic
    corpus_embeddings = bi_encoder.encode(passage, convert_to_tensor=True, show_progress_bar=True)
    question_embedding = bi_encoder.encode(topic, convert_to_tensor=True)
    
    # enables gpu if available
    if torch.cuda.is_available():
        question_embedding = question_embedding.cuda()

    # Select 2 * k sentences from the para using the bi-encoder
    hits = util.semantic_search(question_embedding, corpus_embeddings, top_k=k*2)
    hits = hits[0]  

    # Reranks the selected sentences and helps select k sentences
    cross_inp = [[topic, passage[hit['corpus_id']]] for hit in hits]
    cross_scores = cross_encoder.predict(cross_inp)

    for idx in range(len(cross_scores)):
        hits[idx]['cross-score'] = cross_scores[idx]

    results = []
    hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
    for hit in hits[0:min(k,len(passage))]:
        results.append(passage[hit['corpus_id']].lower())
    
    return results



def find_keyphrase(para):

    """
    Selects the best keyword from the text

    Parameters
    ----------
    sent : string
        Sentence

    Returns
    -------
    word : string
        the best keyword from the text
          
    """

    # To avoid alpha and numeric values as the keywords
    doc = nlp(para)
    i = 0
    while i < len(doc._.phrases):
        word = doc._.phrases[0].text
        word = re.sub(r'[^\w\s]', ' ', word)
        temp = re.sub(' ', '', word)
        if temp.isnumeric():
            return word
        elif temp.isalpha():
            return word
        i += 1
    
    return False

def sent_ans_extractor(para, topic , k=5):
    
    """
    Takes paragraph and it's topic as its input.
    Extracts top 5 best sentences best linked to the topic.
    Selects the best keyword from the text

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """
    new_sents = []
    words = []

    sents = semanticsearch(para, topic, k)
    for i in sents:
        word = find_keyphrase(i)
        if word == False:
            continue
        new_sents.append(i)
        words.append(word)
    return new_sents, words

def question_generator(sentence, answer):
    
    """
    Takes the sentence and the answer as the input
    to generate a question

    Parameters
    ----------
    sentece : string
        Text for the sentence
    answer : string
        Text for the answer

    Returns
    -------
    question : string
        Text for the string
          
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    my_model_params = { "MODEL_DIR": "./outputs/final/", 
                    "MAX_SOURCE_TEXT_LENGTH": 75
                    } 

    # encode text
    tokenizer = T5Tokenizer.from_pretrained(my_model_params["MODEL_DIR"])
    tokenizer.add_special_tokens({'additional_special_tokens': ['<answer>', '<context>']})

    # using T5 with language model layer
    model = T5ForConditionalGeneration.from_pretrained(my_model_params["MODEL_DIR"])
    model = model.to(device)  
    
    # prepare input
    qg_input = f"<answer> {answer} <context> {sentence}"

    # generate question
    qg =  QuestionGenerator(model, tokenizer, device, max_input_length=75, max_output_length=25)
    question = qg.generate(source_text=qg_input)
    return question


def find_related_word_online(word):

    """
    Takes word/phrase as an input and generates similar words/phrases 
    aka distractors using webscrapping from relatedwords.org website

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for

    Returns
    -------
    words : list of strings
        List of distractors for the given input
          
    """
    r = requests.get("https://relatedwords.org/relatedto/" + word)
    soup = BeautifulSoup(r.content, 'html5lib') # If this line causes an error, run 'pip install html5lib' or install html5lib
    sent = soup.prettify()[soup.prettify().find('"terms"'):]
    words = []
    count = 0
    while count != 20:
        ind1 = sent.find('"word":')+8
        ind2 = sent[ind1:].find('"')+ind1
        words.append(sent[ind1:ind2])
        sent = sent[ind2:]
        count+=1
    return words

def sense2vec_get_words(word,s2v):
    
    """
    Takes word/phrase as an input and generates similar words/phrases 
    aka distractors using sense2vec

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for
    s2v : Module instance from Sense2Vec class
        Module instance from Sense2Vec class to generate distractor

    Returns
    -------
    distractors : list of strings
        List of distractors for the given input
          
    """
    output = []
    word = word.lower()
    word = word.replace(" ", "_")

    sense = s2v.get_best_sense(word)

    if sense == None:
        return find_related_word_online(word)

    most_similar = s2v.most_similar(sense, n=20)
 
    for each_word in most_similar:
        append_word = each_word[0].split("|")[0].replace("_", " ").lower()
        if append_word.lower() != word:
            if sense.split("|")[1] == each_word[0].split("|")[1]:
                output.append(append_word.title().lower())

    out = list(OrderedDict.fromkeys(output))
    return out

def get_distractors(word):
    
    """
    Takes word/phrase as an input and generates similar words/phrases aka distractors

    Parameters
    ----------
    word : string
        input words/phrases to generate distractors for

    Returns
    -------
    distractors : list of strings
        List of distractors for the given input
          
    """
    distractors = []
    if word.isnumeric():
        if len(word) == 4:
            # if 4-digit number --> assume it's a year --> add/subtract random number btw 1-10
            randomlist = random.sample(range(-10, 10), 20)
            for num in randomlist:
                distractors.append(str(int(word) + num))
            return distractors
        else:
            # else if other number --> add/subtract random number --> don't change +ve or -ve
            randomlist = random.sample(range(-1000, 1000), 20)
            for num in randomlist:
                distractors.append(str(int(word) + num))
            return distractors

    else:
        word = word.lower()
        s2v = Sense2Vec().from_disk('s2v_old')
        distractors = sense2vec_get_words(word, s2v)
        return distractors

def distractor_generator(answer):

    """
    Takes word.phrase as an input and generates similar words/phrases aka distractors

    Parameters
    ----------
    answer : string
        input words/phrases to generate distractors for

    Returns
    -------
    all_distractors : list of strings
        List of distractors for the given input
          
    """
    cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

    all_distractors = []
    dis = {}
    for word in answer.split(" "):
        distractor = get_distractors(word)
        dis[word] = distractor
    
    while len(all_distractors) < 20:
        distr = ""
        for word in dis:
            rand_idx = int(random.random() * len(dis[word]))
            distr += dis[word][rand_idx] + " "
        if not distr in all_distractors:
            all_distractors.append(distr[:-1])

    cross_inp = [[answer, all_distractors[i]] for i in range(len(all_distractors))]
    cross_scores = cross_encoder.predict(cross_inp)

    results = []
    for i in sorted(range(len(cross_scores)), key=lambda i: cross_scores[i])[-3:]:
        results.append(all_distractors[i])

    return results

def MCQ_generator(para, topic , k=5):

    """
    Takes paragraph and it's topic as its input.
    Generates questions, correct and incorrect answers for MCQs

    Parameters
    ----------
    para : string
        Text for the pare
    topic : string
        Text for the topic
    k : int
        Number of sentences to be selected 
        (default value is 5)

    Returns
    -------
    data : list of strings
        List of k sentences best linked topic
          
    """
    sents, correct_ans = sent_ans_extractor(para, topic , k=5) # extracts the sentences and keywords
    questions = []
    all_ans = []
    for i,j in zip(sents, correct_ans):
        ques = question_generator(i, j) # generates the question
        questions.append(ques)

        temp = distractor_generator(j) # generates the distractors
        temp.append(j)

        random.shuffle(temp)
        all_ans.append(temp)
        
        print('sentence :',i)
        print('question :',ques)
        print('all answer :',temp)
        print('correct answer:',j)
        print('\n')

    return questions, all_ans, correct_ans

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mihir\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


singing and dancing
lead singer
baby boy

In [54]:
distractor_generator('singing and dancing')


['belting either danced', 'serenaded but dance', 'dancing either dances']

In [55]:
distractor_generator('lead singer')

['leads lead guitarist', 'cause lead vocalist', 'results lead singer']

In [56]:
distractor_generator('baby boy')

['baby boy doll', 'baby boy other boy', 'newborn baby little boy']

In [57]:
para = """Neural networks, also known as artificial neural networks (ANNs) or simulated neural networks (SNNs), are a subset of machine learning and are at the heart of deep learning algorithms. Their name and structure are inspired by the human brain, mimicking the way that biological neurons signal to one another.

Artificial neural networks (ANNs) are comprised of a node layers, containing an input layer, one or more hidden layers, and an output layer. Each node, or artificial neuron, connects to another and has an associated weight and threshold. If the output of any individual node is above the specified threshold value, that node is activated, sending data to the next layer of the network. Otherwise, no data is passed along to the next layer of the network.

Neural networks rely on training data to learn and improve their accuracy over time. However, once these learning algorithms are fine-tuned for accuracy, they are powerful tools in computer science and artificial intelligence, allowing us to classify and cluster data at a high velocity. Tasks in speech recognition or image recognition can take minutes versus hours when compared to the manual identification by human experts. One of the most well-known neural networks is Google’s search algorithm.

Once an input layer is determined, weights are assigned. These weights help determine the importance of any given variable, with larger ones contributing more significantly to the output compared to other inputs. All inputs are then multiplied by their respective weights and then summed. Afterward, the output is passed through an activation function, which determines the output. If that output exceeds a given threshold, it “fires” (or activates) the node, passing data to the next layer in the network. This results in the output of one node becoming in the input of the next node. This process of passing data from one layer to the next layer defines this neural network as a feedforward network.

If we use the activation function from the beginning of this section, we can determine that the output of this node would be 1, since 6 is greater than 0. In this instance, you would go surfing; but if we adjust the weights or the threshold, we can achieve different outcomes from the model. When we observe one decision, like in the above example, we can see how a neural network could make increasingly complex decisions depending on the output of previous decisions or layers.

In the example above, we used perceptrons to illustrate some of the mathematics at play here, but neural networks leverage sigmoid neurons, which are distinguished by having values between 0 and 1. Since neural networks behave similarly to decision trees, cascading data from one node to another, having x values between 0 and 1 will reduce the impact of any given change of a single variable on the output of any given node, and subsequently, the output of the neural network.

Neural networks can be classified into different types, which are used for different purposes. While this isn’t a comprehensive list of types, the below would be representative of the most common types of neural networks that you’ll come across for its common use cases:

The perceptron is the oldest neural network, created by Frank Rosenblatt in 1958.

Feedforward neural networks, or multi-layer perceptrons (MLPs), are what we’ve primarily been focusing on within this article. They are comprised of an input layer, a hidden layer or layers, and an output layer. While these neural networks are also commonly referred to as MLPs, it’s important to note that they are actually comprised of sigmoid neurons, not perceptrons, as most real-world problems are nonlinear. Data usually is fed into these models to train them, and they are the foundation for computer vision, natural language processing, and other neural networks.

Convolutional neural networks (CNNs) are similar to feedforward networks, but they’re usually utilized for image recognition, pattern recognition, and/or computer vision. These networks harness principles from linear algebra, particularly matrix multiplication, to identify patterns within an image.

Recurrent neural networks (RNNs) are identified by their feedback loops. These learning algorithms are primarily leveraged when using time-series data to make predictions about future outcomes, such as stock market predictions or sales forecasting.

Deep Learning and neural networks tend to be used interchangeably in conversation, which can be confusing. As a result, it’s worth noting that the “deep” in deep learning is just referring to the depth of layers in a neural network. A neural network that consists of more than three layers—which would be inclusive of the inputs and the output—can be considered a deep learning algorithm. A neural network that only has two or three layers is just a basic neural network.

"""
title = 'what are neural networks'

In [58]:
MCQ_generator(para, title, k=5)

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

sentence : neural networks, also known as artificial neural networks (anns) or simulated neural networks (snns), are a subset of machine learning and are at the heart of deep learning algorithms.
question : ['is a neural network at the heart of deep learning algorithms?']
all answer : ['deeper programming machine learning', 'deepest learned machine learning', 'deepest learn computations', 'deep learning algorithms']
correct answer: deep learning algorithms


sentence : a neural network that only has two or three layers is just a basic neural network.
question : ['a neural network that has two or three layers is just a basic neural network?']
all answer : ['a neural network', 'that neuronal based network', 'an neuronal new network', 'that neuronal whole network']
correct answer: a neural network


sentence : artificial neural networks (anns) are comprised of a node layers, containing an input layer, one or more hidden layers, and an output layer.
question : ['is a layer with an input la

([['is a neural network at the heart of deep learning algorithms?'],
  ['a neural network that has two or three layers is just a basic neural network?'],
  ['is a layer with an input layer, hidden layers, and an output layer?'],
  ['do neural networks use training data to learn and improve their accuracy over time?'],
  ['are cnns similar to feedforward networks?']],
 [['deeper programming machine learning',
   'deepest learned machine learning',
   'deepest learn computations',
   'deep learning algorithms'],
  ['a neural network',
   'that neuronal based network',
   'an neuronal new network',
   'that neuronal whole network'],
  ['a output signal single layer',
   'an output layer',
   'that input/output solid layer',
   'a input signal layers'],
  ['time', 'time-', 'time time', 'time-'],
  ['main computer clear vision',
   'main computer clear vision',
   'computer vision',
   'windows computer direct vision']],
 ['deep learning algorithms',
  'a neural network',
  'an output layer