In [1]:
# importing required libraries

import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import json
import re
import numpy as np

In [2]:
tokenizer = AutoTokenizer.from_pretrained('facebook/contriever-msmarco')
model = AutoModel.from_pretrained('facebook/contriever-msmarco')

In [3]:
def mean_pooling(token_embeddings, mask):
    token_embeddings = token_embeddings.masked_fill(~mask[..., None].bool(), 0.)
    sentence_embeddings = token_embeddings.sum(dim=1) / mask.sum(dim=1)[..., None]
    return sentence_embeddings

In [4]:
fp = open("paragraphs.json")
para_data = json.load(fp)

fsec = open("sections.json")
section_data = json.load(fsec)

fs = open("sentences.json")
sentence_data = json.load(fs)

In [5]:
saved_section_embeddings = np.load('section_embeddings.npy')
saved_para_embeddings = np.load('paragraph_embeddings.npy')
saved_sentence_embeddings = np.load('sentence_embeddings.npy')

In [6]:
section_tensors = torch.from_numpy(saved_section_embeddings)
para_tensors = torch.from_numpy(saved_para_embeddings)
sentence_tensors = torch.from_numpy(saved_sentence_embeddings)

In [7]:
print(len(section_data))
print(len(para_data))
print(len(sentence_data))

144
2078
4243


In [8]:
# cleaning the data

def clean(text):
    new_text = re.sub('\n', '', text)
    return new_text

Extracting questions from the gpt-3 sections data

In [9]:
# loading the gpt-3 data

fq = open("../data-generator/gpt-3/GPT-3_section_level.json")
gpt3_section_data = json.load(fq)

In [10]:
# create a dictionary with section number as key and question as value

qc_section_dict = {}
for i in range(len(gpt3_section_data)):
    clean_question = clean(gpt3_section_data[i]['questions'][4:])
    qc_section_dict[i] = clean_question

Retrieving most relevant section, paragraph, and sentence from saved embeddings

In [26]:
# retrieves the section with highest dot product with the question

def section_retriever(embedded_question):
    section_scores = {}
    for i in range(len(section_tensors)):
        score = embedded_question[0]@section_tensors[i]
        section_scores[i] = score
    
    highest_score = max(section_scores, key=section_scores.get)
    return highest_score

In [27]:
# retrieves the paragraph with highest dot product with the question

def paragraph_retriever(embedded_question):
    para_scores = {}
    for i in range(len(para_tensors)):
        score = embedded_question[0]@para_tensors[i]
        para_scores[i] = score
    
    highest_score = max(para_scores, key=para_scores.get)
    return highest_score

In [28]:
# retrieves the sentence with highest dot product with the question

def sentence_retriever(embedded_question):
    sentence_scores = {}
    for i in range(len(sentence_tensors)):
        score = embedded_question[0]@sentence_tensors[i]
        sentence_scores[i] = score
    
    highest_score = max(sentence_scores, key=sentence_scores.get)
    return highest_score

In [38]:
# creating a dataframe with highest section/paragraph/sentence level scores for all questions

cols = ['Question', 'GPT-3 Section', 'Section', 'Paragraph', 'Sentence']
score_data = []

for j in qc_section_dict:
    question = qc_section_dict[j]
    
    #embed the question
    tokenized_question = tokenizer(question, padding=True, truncation=True, return_tensors='pt')
    output_question = model(**tokenized_question)
    embeddings_question = mean_pooling(output_question[0], tokenized_question['attention_mask'])
    
    # retrieve section/para/sentence
    section_no = section_retriever(embeddings_question)
    paragraph_no = paragraph_retriever(embeddings_question)
    sentence_no = sentence_retriever(embeddings_question)
    
    # store the question no., question, and scores in a dataframe
    row = [question, j, section_no, paragraph_no, sentence_no]
    score_data.append(row)

score_df = pd.DataFrame(score_data, columns=cols)

In [39]:
score_df

Unnamed: 0,Question,GPT-3 Section,Section,Paragraph,Sentence
0,How does the design process for a digital FSM ...,0,28,371,1
1,Why is it important to design digital systems ...,1,1,845,1883
2,What is a Gray code?,2,2,12,2600
3,How does a three-bit gray code counter work?,3,2,124,225
4,Is it possible to create a counter with fewer ...,4,41,30,59
...,...,...,...,...,...
139,"What is the meaning of the term ""universal com...",139,60,1687,3636
140,What is the overflow condition for unsigned ad...,140,73,1398,3046
141,Why is the converse of an implication not alwa...,141,81,1571,4119
142,-Why is it important to know when an addition ...,142,53,1397,4124


In [40]:
# extracting the section level retrievals not matching with GPT-3 data

sections_not_match = score_df.loc[~(score_df['GPT-3 Section'] == score_df['Section'])]

In [50]:
score_df.loc[0]

Question         How does the design process for a digital FSM ...
GPT-3 Section                                                    0
Section                                                         28
Paragraph                                                      371
Sentence                                                         1
Name: 0, dtype: object

In [57]:
print(score_df.loc[0]['Question'])
print("-------------")
print(gpt3_section_data[score_df.loc[0]['GPT-3 Section']]['positive_ctxs']['text'])
print("-------------")
print("SECTION:", section_data[str(score_df.loc[0]['Section'])])
print("-------------")
print("PARAGRAPH:", para_data[str(score_df.loc[0]['Paragraph'])])
print("-------------")
print("SENTENCE: ", sentence_data[str(score_df.loc[0]['Sentence'])])

How does the design process for a digital FSM work?
-------------
{Finite State Machine Design Examples, Part I}

This set of notes uses a series of examples to illustrate design principles 
for the implementation of finite state machines (FSMs) using digital logic.
We begin with an overview of the design process for a digital FSM, from
the development of an abstract model through the implementation of
functions for the next-state variables and output signals.
Our first few examples cover only the concrete aspects:
we implement several counters, which illustrate the basic 
process of translating a concrete and complete state transition diagram
into an implementation based on flip-flops and logic gates.
We next consider a counter with a number of states that is not a power of
two, with which we illustrate the need for FSM initialization.
 As part of solving the initialization problem, we also introduce 
 a general form of selection logic called a multiplexer.

We then consider the desig

In [59]:
print(score_df.loc[1])
print("-------------")
print(score_df.loc[1]['Question'])
print("-------------")
print(gpt3_section_data[score_df.loc[0]['GPT-3 Section']]['positive_ctxs']['text'])
print("-------------")
print("SECTION:", section_data[str(score_df.loc[1]['Section'])])
print("-------------")
print("PARAGRAPH:", para_data[str(score_df.loc[1]['Paragraph'])])
print("-------------")
print("SENTENCE: ", sentence_data[str(score_df.loc[1]['Sentence'])])

Question         Why is it important to design digital systems ...
GPT-3 Section                                                    1
Section                                                          1
Paragraph                                                      845
Sentence                                                      1883
Name: 1, dtype: object
-------------
Why is it important to design digital systems that are compatible with other digital systems?
-------------
{Finite State Machine Design Examples, Part I}

This set of notes uses a series of examples to illustrate design principles 
for the implementation of finite state machines (FSMs) using digital logic.
We begin with an overview of the design process for a digital FSM, from
the development of an abstract model through the implementation of
functions for the next-state variables and output signals.
Our first few examples cover only the concrete aspects:
we implement several counters, which illustrate the basic 
proces