In [1]:
import os
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import CTransformers
import sys
import datetime
from embed import Embeddor
# from awq import AutoAWQForCausalLM
# from transformers import AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Absolute location of the lecture transcript on the author's machine.
# A raw string keeps every backslash literal: the previous non-raw literal relied
# on invalid escape sequences (\J, \D, \M, \I) being passed through unchanged,
# which emits a warning on current Python versions.
transcript_path = r"D:\Junior23\Datasets\MIT Lectures\MIT_courses_chapters\Introduction to Computer Science and Programming in Python_chapters\video_1\full_transcript.txt"

with open(transcript_path, "r", encoding="utf-8") as file:
    transcript = file.read()

user_input = transcript #input("Enter your text: ")

# Persist the text so it can be read back through a LangChain document loader.
with open("user_input.txt", "w", encoding="utf-8") as file:
    file.write(user_input)


# The file was just written as UTF-8, so read it back with the same codec
# instead of the platform default (which is not UTF-8 on Windows).
loader = TextLoader("user_input.txt", encoding="utf-8")
documents=loader.load()

#### Chunking

In [3]:
# Cut the loaded document into chunks of at most 500 characters, with 20
# characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
text_chunks = text_splitter.split_documents(documents)

# Plain-text content of every chunk, in the same order as text_chunks.
list_of_sentences = [chunk.page_content for chunk in text_chunks]

# Query used by every retrieval / generation cell below.
question = "Summarize the transcript in a detailed way."

#### Embedding

In [9]:
# Rank the chunk texts against the question with the project-local Embeddor helper.
# NOTE(review): `res` is used below as a sequence of indices into text_chunks;
# the meaning of `vector_embedding` is not visible here - confirm in embed.py.
embedder = Embeddor()
res, vector_embedding = embedder.get_top_k_from_texts(question, list_of_sentences)

# Embedding model (run on CPU) used to build the FAISS index.
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/msmarco-distilbert-base-tas-b',
    model_kwargs={'device': 'cpu'},
)

# Chunks selected by the Embeddor ranking, kept as Document objects.
results_sentences = [text_chunks[idx] for idx in res]
print(results_sentences)

# Index every chunk so the RetrievalQA chains can search them.
vector_store = FAISS.from_documents(text_chunks, embeddings)

# Bare last expression: only displays the retriever; the chain cells build their own.
vector_store.as_retriever(search_kwargs={'k': 2})





Query: Summarize the transcript in a detailed way.

Top 5 most similar sentences in corpus:
(Score: 86.6734)
(Score: 86.4912)
(Score: 86.1197)
(Score: 85.4176)
(Score: 85.3636)
[Document(page_content="modular, easy to understand. And not only that, not\nonly will your code be read by other people,\nbut next year, maybe, you'll take another\ncourse, and you'll want to look back at\nsome of the problems that you wrote in this class. You want to be able\nto reread your code. If it's a big mess, you might\nnot be able to understand-- or reunderstand--\nwhat you were doing. So writing readable\ncode and organizing code is also a big part. And the last section is going", metadata={'source': 'user_input.txt'}), Document(page_content="flow to programs. That's what the second\nline is going to be about. The second big\npart of this course is a little bit more\nabstract, and it deals with how do you write\ngood code, good style, code that's readable. When you write code, you\nwant to write i

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain.vectorstores.faiss.FAISS object at 0x0000018888B639A0>, search_kwargs={'k': 2})

In [5]:
# Prompt skeleton for the RetrievalQA chains. {context} and {question} are left
# as placeholders here and are declared as the template's input variables below.
template="""
[INST] <<SYS>>
Use the following pieces of information to answer the user's question. If you don't know the answer just say you don't know, don't try to make up an answer.
<</SYS>>
Context:{context}
Question:{question}[/INST]
Only return the helpful answer below and nothing else
Helpful answer
"""

qa_prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=template,
)

### Llama 2 models

In [70]:
# Quantised Llama 2 chat model loaded through ctransformers.
# NOTE(review): the model argument looks like a path relative to the notebook - confirm.
llm = CTransformers(
    model="QA_models/llama-2-7b-chat.gguf.q4_K_M.bin",
    model_type="llama",
    config={
        'max_new_tokens': 100,   # upper bound on generated tokens per answer
        'temperature': 0,
        'context_length': 512,
        'gpu_layers': 10,
    },
)
print("model downloaded")

model downloaded


In [71]:
# Retrieve the 2 nearest chunks from the FAISS index and "stuff" them into
# qa_prompt as the context for the currently loaded llm.
retriever = vector_store.as_retriever(search_kwargs={'k': 2})
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt': qa_prompt},
)

# Run the chain on the question and show only the generated answer text.
result = chain({'query': question})
print(result['result'])

The user is asking about how to compare programs in Python, specifically how to determine which program is better. The transcript provides information on three different sections related to learning how to program in Python:

1. Representing knowledge with data structures: This section focuses on understanding how to use data structures to represent knowledge in a program.
2. Learning how to program: This section covers the basics of programming, including creating objects and writing programs that are not linear.
3. Compar


### Mistral models

In [64]:
# Quantised Mistral instruct model loaded through ctransformers; this rebinds
# `llm`, so chain cells run after this one use the Mistral model.
# NOTE(review): the model argument looks like a path relative to the notebook - confirm.
llm = CTransformers(
    model="QA_models/openinstruct-mistral-7b.Q4_K_M.gguf",
    model_type="mistral",
    config={
        'max_new_tokens': 100,   # upper bound on generated tokens per answer
        'temperature': 0,
        'context_length': 512,
        'gpu_layers': 10,
    },
)
print("model downloaded")

model downloaded


In [69]:
# Same retrieval setup as the previous chain cell, rebuilt around whichever
# `llm` is currently bound: 2 nearest chunks stuffed into qa_prompt.
retriever = vector_store.as_retriever(search_kwargs={'k': 2})
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={'prompt': qa_prompt},
)

# Run the chain on the question and show only the generated answer text.
result = chain({'query': question})
print(result['result'])

The first two are actually part of the programming in Introduction to Programming and Computer Science in Python. And the last one deals mostly with the computer science part in Introduction to Programming and Computer Science in Python. We're going to talk about, once you have learned how to write programs in Python, how do you compare programs in Python? How do you know that one program is better than the other? How do you know

three different sections. The first one is related to these first


### CTranslate2

In [28]:
# Concatenate the text of the selected chunks into one space-separated context string.
input_prompt = ' '.join(doc.page_content for doc in results_sentences)
input_prompt

"modular, easy to understand. And not only that, not\nonly will your code be read by other people,\nbut next year, maybe, you'll take another\ncourse, and you'll want to look back at\nsome of the problems that you wrote in this class. You want to be able\nto reread your code. If it's a big mess, you might\nnot be able to understand-- or reunderstand--\nwhat you were doing. So writing readable\ncode and organizing code is also a big part. And the last section is going flow to programs. That's what the second\nline is going to be about. The second big\npart of this course is a little bit more\nabstract, and it deals with how do you write\ngood code, good style, code that's readable. When you write code, you\nwant to write it such that-- you're in big company,\nother people will read it, other people will\nuse it, so it has to be readable and\nunderstandable by others. To that end, you\nneed to write code that's well organized, three different sections. The first one is related to\nthese 

In [11]:
# Builds an [INST]/<<SYS>>-tagged prompt as a plain string. Because this is an
# f-string, input_prompt and question are interpolated immediately, so both must
# already be defined when this cell runs. This rebinds the name `template` that
# was used earlier to build qa_prompt.
# NOTE(review): the next cell assigns `template` again and no visible cell passes
# `template` to a model - confirm whether this variant is still needed.
template=f"""
[INST] <<SYS>>
Use the following pieces of information to answer the user's question. If you don't know the answer just say you don't know, don't try to make up an answer.
<</SYS>>
Context:{input_prompt}
Question:{question}[/INST]
Only return the helpful answer below and nothing else
Helpful answer
"""


In [29]:
# Builds an "### Instruction / ### Response" style prompt as a plain string,
# interpolating input_prompt and question immediately (f-string). Assigning to
# `template` replaces whatever string the name held before this cell ran.
# NOTE(review): no visible cell passes `template` to a model - confirm intended use.
template=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nContext:{input_prompt}
Question:{question}\n\n### Response:"""

In [40]:
import os

# Point the TRANSFORMERS_CACHE environment variable at a folder on the E: drive.
# NOTE(review): presumably this has to run before transformers is first imported
# in the kernel to take effect - confirm.
os.environ["TRANSFORMERS_CACHE"] = "E:/cache/transformers"

In [1]:
from transformers import AutoTokenizer
from hf_hub_ctranslate2 import GeneratorCT2fromHfHub

# CTranslate2 conversion of an OpenLLaMA instruct model, fetched from the Hugging Face Hub.
model_name = "michaelfeil/ct2fast-open-llama-7b-open-instruct"

# The tokenizer has to share the vocabulary of the model that is being run.
# The previous Llama 2 chat tokenizer belongs to a different model family, so the
# token ids fed to (and decoded from) this OpenLLaMA model did not match its
# vocabulary. Load the tokenizer of the base model instead.
# NOTE(review): base repo name taken from the converted model's card - confirm.
tokenizer = AutoTokenizer.from_pretrained("VMware/open-llama-7b-open-instruct", use_fast=False)

model = GeneratorCT2fromHfHub(
        model_name_or_path=model_name,
        # CPU inference with int8 weights; "int8_float16" was the alternative
        # left in the original comment (presumably for CUDA - confirm).
        device="cpu",
        compute_type='int8',
        hub_kwargs={'cache_dir': 'E:/cache/transformers'},
        tokenizer=tokenizer
)


  from .autonotebook import tqdm as notebook_tqdm
Downloading tokenizer_config.json: 100%|██████████| 749/749 [00:00<00:00, 752kB/s]
Downloading config.json: 100%|██████████| 811/811 [00:00<00:00, 812kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading tokenizer.json: 100%|██████████| 1.84M/1.84M [00:00<00:00, 4.07MB/s]
Downloading vocabulary.txt: 100%|██████████| 243k/243k [00:00<00:00, 571kB/s]
Downloading model.bin:  28%|██▊       | 1.90G/6.74G [03:36<08:43, 9.27MB/s]

In [45]:
# Smoke test of the loaded generator with a fixed, hard-coded prompt
# (one prompt in, a list of generated strings out, prompt text excluded).
story_prompts = ["Give me a story in which a person is trying to find a job."]
outputs = model.generate(text=story_prompts, max_length=64, include_prompt_in_result=False)

outputs

['\n']

In [None]:
# Imported here because the top-level import cell only has these lines commented
# out; without them this cell raises NameError on a fresh kernel.
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"

# Load the AWQ-quantised model and its matching tokenizer.
# Note: this rebinds `model` and `tokenizer` from the CTranslate2 cells.
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)

# qa_prompt is a PromptTemplate object, not text, so it cannot be tokenized
# directly. Render it to a string first, using the retrieved context
# (input_prompt) and the question defined in earlier cells.
prompt_text = qa_prompt.format(context=input_prompt, question=question)

print("\n\n*** Generate:")

# Token ids are moved to the GPU with .cuda(), so this cell needs a CUDA device.
tokens = tokenizer(
    prompt_text,
    return_tensors='pt'
).input_ids.cuda()

# Generate output (sampling is enabled, so results vary between runs).
generation_output = model.generate(
    tokens,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    max_new_tokens=512
)

print("Output: ", tokenizer.decode(generation_output[0]))