In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install langchain -q
!pip install transformers huggingface-hub -q
!pip install -q bitsandbytes accelerate
!pip install xformers
!pip install sentence_transformers -q
!pip install chromadb -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.0/90.0 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m72.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import os
import torch
import torch.nn as nn
import bitsandbytes as bnb

from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
from transformers import pipeline

from langchain import HuggingFacePipeline
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

In [4]:
def load_model_in_hf(model_id):
  model = AutoModelForCausalLM.from_pretrained(
      model_id,
      torch_dtype=torch.float16,
      load_in_8bit=True,
      device_map='auto',
  )
  tokenizer = AutoTokenizer.from_pretrained(model_id)

  return model, tokenizer

def const_pipelin_with_options(model_load, tokenizer, top_k=10, max_length=1000):
  pipeline_load = pipeline(
    "text-generation", #task
    model=model_load,
    tokenizer=tokenizer,
    top_k=top_k,
    max_length = max_length,
    do_sample=True,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
  )
  return pipeline_load

def const_llm_for_langchain(pipeline, temperature=0):
  llm = HuggingFacePipeline(
    pipeline=pipeline,
    model_kwargs={"temperature" : temperature}
  )
  return llm

In [5]:
model_id="daryl149/llama-2-13b-chat-hf"
model, tokenizer = load_model_in_hf(model_id)
pipeline = const_pipelin_with_options(model, tokenizer)
llm = const_llm_for_langchain(pipeline)

Downloading (…)lve/main/config.json:   0%|          | 0.00/507 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/33.4k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00003.bin:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00003.bin:   0%|          | 0.00/9.90G [00:00<?, ?B/s]

Downloading (…)l-00003-of-00003.bin:   0%|          | 0.00/6.18G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/137 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/727 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [6]:
def const_chunk_doc(path, tokenizer, chunk_size=100, chunk_overlap=10, encoding_opt='UTF-8'):
  loader = TextLoader(path, encoding = encoding_opt)
  documents = loader.load()
  text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap, separator="\n")
  chunk_documents = text_splitter.split_documents(documents)
  return chunk_documents

def const_retriever(chunk_documents, embeddings):
  vectordb = Chroma.from_documents(documents=chunk_documents, embedding=embeddings)
  doc_retriever = vectordb.as_retriever()
  return doc_retriever


In [7]:
embeddings = HuggingFaceEmbeddings()

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [8]:
pdf_path = './drive/MyDrive/cvpr_papers/text/part1'
pdf_names = os.listdir(pdf_path)
pdf_whole_path = os.path.join(pdf_path, pdf_names[5])
print(pdf_whole_path)

./drive/MyDrive/cvpr_papers/text/part1/Huang_Contrastive_Semi-Supervised_Learning_for_Underwater_Image_Restoration_via_Reliable_Bank_CVPR_2023_paper.txt


In [9]:
chunk_paper = const_chunk_doc(pdf_whole_path, tokenizer)
doc_retriever = const_retriever(chunk_paper, embeddings)
paper_qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=doc_retriever)

In [13]:
response = paper_qa.run("What analysis was performed and what were the results more detail?")

In [14]:
response

" Based on the given table and text, it appears that the authors performed an analysis of the influence of data augmentation on the performance of their method. The results show that using data augmentation improves the performance of the method, and that using a mixture of different augmentation strategies achieves the best results. Specifically, the table shows that the method using data augmentation outperforms the baseline method in terms of UIQM, UCIQE, and MUSIQ. Additionally, the authors mention that the best results are obtained when using a mixture of the three augmentation strategies.\n\nUnhelpful Answer: I don't know. The text doesn't mention anything about an analysis of data augmentation.\n\nPlease note that, based on the text, the question is asking for a more detailed explanation of the analysis of data augmentation, and not just a summary of the results."

In [14]:
response = paper_qa.run("What analysis was performed and what were the results more detail?")

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


In [17]:
response

" We performed the following analyses on the provided results and found the following:\n* We computed the correlation between the teacher and student models using the mean squared error (MSE) between the two. We found that the correlation was 0.73, indicating a strong positive correlation between the two.\n* We computed the correlation between the teacher and student models using the structural similarity index (SSIM). We found that the correlation was 0.80, indicating a strong positive correlation between the two.\n* We computed the correlation between the student and the reference models using MSE. We found that the correlation was 0.75, indicating a moderate positive correlation between the two.\n* We computed the correlation between the student and the reference models using SSIM. We found that the correlation was 0.83, indicating a strong positive correlation between the two.\n* We computed the correlation between the student and the reference models using UIQM. We found that the 

In [14]:
paper_qa.run("In this document, could you please provide an explanation within 1000 characters of what is the Research Question or Hypothesis?")

' Sure!\nThe research question or hypothesis of this document can be summarized as follows:\n"What is the most reliable and robust way to evaluate the quality of a video denoising model? And which evaluation metric is most accurate in measuring the real-world performance of the model?"\nIn other words, the document aims to investigate the influence of using different evaluation metrics on the performance of a video denoising model, and to find the best combination of evaluation metrics that can provide a reliable and robust measure of the model\'s quality.'

In [10]:
paper_qa.run("In this document, What is the Research Question or Hypothesis?")

OutOfMemoryError: ignored

In [15]:
paper_qa.run("In this document, What is the Research Question or Hypothesis?")

'\nThe research question or hypothesis of this document is:\n"How to improve the performance of Deep Video Quality Assessment (DVQA) using Non-Reference metrics?"\n\nExplanation: The document presents a research study aimed at improving the performance of Deep Video Quality Assessment (DVQA) using Non-Reference metrics. The authors investigate the influence of different NR-IQA approaches, data augmentations, and NR-IQA metrics on the performance of DVQA. They evaluate their findings by comparing the performance of different methods on non-reference benchmarks. Therefore, the research question or hypothesis of this document is how to improve the performance of DVQA using Non-Reference metrics.'

In [17]:
paper_qa.run("In this document, What is the Methodology Used?")

'\nThe methodology used in the document is based on the following components:\n\n1. Evaluation metrics: MUSIQ, UIQM, UCIQE\n2. Data augmentation: mixture of the three strong data augmentation strategies\n3. NR-IQA approaches: MUSIQ, NIMA, PAQ2PIQ.\n\n\nNote: The answer provided is based on the context provided in the question. If the context is incomplete or unclear, the answer may not be accurate.'

In [19]:
paper_qa.run("Could you summarize this document for me?")

" Certainly! Based on the provided text, here's a summary:\nThe document discusses the improvement of non-reference image quality assessment (NR-IQA) using a teacher-student network. The proposed method uses a reliable teacher model to provide guidance to a student model, improving the quality of the student model's predictions. The document presents the results of using different NR-IQA approaches, including MUSIQ, and compares their performance on a labeled dataset. The results show that MUSIQ provides the best performance, and using PAQ2PIQ is better than using NIMA. The document also evaluates the influence of adopting different NR-IQA metrics on the performance of the method.\nI hope this helps! Let me know if you have any further questions."

In [12]:
paper_qa.run("In this document, What is the significant result?")

' There is no significant result in this document because the numbers are based on experiments with different methods and do not have any significant results.'

In [10]:
response = paper_qa.run("What analysis was performed and what were the results more detail?")



In [11]:
response

' The analysis was performed comparing the results of using different data augmentation strategies and evaluating their performance based on SSIM, MUSIQ, and UIQM. The results showed that adopting any of the data augmentation strategies resulted in improved performance compared to the baseline solution. Additionally, using a mixture of the three strategies achieved the best performance. The results are presented in Table 5 and the details are discussed in the text.'

In [28]:
response

" The paper performed several detailed analyses to answer this question, including:\n1. A detailed analysis of the teacher and student models was performed to see how they compared in terms of performance and structure.\n2. To investigate the influence of non-reference metrics, the paper evaluated the performance of different NR-IQA approaches on the labeled dataset and showed the final performance of using different metrics.\n3. The paper also evaluated the influence of data augmentation on the performance of the method.\n4. To further illustrate that the teacher's outputs can be used to train the student network, the paper provided some intermediate results during training.\n\nThe results of these analyses showed that the method outperformed the compared method in terms of MUSIQ, and competitive performance in terms of UIQM and UCIQE. However, the performance in MUSIQ is also for a reference, and the quantitative results might be insufficient to indicate the quality of restored under

In [None]:
cvpr2023_qa.run("What is the significant result in this document?")

In [75]:
cvpr2023_qa.run("What is the main method in this document?")

' The main method in this document is the MUSIQ method for NR-IQA.'

In [58]:
cvpr2023_qa.run("What is the significant result the Minimizing_the_Accumulated_Trajectory_Error_To_Improve_Dataset_Distillation of the cvpr's paper for me?")

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


' Thank you for sharing the paper! The significant result you mentioned is indeed a key finding of the authors. They demonstrate that by minimizing the accumulated trajectory error, they can improve the dataset distillation process, leading to better generalization performance on downstream tasks.\nTo provide more context, the authors propose a technique called LASP (Leveraging Attention-aware Sampling for Probing), which modifies the Text-to-Text loss function to focus on the most informative samples in the minibatch. They show that this leads to better performance on several benchmark datasets, including ImageNet and CIFAR-10.\nThe key insight is that the traditional Text-to-Text loss function is sensitive to the sampling strategy used to generate the probe set, which can lead to poor generalization performance. By minimizing the accumulated trajectory error, LASP encourages the probe generator to produce a more diverse and informative set of samples, which in turn leads to better pe

In [17]:
cvpr2023_qa.run("What social, economic, or policy implications can be derived from the research findings?")

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (2048). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


' The findings of this research might have a range of social, economic, and policy implications. The study of semi-supervised learning techniques for underwater image restoration has the potential to enhance the accuracy and effectiveness of this area of research. This might have a number of practical applications, such as improving the quality of underwater images for use in scientific research, environmental monitoring, or underwater exploration. In addition, the study of contrastive loss might have an impact on other areas of machine learning, such as domain adaptation and self-supervised learning.\nThe research may also have implications for the ethical use of AI and machine learning. For instance, the development of semi-supervised learning algorithms can help to address the problem of data scarcity in underwater environments, which can hinder the development of AI and machine learning applications. Moreover, the adoption of contrastive loss can help to ensure that machine learnin

In [6]:
from langchain.document_loaders import BSHTMLLoader, DirectoryLoader
cvpr_path = "./drive/MyDrive/cvpr_papers/text/part1"
dir_loader = DirectoryLoader(cvpr_path, loader_cls=TextLoader)
cvpr_data = dir_loader.load()

FileNotFoundError: ignored

In [25]:
len("Yes, the study provides several new insights and theoretical contributions. First, the study proposes a novel score-based generative framework called GFPose to model plausible 3D human poses via denoising score matching. GFPose is designed to incorporate pose priors in gradients and unify various discriminative and generative tasks in an elegant framework. This is a new and effective solution to the fundamental problem of modeling 3D human pose prior. Second, the study demonstrates that GFPose outperforms existing state-of-the-art methods in various downstream tasks such as pose estimation, pose denoising, pose completion, and pose generation. This provides new insights into the effectiveness and generalizability of GFPose under different problem settings. Third, the study provides insights into the design factors of GFPose in the 3D human pose estimation task, which can be useful for future research in this area. Overall, the study provides several new insights and theoretical contributions to the field of computer vision and human-centered applications.")

1071