Fix doctest for Blip2ForConditionalGeneration (huggingface#26737)
* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

* fix

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
2 people authored and blbadger committed Nov 8, 2023
1 parent 0e03516 commit 67611ac
Showing 2 changed files with 15 additions and 47 deletions.
61 changes: 14 additions & 47 deletions src/transformers/models/blip_2/modeling_blip_2.py
@@ -1272,14 +1272,10 @@ def get_text_features(
  >>> import torch
  >>> from transformers import AutoTokenizer, Blip2Model

- >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
- >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
- >>> model.to(device)  # doctest: +IGNORE_RESULT
+ >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

  >>> tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
- >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt").to(device)
+ >>> inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
  >>> text_features = model.get_text_features(**inputs)
  ```"""
  output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1333,16 +1329,12 @@ def get_image_features(
  >>> import requests
  >>> from transformers import AutoProcessor, Blip2Model

- >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
- >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
- >>> model.to(device)  # doctest: +IGNORE_RESULT
+ >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

  >>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
  >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
  >>> image = Image.open(requests.get(url, stream=True).raw)
- >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+ >>> inputs = processor(images=image, return_tensors="pt")
  >>> image_outputs = model.get_image_features(**inputs)
  ```"""
  output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
@@ -1381,15 +1373,12 @@ def get_qformer_features(
  >>> import requests
  >>> from transformers import Blip2Processor, Blip2Model

- >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
  >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
- >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16)
- >>> model.to(device)  # doctest: +IGNORE_RESULT
+ >>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

  >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
  >>> image = Image.open(requests.get(url, stream=True).raw)
- >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
+ >>> inputs = processor(images=image, return_tensors="pt")
  >>> qformer_outputs = model.get_qformer_features(**inputs)
  ```"""
  output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
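The three hunks above make the same fix: each getter doctest now loads the model on CPU with default settings and drops the fp16 cast and device movement. Assembled from the `+` and context lines of the diff, the patched getter examples read roughly as below (a sketch; checkpoint names and the COCO image URL are taken verbatim from the diff):

```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, AutoTokenizer, Blip2Model

>>> model = Blip2Model.from_pretrained("Salesforce/blip2-opt-2.7b")

>>> # Text features: tokenize a single caption and embed it.
>>> tokenizer = AutoTokenizer.from_pretrained("Salesforce/blip2-opt-2.7b")
>>> inputs = tokenizer(["a photo of a cat"], padding=True, return_tensors="pt")
>>> text_features = model.get_text_features(**inputs)

>>> # Image and Q-Former features: preprocess one COCO image.
>>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
>>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> inputs = processor(images=image, return_tensors="pt")
>>> image_outputs = model.get_image_features(**inputs)
>>> qformer_outputs = model.get_qformer_features(**inputs)
```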
@@ -1654,7 +1643,7 @@ def forward(
  Examples:

- Image captioning (without providing a text prompt):
+ Prepare processor, model and image input

  ```python
  >>> from PIL import Image
@@ -1666,13 +1655,16 @@ def forward(
  >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
  >>> model = Blip2ForConditionalGeneration.from_pretrained(
- ...     "Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16
- ... )
- >>> model.to(device)  # doctest: +IGNORE_RESULT
+ ...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.float16
+ ... )  # doctest: +IGNORE_RESULT

  >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
  >>> image = Image.open(requests.get(url, stream=True).raw)
  ```

+ Image captioning (without providing a text prompt):
+
+ ```python
  >>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
  >>> generated_ids = model.generate(**inputs)
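After the patch, setup and captioning live in separate fenced blocks: the setup block loads the processor, the 8-bit model, and the image once, and the captioning block only prepares inputs and generates. The decode/print tail of the example sits in the collapsed part of the diff, so the sketch below reconstructs it from the usual doctest pattern (the `device` variable is likewise assumed to be defined in collapsed context as `"cuda" if torch.cuda.is_available() else "cpu"`):

```python
>>> inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)

>>> generated_ids = model.generate(**inputs)
>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
>>> print(generated_text)  # expected: a short caption for the COCO cats image
```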
@@ -1684,21 +1676,6 @@ def forward(
  Visual question answering (prompt = question):

  ```python
- >>> from PIL import Image
- >>> import requests
- >>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
- >>> import torch
-
- >>> device = "cuda" if torch.cuda.is_available() else "cpu"
-
- >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
- >>> model = Blip2ForConditionalGeneration.from_pretrained(
- ...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.float16
- ... )  # doctest: +IGNORE_RESULT
-
- >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
- >>> image = Image.open(requests.get(url, stream=True).raw)
-
  >>> prompt = "Question: how many cats are there? Answer:"
  >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.float16)
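The visual question answering example no longer rebuilds the processor, model, and image; it reuses the objects from the setup block and only swaps in a prompt. Post-patch it reduces to roughly this (generation tail reconstructed as above, so an assumption rather than a quote from the diff):

```python
>>> prompt = "Question: how many cats are there? Answer:"
>>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.float16)

>>> generated_ids = model.generate(**inputs)
>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
>>> print(generated_text)  # expected: a short answer such as a count
```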
@@ -1712,20 +1689,10 @@ def forward(
  This greatly reduces the amount of memory used by the model while maintaining the same performance.

  ```python
- >>> from PIL import Image
- >>> import requests
- >>> from transformers import Blip2Processor, Blip2ForConditionalGeneration
- >>> import torch
-
- >>> processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xl")
  >>> model = Blip2ForConditionalGeneration.from_pretrained(
- ...     "Salesforce/blip2-flan-t5-xl", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
+ ...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
  ... )  # doctest: +IGNORE_RESULT
-
- >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
- >>> image = Image.open(requests.get(url, stream=True).raw)
-
  >>> prompt = "Question: how many cats are there? Answer:"
  >>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)

  >>> generated_ids = model.generate(**inputs)
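The int8 example now targets Salesforce/blip2-opt-2.7b instead of the flan-t5-xl checkpoint, so it can reuse the processor, image, and prompt already in scope and only reload the model in 8-bit with bfloat16 compute; that reuse is what the deleted lines amount to. A sketch of the patched block, with the decode tail reconstructed as in the earlier examples (note that `load_in_8bit=True` requires a CUDA GPU and the bitsandbytes package):

```python
>>> model = Blip2ForConditionalGeneration.from_pretrained(
...     "Salesforce/blip2-opt-2.7b", load_in_8bit=True, device_map={"": 0}, torch_dtype=torch.bfloat16
... )  # doctest: +IGNORE_RESULT

>>> inputs = processor(images=image, text=prompt, return_tensors="pt").to(device="cuda", dtype=torch.bfloat16)

>>> generated_ids = model.generate(**inputs)
>>> generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
>>> print(generated_text)  # expected: same answer as the float16 example
```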
1 change: 1 addition & 0 deletions utils/slow_documentation_tests.txt
@@ -1,4 +1,5 @@
  docs/source/en/generation_strategies.md
  docs/source/en/model_doc/ctrl.md
  docs/source/en/task_summary.md
+ src/transformers/models/blip_2/modeling_blip_2.py
  src/transformers/models/ctrl/modeling_ctrl.py
