### Setup and load model

In [None]:
# %cd /content
# !wget https://raw.githubusercontent.com/camenduru/Qwen-VL-Chat-colab/main/app.py -O /content/app.py

!pip install -q tiktoken transformers_stream_generator gradio optimum auto-gptq huggingface_hub
!pip install -q modelscope -f https://pypi.org/project/modelscope

# !python app.py --share

In [2]:
import os
import re
from modelscope import (
    AutoModelForCausalLM, AutoTokenizer, GenerationConfig
)
from huggingface_hub import snapshot_download


# Fetch the `4bit/Qwen-VL-Chat-Int4` snapshot from the Hugging Face Hub and keep
# the local directory it was stored in.
model_dir = snapshot_download('4bit/Qwen-VL-Chat-Int4')

# Keyword arguments shared by the tokenizer and the model loader.
# NOTE(review): `trust_remote_code=True` lets the loader run Python files shipped
# with the checkpoint - only use it with repositories you trust.
_shared_load_kwargs = dict(trust_remote_code=True, resume_download=True)

tokenizer = AutoTokenizer.from_pretrained(model_dir, **_shared_load_kwargs)

# `device_map="auto"` leaves device placement to the loader; `.eval()` switches
# the model to inference mode.
model = AutoModelForCausalLM.from_pretrained(
    model_dir, device_map="auto", **_shared_load_kwargs
).eval()

# Smoke test: ask one question about a demo picture.
# The image reference is either a local path or a URL wrapped in <img></img> tags.
image_path = 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg'
demo_query = f'<img>{image_path}</img>What is this?'
response, history = model.chat(tokenizer, query=demo_query, history=None)
print(response)

2023-10-18 13:08:50,328 - modelscope - INFO - PyTorch version 2.0.1+cu118 Found.
2023-10-18 13:08:50,340 - modelscope - INFO - TensorFlow version 2.13.0 Found.
2023-10-18 13:08:50,349 - modelscope - INFO - Loading ast index from /root/.cache/modelscope/ast_indexer
2023-10-18 13:08:50,350 - modelscope - INFO - No valid ast index found from /root/.cache/modelscope/ast_indexer, generating ast index from prebuilt!
2023-10-18 13:08:50,565 - modelscope - INFO - Loading done! Current index file version is 1.9.2, with md5 58a61c44e558237935774e6c96bc5036 and a total number of 941 components indexed


Fetching 20 files:   0%|          | 0/20 [00:00<?, ?it/s]

Downloading (…)3b43f7a8/config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Downloading (…)3f7a8/.gitattributes:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading (…)onfiguration_qwen.py:   0%|          | 0.00/2.09k [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/221 [00:00<?, ?B/s]

Downloading (…)bb3b43f7a8/README.md:   0%|          | 0.00/23.6k [00:00<?, ?B/s]

Downloading (…)7a8/modeling_qwen.py:   0%|          | 0.00/44.2k [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/126k [00:00<?, ?B/s]

Downloading (…)of-00005.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)of-00005.safetensors:   0%|          | 0.00/1.98G [00:00<?, ?B/s]

Downloading (…)quantize_config.json:   0%|          | 0.00/214 [00:00<?, ?B/s]

Downloading (…)43f7a8/qwen.tiktoken:   0%|          | 0.00/2.56M [00:00<?, ?B/s]

Downloading SimSun.ttf:   0%|          | 0.00/10.5M [00:00<?, ?B/s]

Downloading (…)of-00005.safetensors:   0%|          | 0.00/2.00G [00:00<?, ?B/s]

Downloading (…)_generation_utils.py:   0%|          | 0.00/14.9k [00:00<?, ?B/s]

Downloading (…)of-00005.safetensors:   0%|          | 0.00/1.78G [00:00<?, ?B/s]

Downloading (…)of-00005.safetensors:   0%|          | 0.00/1.99G [00:00<?, ?B/s]

Downloading (…)7a8/requirements.txt:   0%|          | 0.00/133 [00:00<?, ?B/s]

Downloading (…)tokenization_qwen.py:   0%|          | 0.00/21.3k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

Downloading (…)bb3b43f7a8/visual.py:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

At the beach, a woman is sitting down and looking at her cell phone. A dog is sitting nearby, both appearing to be enjoying their time together.


### Load image URLs & question lists

In [4]:
import ast

# Read the list of image URLs. The file is expected to contain a Python list
# literal of strings (e.g. ['https://...', 'https://...']).
file_path = 'URL.txt'
with open(file_path, 'r', encoding='utf-8') as url_file:
    # `ast.literal_eval` only accepts literals, so - unlike `eval` - it cannot
    # execute arbitrary code that might be present in the file.
    image_urls_list = ast.literal_eval(url_file.read())

# Preview the first few entries as a sanity check.
print(image_urls_list[:3])

['https://media.istockphoto.com/id/1464971485/photo/vertical-top-view-of-small-cakes-served-on-decorated-plates-on-a-minty-background.jpg?s=612x612&w=0&k=20&c=Ne9VmrrRzjDHW3lmoSELFVrQMYLpiPWbE0V3N19Ox3E=', 'https://media.istockphoto.com/id/1457008492/photo/recipe-of-cod-steak-mashed-potatoes-and-its-farandole-of-vegetables-parsnip-turnip-broccoli.jpg?s=612x612&w=0&k=20&c=4TiJBmdho4Ux6Q50B7zwCYQqo-Y1bfj7WmzydU4YE_Q=', 'https://media.istockphoto.com/id/1327914654/photo/cold-pink-dragon-fruit-salad-with-root-paste-appetiser-on-wood-table-fine-dining-western-menu.jpg?s=612x612&w=0&k=20&c=eMdBxmjxg25R4xckAYTXeYVOJ2rfuIXYN9K-A7IINBQ=']


In [16]:
import random

# Seed the RNG so the sampled questions (here and in later cells that use
# `random`) are the same after every "Restart & Run All".
SEED = 42
random.seed(SEED)

# Opening questions: what the dish is and where it comes from.
problem_list_basic = [
    "What is this food?",
    "What is the name of this dish?",
    "Please identify the ingredients of this food.",
    "Which country's traditional cuisine is this?",
    "Where is this food most popular?",
]

# Follow-up questions asked after the opening question.
# NOTE(review): the two calorie questions below are near-duplicates, which makes
# that topic twice as likely to be sampled - confirm whether this is intended.
problem_list_advanced = [
    "Can you recommend similar recipes?",
    "Can I know the calories of this dish?",
    "What are the main nutritional components of this food?",
    "How many calories are in this food?",
    "Are there any special cooking methods for this food?",
    "Can you provide me with health information about this food?",
    "Is there an interesting story behind the history of this dish?",
    "How is this food different from similar foods?",
    "Could you tell me what this food tastes like?",
    "Are there any recommended beverages to pair with this dish?",
    "During which meal is this food most commonly served?",
    "Are there any interesting customs or traditions associated with this food?",
    "Are there any famous variations or adaptations of this dish?",
    "Do you have any interesting stories or legends related to this food?",
    "Can you tell me how to prepare this food?",
]

# Number of follow-up questions drawn per conversation.
num_to_select = 2

# Example draw: one basic question followed by `num_to_select` distinct advanced ones.
random_selection = random.sample(problem_list_basic, 1) + random.sample(
    problem_list_advanced, num_to_select
)
print(random_selection)

["Which country's traditional cuisine is this?", 'Are there any special cooking methods for this food?', 'Are there any famous variations or adaptations of this dish?']


### Use the model to generate answers and compile each session into ChatML

In [20]:
def _run_session(record_id, img_url, questions):
    """Ask `questions` about one image in a single multi-turn chat.

    Args:
        record_id: Value stored under the record's "id" key.
        img_url: Image location; it is wrapped in <img></img> tags and
            prepended to the first question only.
        questions: Questions to ask, in order. Each later question is sent with
            the chat history returned by the previous turn.

    Returns:
        A dict {"id": ..., "conversations": [...]} whose "conversations" list
        alternates {"from": "user"} and {"from": "assistant"} entries.
    """
    record = {"id": record_id, "conversations": []}
    history = None  # the first turn starts a fresh conversation
    for turn, question in enumerate(questions):
        query = f'<img>{img_url}</img>' + question if turn == 0 else question
        record["conversations"].append({"from": "user", "value": query})
        response, history = model.chat(tokenizer, query=query, history=history)
        record["conversations"].append({"from": "assistant", "value": response})
    return record


history_conv = []
# `idx` instead of `id`: rebinding `id` would shadow the builtin for the rest of
# the kernel session.
for idx, img_url in enumerate(image_urls_list):
    # One basic opener plus `num_to_select` advanced follow-ups per image.
    rand_qns_selected = random.sample(problem_list_basic, 1) + random.sample(problem_list_advanced, num_to_select)
    record = _run_session("identity_" + str(idx), img_url, rand_qns_selected)
    print(record)
    history_conv.append(record)


{'id': 'identity_0', 'converstions': [{'from': 'user', 'value': '<img>https://media.istockphoto.com/id/1464971485/photo/vertical-top-view-of-small-cakes-served-on-decorated-plates-on-a-minty-background.jpg?s=612x612&w=0&k=20&c=Ne9VmrrRzjDHW3lmoSELFVrQMYLpiPWbE0V3N19Ox3E=</img>What is this food?'}, {'from': 'assistant', 'value': 'This is desserts.'}, {'from': 'user', 'value': 'Are there any special cooking methods for this food?'}, {'from': 'assistant', 'value': 'Based on the image, there are no special cooking methods shown. However, the presentation of the desserts on small plates suggests that they may be served as individual portions or for tasting.'}, {'from': 'user', 'value': 'Can you provide me with health information about this food?'}, {'from': 'assistant', 'value': 'Desserts, especially those with chocolate and other sweet ingredients, can be high in calories and sugar. While they can be enjoyed in moderation as a treat, it is important to maintain a balanced diet that include

In [27]:
# Sanity check: number of conversation records generated (one per image URL).
len(history_conv)

121

In [25]:
import json

# Serialise every conversation record as pretty-printed JSON; non-ASCII
# characters are written as-is instead of being escaped.
chatml_json = json.dumps(history_conv, indent=2, ensure_ascii=False)

# Persist the training data to disk as UTF-8 text.
with open("chatml_training_data.json", mode="w", encoding="utf-8") as out_file:
    out_file.write(chatml_json)