# Tutorial for Bllossom on Gradio!

## 01. Import all packages

In [None]:
!pip install -q gradio
!pip install torch transformers accelerate
!pip install pdf2image

In [7]:
import gradio as gr
import os
import torch
from transformers import AutoProcessor, MllamaForConditionalGeneration, MllamaProcessor
from PIL import Image

## 02. Load your model

In [8]:
# Determine the device (GPU if available, else CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [9]:
# Load the model and processor
# model_name = """Bllossom/llama-3.2-Korean-Bllossom-AICA-5B"""
# model = MllamaForConditionalGeneration.from_pretrained(
#     model_name,
#     torch_dtype=torch.bfloat16,
#     device_map='cuda:0'
# )

# from accelerate import init_empty_weights, load_checkpoint_and_dispatch

# # Load the model with accelerate for efficient device placement
# model_name = """Bllossom/llama-3.2-Korean-Bllossom-AICA-5B"""
# with init_empty_weights():
#     model = MllamaForConditionalGeneration.from_pretrained(
#         model_name,
#         torch_dtype=torch.bfloat16
#     )
# model = load_checkpoint_and_dispatch(model, model_name, device_map=device)
model = MllamaForConditionalGeneration.from_pretrained(
  'Bllossom/llama-3.2-Korean-Bllossom-AICA-5B',
  torch_dtype=torch.bfloat16,
  device_map='auto'
)


Loading checkpoint shards: 100%|██████████| 3/3 [00:05<00:00,  1.71s/it]
Some parameters are on the meta device because they were offloaded to the disk.


In [10]:
processor = MllamaProcessor.from_pretrained('Bllossom/llama-3.2-Korean-Bllossom-AICA-5B')

In [2]:
import fitz  # PyMuPDF
from PIL import Image
import io

def pdf_to_images_with_pymupdf(pdf_path):
    pdf_document = fitz.open(pdf_path)
    images = []
    
    for page_num in range(pdf_document.page_count):
        page = pdf_document[page_num]
        # 페이지를 이미지로 렌더링
        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))  # 2x 해상도
        img_data = pix.tobytes("png")
        img = Image.open(io.BytesIO(img_data))
        images.append(img)
    
    pdf_document.close()
    return images

In [3]:
pdf_path = "/Users/aohus/Workspaces/github/politics/etl/data/document/pdf/2200001_교통약자의_이동편의_증진법_전부개정법률안.pdf"
images = pdf_to_images_with_pymupdf(pdf_path)

In [11]:
messages = [
  {'role': 'user','content': [
    {'type':'image'},
    {'type': 'text','text': '이 문서를 마크다운으로 바꿔줘'}
    ]},
  ]

input_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [12]:
# Move the model to the appropriate device (GPU if available)
# model.to(device)
# processor = AutoProcessor.from_pretrained(model_name)
# VRAM을 많이 먹을 경우 아래 코드 실행
model.eval()

MllamaForConditionalGeneration(
  (model): MllamaModel(
    (vision_model): MllamaVisionModel(
      (patch_embedding): Conv2d(3, 1280, kernel_size=(14, 14), stride=(14, 14), padding=valid, bias=False)
      (gated_positional_embedding): MllamaPrecomputedPositionEmbedding(
        (tile_embedding): Embedding(9, 8197120)
      )
      (pre_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
        (embedding): Embedding(9, 5120)
      )
      (post_tile_positional_embedding): MllamaPrecomputedAspectRatioEmbedding(
        (embedding): Embedding(9, 5120)
      )
      (layernorm_pre): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
      (layernorm_post): LayerNorm((1280,), eps=1e-05, elementwise_affine=True)
      (transformer): MllamaVisionEncoder(
        (layers): ModuleList(
          (0-31): 32 x MllamaVisionEncoderLayer(
            (self_attn): MllamaVisionAttention(
              (q_proj): Linear(in_features=1280, out_features=1280, bias=False)
           

In [13]:
inputs = processor(
    images[1],
    input_text,
    add_special_tokens=False,
    return_tensors="pt",
).to(model.device)

In [14]:
output = model.generate(
    **inputs, 
    max_new_tokens=1024,
    temperature=0.1,
    eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'),
    use_cache=False
)
print(processor.decode(output[0]))

KeyboardInterrupt: 

In [15]:
def predict(image=None, text=None):
  torch.cuda.empty_cache()
  with torch.no_grad():
      if image is not None:
        w,h = image.size
        if w < 500 or h < 500:
          image = image.resize((w*2,h*2))

      # LLM으로 사용할 때
      if image is None:
          messages = [
              {"role": "user", "content": [
                  {"type": "text", "text": text}  # Add the user-provided text input
              ]}
          ]
          inputs = processor.tokenizer.apply_chat_template(messages, add_generation_prompt=True,return_tensors='pt').to(model.device)
          outputs = model.generate(inputs, max_new_tokens=512,temperature=0.1,eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'))

      # VLM으로 사용할 때
      else:
          # Prepare the input messages
          messages = [
              {"role": "user", "content": [
                  {"type": "image"},  # Specify that an image is provided
                  {"type": "text", "text": text}  # Add the user-provided text input
              ]}
          ]
          # Create the input text using the processor's chat template
          input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

          # Process the inputs and move to the appropriate device
          inputs = processor(image, input_text,add_special_tokens=False, return_tensors="pt").to(device)


      # Generate a response from the model
          with torch.cuda.amp.autocast():
            outputs = model.generate(
                **inputs,
                max_new_tokens=512,
                use_cache=True,
                temperature=0.1,
                eos_token_id=processor.tokenizer.convert_tokens_to_ids('<|eot_id|>'),
                )

      # Decode the output to return the final response
      response = processor.decode(outputs[0])
      response = response[
          response.rindex('<|start_header_id|>assistant<|end_header_id|>\n\n')+\
          len('<|start_header_id|>assistant<|end_header_id|>\n\n'):].replace('<|eot_id|>','')

      return response

In [18]:
# Define the Gradio interface
interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Image Input"),  # Image input with label
        gr.Textbox(label="Text Input")  # Textbox input with label
    ],
    outputs=gr.Textbox(label="Generated Response"),  # Output with a more descriptive label
    title="Bllossom-AICA Demo",  # Title of the interface
    description="This demo uses Bllossom-AICA model to generate responses based on an image and text input.",  # Short description
    theme="compact",  # Using a compact theme for a cleaner look,
    batch=False,
)

# Launch the interface
interface.launch(debug=True, share=True)


Sorry, we can't find the page you are looking for.


* Running on local URL:  http://127.0.0.1:7860


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


* Running on public URL: https://7795ee9e516a9b8045.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  with torch.cuda.amp.autocast():


Created dataset file at: .gradio/flagged/dataset1.csv
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://7795ee9e516a9b8045.gradio.live




# 관세청, 외국산 물품을 국산으로 숙여 조달 납품하는 부정행위 집중단속

- **조달청, 산업부**과 협업하여 입수한 자료를 통한 분석을 통해 기획조사 지속
- 조달 계약자료 및 관계청 자료의 데이터를 분석하며 시스템 구축 진행 중
- 관세청은 공정기관 조달 납품업체에 의해 납품된 외국산 물품을 국산으로 둔갑시키는 부정납품 행위를 차단하기 위해 집중단속 '23.8월 실시

## 공공조달 부정납품 단속 현황 ('2023년 기준)
| 구분          | '19년  | '20년  | '21년  | '22년  | '23년 1~5월 |
|---------------|-------|-------|-------|-------|-------------|
| 사건수(건,원) | 9(11) | 6(7)  | 15(28)| 11(12) | 3(3)       |
| 금액(억원)     | 185  | 634  | 1,224 | 1,244 | 139       |
| 주요 품목      | 전자칠판 | 바닥마감제(물론어림보드) | 의류 패셔너토(터피스) 등 | 액정모니터 의류 | 무정전전원 장치, 의류 |

## 조사 내용
- 조달 납품 업체들은 저가의 외국산 물품을 생산하지 않고 가의 성표로 국산으로 둔갑시킴.
- 저가로 수입된 국산 물품, 상표라벨 추가 등으로 국산으로 간주함.
- 공공조달 분야의 경쟁력 강화 및 소비자 보호를 위해 부정행위 단속.

## 부정납품 단속 사례
- 저가 외국산 근무복 수입 후 원산지표시를 제거한 뒤, 이를 근무복 12만점으로 위장.
- 국가 기관에 위조 근무복 200개 전달.

## 추가 단속 발표 ('22년 11월)
- 단순 조립 후 제조된 국산 근무복으로 부정납품 방지.
```