In [None]:
!pip install torch transformers

In [None]:
!pip install pyngrok
!pip install flask_cors
!pip install requests
!pip install Flask

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration

# Load the BLIP image-captioning processor and model once at module level;
# generate_caption() below reads both globals. No device is specified here.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_for_I2T = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

from PIL import Image
import requests
import io

def generate_caption(raw_image):
  """Caption an image with the module-level BLIP processor and model.

  Generation is conditioned on the fixed text prefix "a photography of" and
  constrained to between 20 and 50 tokens.

  Args:
    raw_image: Image passed straight to ``processor`` (callers in this file
      pass a PIL image).

  Returns:
    The decoded caption with special tokens stripped.
  """
  prefix = "a photography of"
  model_inputs = processor(raw_image, prefix, return_tensors="pt")
  generated = model_for_I2T.generate(**model_inputs, min_length=20, max_length=50)
  # Only the first generated sequence is decoded.
  return processor.decode(generated[0], skip_special_tokens=True)

def generate_caption_by_file(file):
  """Caption an image supplied as a readable binary file-like object.

  The image is normalised to RGB before captioning, matching what
  generate_caption_by_url() does, so palette, grayscale and alpha-channel
  uploads take the same path as URL-sourced images.

  Args:
    file: Object with a ``read()`` method returning the encoded image bytes
      (the Flask route in this file passes the uploaded file).

  Returns:
    The caption string produced by generate_caption().
  """
  raw_image_bytes = file.read()
  # Decode from an in-memory buffer so PIL does not depend on the upload stream.
  raw_image = Image.open(io.BytesIO(raw_image_bytes)).convert('RGB')
  return generate_caption(raw_image)

def generate_caption_by_url(image_url, timeout=10):
  """Download an image over HTTP and caption it.

  Args:
    image_url: URL of the image to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, ``requests`` can block indefinitely on an unresponsive host.

  Returns:
    The caption string produced by generate_caption().

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status.
    requests.RequestException: On connection problems or timeout.
  """
  response = requests.get(image_url, timeout=timeout)
  # Fail here on an HTTP error rather than handing an error page to PIL.
  response.raise_for_status()
  # response.content is the fully read, content-decoded body (unlike .raw).
  raw_image = Image.open(io.BytesIO(response.content)).convert('RGB')
  return generate_caption(raw_image)




In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Fix PyTorch's random seed.
torch.random.manual_seed(0)

# Load the Phi-3 mini instruct model and its tokenizer once at module level;
# getClassfication() reads the `model` and `tokenizer` globals.
modelid = "microsoft/Phi-3-mini-4k-instruct"
model = AutoModelForCausalLM.from_pretrained(
    modelid,
    device_map="cuda",  # places the model on a CUDA device; a GPU runtime is required
    torch_dtype="auto",
    trust_remote_code=True,  # NOTE(review): allows code shipped in the model repo to run
)
tokenizer = AutoTokenizer.from_pretrained(modelid)

In [None]:
def getClassfication(prompt):
  """Ask the LLM to classify a food description and return its raw reply.

  A three-message chat is sent: a system instruction describing the five
  fields to extract, an assistant message carrying an example of the expected
  JSON, and the caller's text as the user message.

  Args:
    prompt: Free-text food description (the Flask route passes an image
      caption).

  Returns:
    The ``generated_text`` of the first pipeline result. The value is returned
    as-is; it is not parsed or validated as JSON here.
  """
  system_prompt = """
      You are an AI assistant that performs food classification and calorie estimation. From the food descriptions provided by the user, accurately extract the following two pieces of information.
        1. Category : Identify the major food or beverage categories mentioned in the description. Use food categories ["Alcohol/Beverage", "Noodle dish", "Vegetable", "Meat", "Grain", "Dessert/Snack", other]. If there is more than one food category, identify the most primary one.
        2. Calory: Estimate the general calorie content of the applicable food. If exact information is not available, please provide a reasonable approximation and output numbers only.
        3. Energy type: Identify the category by energy type for that meal from Category and Calory. Use the energy category (DYNAMISM , VITALITY  , SERENITY ).
        4. Food name: Get the food name from the input text.
        5. Impressions: You have eaten this meal. Give us your impressions of this simple meal..
        Always provide output in the Json format:
        "{
          "Category": [Category] ,
          "Calory": [Calories],
          "Energy type": [Energy type],
          "Food name":[Food name],
          "Impressions": [Impressions]
          }"
        """
  example_reply = """ Use Json format to output the result.
      Output example:
        {
          "Category": "Alcohol/Beverage" ,
          "Calory": "100",
          "Energy type": "DYNAMISM",
          "Food name": "Ramen",
          "Impressions": "It was very good ramen! It's my favorite!"
        }
      """

  # The pipeline wraps the already-loaded module-level model and tokenizer.
  generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

  conversation = [
      {"role": "system", "content": system_prompt},
      {"role": "assistant", "content": example_reply},
      {"role": "user", "content": prompt},
  ]

  # do_sample=False selects non-sampling decoding; return_full_text=False
  # asks the pipeline for the newly generated text only.
  results = generator(
      conversation,
      max_new_tokens=500,
      return_full_text=False,
      temperature=0.0,
      do_sample=False,
      use_cache=False,
  )
  return results[0]['generated_text']

In [None]:
# Run API
from flask import Flask, send_file, render_template, request, make_response,jsonify
from flask_cors import CORS
from pyngrok import ngrok, conf
import json
import os
import werkzeug
from datetime import datetime

# Set the ngrok auth token and reserved domain.
# Both are read from the environment when present so real credentials do not
# have to be written into the notebook; the original placeholders remain as
# fallbacks so in-place editing keeps working.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN", "YOUR_AUTH_TOKEN")
NGROK_DOMAIN = os.environ.get("NGROK_DOMAIN", "YOUR_NGROK_DOMAIN")

# Create the Flask application and wrap it with flask_cors' CORS().
app = Flask(__name__)
CORS(app)

# Cap the accepted request body at 3 MB (3 * 1024 * 1024 bytes). Oversized
# requests are reported by the RequestEntityTooLarge handler registered below.
app.config['MAX_CONTENT_LENGTH'] = 3 * 1024 * 1024

@app.route('/')
def index():
    """Root endpoint: reply with a fixed readiness message."""
    ready_message = "I'm ready!"
    return ready_message

@app.route('/img2txt', methods=['POST'])
def img2text():
    """Caption an uploaded image and classify the caption with the LLM.

    Expects a multipart form upload with the image in the ``file`` field.

    Returns:
        A JSON response ``{"result": ...}``. On success ``result`` is the raw
        text returned by getClassfication(). If the ``file`` field is missing
        or has an empty filename, ``result`` is an error message and the
        status is 400.
    """
    file = request.files.get('file')
    if file is None:
        # The response must be returned; otherwise execution falls through
        # and the request is processed anyway.
        return make_response(jsonify({'result':'file is required.'}), 400)

    # An empty filename means the form field was sent with no file selected.
    if not file.filename:
        return make_response(jsonify({'result':'filename must not empty.'}), 400)

    prompt = generate_caption_by_file(file)
    print("----------------------")
    print(prompt)

    output = getClassfication(prompt)
    print("========== ===========")
    print(output)

    return make_response(jsonify({'result':output}))

@app.errorhandler(werkzeug.exceptions.RequestEntityTooLarge)
def handle_over_max_file_size(error):
    """Report an upload that exceeds ``MAX_CONTENT_LENGTH``.

    Args:
        error: The RequestEntityTooLarge exception raised for the request.

    Returns:
        A ``(body, status)`` tuple: the plain-text message with HTTP 413, so
        clients can detect the failure from the status code instead of
        receiving an implicit 200.
    """
    print("werkzeug.exceptions.RequestEntityTooLarge")
    return 'result : file size is overed.', 413

# Entry point: expose the local server through ngrok, then start Flask.
if __name__ == "__main__":
  # Open an ngrok tunnel to local port 5000 using the configured NGROK_DOMAIN.
  public_url = ngrok.connect(5000, hostname=NGROK_DOMAIN)
  print(f"ngrok URL: {public_url}")
  # Serve the Flask app on the same port the tunnel forwards to.
  app.run(port=5000)
