In [None]:
!pip install ultralytics
from ultralytics import YOLO



In [None]:
!pip install flask flask-ngrok transformers




In [None]:
# Download the ngrok v3 agent archive (Linux amd64 build) and unpack it into the
# current working directory; later cells invoke the extracted binary as ./ngrok.
!curl -O https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz
!tar -xvzf ngrok-v3-stable-linux-amd64.tgz


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 14.1M  100 14.1M    0     0  10.5M      0  0:00:01  0:00:01 --:--:-- 10.5M
ngrok


In [None]:
# Register the ngrok authtoken without hardcoding it in the notebook.
# SECURITY: a real authtoken used to be written literally in this cell. Treat that
# token as compromised and revoke/rotate it in the ngrok dashboard.
import os
import subprocess
from getpass import getpass

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt without echoing.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

# Pass the token as a plain argv element (no shell) so it is never shell-interpolated.
authtoken_result = subprocess.run(
    ["./ngrok", "config", "add-authtoken", ngrok_authtoken],
    capture_output=True,
    text=True,
)
# Report failures via stderr only, so the token never ends up in a traceback.
if authtoken_result.returncode != 0:
    raise RuntimeError(f"ngrok config add-authtoken failed: {authtoken_result.stderr.strip()}")
print(authtoken_result.stdout.strip())


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
# Step 1: Install and Import Required Libraries
!sudo apt-get install -y tesseract-ocr
!pip install pytesseract easyocr shapely pandas opencv-python-headless pillow

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
pip install --upgrade jax jaxlib




In [None]:
from flask import Flask, request, jsonify
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor
from ultralytics import YOLO
import cv2
import easyocr
import pytesseract
import re
from PIL import Image
import io
from shapely.geometry import Point, Polygon
import numpy as np

# Initialize the Flask app
app = Flask(__name__)

# Select the compute device: GPU when CUDA is available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model 1: Vision Transformer (ViT)
vit_model_path = "/content/best_vit_model_final.pth"

# Build the model architecture from the public base checkpoint. The 1000-class
# classifier head of that checkpoint does not match num_labels=3, so
# ignore_mismatched_sizes=True lets the head be re-created (transformers prints a
# "newly initialized" warning for it); the head is then filled by the
# load_state_dict call below.
vit_model = ViTForImageClassification.from_pretrained(
    "google/vit-base-patch16-224",
    num_labels=3,  # number of classes in the saved model
    ignore_mismatched_sizes=True
)

# Load the saved weights; strict=True makes any missing/unexpected key an error
vit_model.load_state_dict(torch.load(vit_model_path, map_location=device), strict=True)
vit_model.to(device)
vit_model.eval()  # inference mode

# Printed once the ViT weights are loaded; the feature extractor and the
# detection model are still loaded after this line.
print("Model loaded and ready for inference.")

# Preprocessor paired with the same base checkpoint as the model.
# NOTE(review): ViTFeatureExtractor appears to be deprecated in recent transformers
# releases in favour of ViTImageProcessor — confirm against the installed version.
vit_feature_extractor = ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
# Class names indexed by logit position.
# NOTE(review): assumes this order matches the label indices used in training — confirm.
vit_labels = ["drift", "fish", "move"]

# Model 2: Object Detection (YOLOv11)
objdet_model_path = "/content/ver5_yolov11_refined_best.pt"
objdet_model = YOLO(objdet_model_path)

# Model 3: OCR-related setup
easyocr_reader = easyocr.Reader(['en'])
# Vertices of the boundary polygon as (longitude, latitude) pairs in decimal
# degrees, each written as degrees + minutes / 60 (+ seconds / 3600).
# The axis order matches Point(longitude, latitude) in classify_vessel_position.
boundary_points = [
    (125 + 25 / 60, 32 + 11 / 60),
    (124 + 8 / 60, 33 + 20 / 60),
    (124 + 0 / 60 + 30 / 3600, 34),
    (124 + 7 / 60 + 30 / 3600, 35),
    (124 + 30 / 60, 35 + 30 / 60),
    (124 + 30 / 60, 36 + 45 / 60),
    (124 + 20 / 60, 37),
    (126 + 45 / 60, 32 + 11 / 60)
]
# Shapely closes the ring implicitly (last vertex back to the first).
boundary_polygon = Polygon(boundary_points)

def classify_vessel_position(longitude, latitude):
    """Classify a position by whether it lies inside the boundary polygon.

    Args:
        longitude: Longitude in decimal degrees (x coordinate of the point).
        latitude: Latitude in decimal degrees (y coordinate of the point).

    Returns:
        "허가" when the point is strictly inside ``boundary_polygon``
        (``Polygon.contains``), otherwise "불법".
    """
    if boundary_polygon.contains(Point(longitude, latitude)):
        return "허가"
    return "불법"

def extract_coordinates(image):
    """OCR the coordinate overlay in the bottom-left corner of a frame.

    The bottom 4% of the rows and the left half of the columns are cropped,
    converted to grayscale and read with both EasyOCR and Tesseract. The two
    texts are concatenated, commas are turned into dots, and the result is
    searched for ``N <deg>° <min.frac>`` and ``E <deg>° <min.frac>``.

    Args:
        image: BGR image array (as produced by OpenCV).

    Returns:
        ``(latitude, longitude)`` in decimal degrees (degrees + minutes / 60),
        or ``(None, None)`` when either coordinate is not found.
    """
    rows, cols = image.shape[:2]
    strip_fraction = 0.04
    top = int(rows * (1 - strip_fraction))
    right = int(cols * 0.5)
    overlay = cv2.cvtColor(image[top:rows, 0:right], cv2.COLOR_BGR2GRAY)

    # Run both OCR engines on the same crop and pool their text.
    ocr_chunks = [
        " ".join(easyocr_reader.readtext(overlay, detail=0)),
        pytesseract.image_to_string(overlay, config="--psm 6"),
    ]
    text = " ".join(ocr_chunks).replace(",", ".")

    lat_hit = re.search(r"N\s*(\d{1,2})°\s*(\d{1,2}\.\d+)", text)
    lon_hit = re.search(r"E\s*(\d{1,3})°\s*(\d{1,2}\.\d+)", text)
    if not (lat_hit and lon_hit):
        return None, None

    def to_decimal(hit):
        # Group 1 is whole degrees, group 2 is decimal minutes.
        degrees, minutes = hit.groups()
        return int(degrees) + float(minutes) / 60

    return to_decimal(lat_hit), to_decimal(lon_hit)

# Unified endpoint: runs all three models on a single uploaded image
@app.route('/predict_all', methods=['POST'])
def predict_all():
    """Run ViT classification, YOLO detection and OCR geofencing on an upload.

    Expects a multipart/form-data POST with the image in the ``file`` field.

    Returns:
        A JSON response with three keys:
          * ``vit_prediction``: label -> softmax probability.
          * ``object_detection``: list of ``{"class", "confidence", "bbox"}``
            dicts, with ``bbox`` as ``[x1, y1, x2, y2]``.
          * ``ocr_and_classification``: ``{"latitude", "longitude",
            "classification"}``, or ``{"error": "Coordinates not found"}``
            when OCR could not read both coordinates.
        Any exception raised while handling the request is reported as
        ``{"error": <message>}``.
    """
    try:
        file = request.files['file']
        image_data = file.read()

        # Decode the upload once per consumer: PIL (RGB) for ViT/YOLO, OpenCV (BGR) for OCR
        image_vit = Image.open(io.BytesIO(image_data)).convert("RGB")
        image_cv = cv2.imdecode(np.frombuffer(image_data, np.uint8), cv2.IMREAD_COLOR)
        # NOTE(review): this fixed path is shared by every request, so concurrent
        # requests can overwrite each other's file — consider a per-request temp file.
        image_path = "/content/temp_image.jpg"
        image_vit.save(image_path)

        # Vision Transformer prediction
        inputs = vit_feature_extractor(images=image_vit, return_tensors="pt").to(device)
        with torch.no_grad():
            vit_outputs = vit_model(**inputs)
            vit_probs = torch.softmax(vit_outputs.logits, dim=-1).cpu().numpy()[0]
        vit_result = {label: float(prob) for label, prob in zip(vit_labels, vit_probs)}

        # Object detection prediction (first result = the single input image)
        yolo_results = objdet_model.predict(source=image_path)
        yolo_predictions = [
            {
                "class": int(box.cls.cpu().item()),
                "confidence": float(box.conf.cpu().item()),
                "bbox": [float(coord) for coord in box.xyxy.cpu().tolist()[0]]
            }
            for box in yolo_results[0].boxes
        ]

        # OCR prediction and coordinate classification
        latitude, longitude = extract_coordinates(image_cv)
        if latitude is not None and longitude is not None:
            classification = classify_vessel_position(longitude, latitude)
            ocr_result = {
                "latitude": latitude,
                "longitude": longitude,
                "classification": classification
            }
        else:
            # Must be a dict: the previous set literal {"Coordinates not found"} is
            # not JSON-serializable, so jsonify raised and the ViT/YOLO results were
            # discarded whenever OCR found no coordinates.
            ocr_result = {"error": "Coordinates not found"}

        # Combine results
        combined_result = {
            "vit_prediction": vit_result,
            "object_detection": yolo_predictions,
            "ocr_and_classification": ocr_result
        }
        return jsonify(combined_result)
    except Exception as e:
        # Best-effort: report the failure in the JSON body instead of an HTML 500 page.
        return jsonify({"error": str(e)})

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized because the shapes did not match:
- classifier.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([3]) in the model instantiated
- classifier.weight: found shape torch.Size([1000, 768]) in the checkpoint and torch.Size([3, 768]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded and ready for inference.


In [None]:
# Run the Flask server in a background thread so the notebook stays interactive
import threading

def run_flask():
    """Serve the Flask app on port 5000 (blocks the calling thread)."""
    app.run(port=5000)

# Start the server only if a previous run of this cell has not already left one
# running; starting a second one fails with "Address already in use" on port 5000.
# daemon=True keeps the server thread from blocking kernel shutdown.
if "flask_thread" not in globals() or not flask_thread.is_alive():
    flask_thread = threading.Thread(target=run_flask, daemon=True)
    flask_thread.start()


 * Serving Flask app '__main__'


In [None]:
# Register the ngrok authtoken without hardcoding it in the notebook.
# SECURITY: a real authtoken used to be written literally in this cell. Treat that
# token as compromised and revoke/rotate it in the ngrok dashboard.
# This step only needs to succeed once per runtime; ngrok stores the token in its
# configuration file.
import os
import subprocess
from getpass import getpass

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt without echoing.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")

# Pass the token as a plain argv element (no shell) so it is never shell-interpolated.
authtoken_result = subprocess.run(
    ["./ngrok", "config", "add-authtoken", ngrok_authtoken],
    capture_output=True,
    text=True,
)
# Report failures via stderr only, so the token never ends up in a traceback.
if authtoken_result.returncode != 0:
    raise RuntimeError(f"ngrok config add-authtoken failed: {authtoken_result.stderr.strip()}")
print(authtoken_result.stdout.strip())


 * Debug mode: off
Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


Address already in use
Port 5000 is in use by another program. Either identify and stop that program, or start the server with a different port.


In [None]:
# Start an ngrok HTTP tunnel to local port 5000 (the Flask port) in the background.
# nohup plus the trailing & detach it from this cell; all of its output is discarded.
!nohup ./ngrok http 5000 > /dev/null 2>&1 &



In [None]:
import time
import requests

# Local inspection API exposed by the ngrok agent
NGROK_TUNNELS_API = "http://localhost:4040/api/tunnels"

# Poll until ngrok reports a tunnel instead of relying on a single fixed sleep:
# the agent may need more (or less) than two seconds to come up, and the tunnel
# list can be empty right after start.
public_url = None
for _attempt in range(15):
    try:
        response = requests.get(NGROK_TUNNELS_API, timeout=2)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Agent not listening yet, or it returned a non-JSON body; retry.
        data = {}
    tunnels = data.get("tunnels") or []
    if tunnels:
        # Fetch ngrok's public URL
        public_url = tunnels[0]['public_url']
        break
    time.sleep(1)

if public_url is None:
    raise RuntimeError("ngrok did not report a tunnel; check that ./ngrok is running and the authtoken is valid")
print(f"ngrok Public URL: {public_url}")


ngrok Public URL: https://edeb-34-83-211-224.ngrok-free.app


In [None]:
# Sanity check: list processes whose command line mentions flask / ngrok.
# Each grep also matches its own command line, so those entries always appear.
# Flask is started in a thread of the notebook kernel, not as its own process, so
# only the ngrok agent is expected to show up as a separate process.
!ps aux | grep flask
!ps aux | grep ngrok


root        7990  0.0  0.0   7376  3460 ?        S    02:19   0:00 /bin/bash -c ps aux | grep flask
root        7992  0.0  0.0   6484  2280 ?        S    02:19   0:00 grep flask
root        5193  0.2  0.3 1273652 41880 ?       Sl   02:09   0:01 ./ngrok http 5000
root        7993  0.0  0.0   7376  3496 ?        S    02:19   0:00 /bin/bash -c ps aux | grep ngrok
root        7995  0.0  0.0   7376   280 ?        R    02:19   0:00 /bin/bash -c ps aux | grep ngrok
