<a href="https://colab.research.google.com/github/curlos/manga-panel-splitter/blob/main/Magi_V2_Model_Analyze_Images_(Manga_Panel_Splitter).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install transformers==4.40 pulp pyngrok python-dotenv accelerate psutil



In [5]:
import requests
from flask import Flask, request, jsonify
import os
import numpy as np
from transformers import AutoModel
import torch
from pyngrok import ngrok
import threading
import base64
import pdb
from google.colab import drive
from dotenv import load_dotenv
import socket
import subprocess
import psutil
import logging
import sys
import signal
import time

def load_env_and_set_ngrok_authtoken() -> None:
  '''
  Load the env variables from Google Drive and set the ngrok auth token.
  '''
  if not os.path.ismount('/content/drive'):
      drive.mount('/content/drive')

  env_path = '/content/drive/MyDrive/google_colab_env_files/.env'
  load_dotenv(env_path)

  ngrok_auth_token = os.getenv("NGROK_AUTH_TOKEN")

  if not ngrok_auth_token:
      raise ValueError("NGROK_AUTH_TOKEN is not set. Check your .env file in Google Drive ('google_colab_env_files/.env').")

  ngrok.set_auth_token(ngrok_auth_token)

load_env_and_set_ngrok_authtoken()

port = 8888

"""
Configure Flask logging to use sys.stdout. This logging will only work if the Colab code block is always running.
However, it usually won't always be running so this usually will have no visible effect.
"""
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s]: %(message)s",
)
logger = logging.getLogger()

app = Flask(__name__)

"""
TODO: Possibly for the future but if I want to ever send all of the pages in a chapter (like 15-20 pages), -
- then I'd have to increase the max MB to something like 100MB.
The reason I didn't do this here is because that seems really large for a -
- single Flask request so that's why I'm instead currently only sending in one page at a time per request.
I'll probably keep it to 16MB as that seems safest but definitely something to look out for in the future.
"""
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # Increase limit to 16MB

# Initialize the Magi model
# TODO: In the future, try to run this with "cuda()".
# In theory, it would help the NVIDIA GPUs on Google Colab run at their best and so it would speed things up.
# I tried running this here earlier but unfortunately I kept getting "500" errors from the Flask HTTP request.
# But this is definitely something that'd be worth it if it worked.
# One additional note: When a GPU or CPU is connected to, all previous downloads are deleted.
# So, when running this file for the first time on a freshly connected CPU or GPU, -
# - this will have to download the Magi Model pytorch bin file again (It's around 2GB).
magi_model = AutoModel.from_pretrained(
            "ragavsachdeva/magiv2", trust_remote_code=True).eval()

def get_per_page_results(magi_model, chapter_pages, character_bank):
  """
  Run the predictions for the panels, texts, characters, etc. on the passed in array of chapter pages.
  This is where the biggest calculations are done and the biggest factor in the speed of the program.
  """
  # Set to "no_grad()" so that there's inference without tracking gradients.
  # Basically, this saves memory and computational resources by turning off gradient tracking.
  with torch.no_grad():
      per_page_results = magi_model.do_chapter_wide_prediction(
          chapter_pages, character_bank, use_tqdm=True, do_ocr=True
      )

  return per_page_results

def is_port_in_use(port):
  """
  Check if a port is already in use.
  """
  with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
      return s.connect_ex(("0.0.0.0", port)) == 0

def kill_process_on_port(port):
  try:
      # Find the process ID (PID) using netstat and taskkill
      result = subprocess.check_output(
          f"netstat -ano | findstr :{port}", shell=True
      ).decode()
      pid = int(result.strip().split()[-1])

      # Kill the process
      subprocess.call(["taskkill", "/F", "/PID", str(pid)])
      print(f"Port {port} is now free (process {pid} killed).")
  except subprocess.CalledProcessError:
      print(f"No process is using port {port}.")
  except Exception as e:
      print(f"Error freeing port {port}: {e}")

@app.route('/process-images-with-magi-model', methods=['POST'])
def process_images_with_magi_model():
  logger.info("Received request to /process-images-with-magi-model")

  try:
    # Parse JSON payload
    request_data = request.json
    encoded_arrays = request_data.get("chapter_pages_image_numpy_array")
    character_bank = request_data.get("character_bank")

    # Decode and reconstruct the arrays
    chapter_pages_image_numpy_array = [
        np.frombuffer(base64.b64decode(item["data"]), dtype=item["dtype"]).reshape(item["shape"])
        for item in encoded_arrays
    ]

    # Run Magi model on the file
    per_page_results = get_per_page_results(
        magi_model, chapter_pages_image_numpy_array, character_bank
    )

    return per_page_results
  except Exception as e:
    return jsonify({"error": str(e)}), 500

@app.route('/hello-world', methods=['GET'])
def hello_world():
  logger.info("Hello World endpoint hit")
  return 'Hello World!'

def run_flask():
  logger.info("Starting Flask server...")
  # Run Flask app without reloader (important for threading)
  app.run(host="0.0.0.0", port=port, debug=True, use_reloader=False)


def force_free_port(port):
    """
    Forcefully free a port by identifying and killing the process using it,
    while avoiding critical processes like Colab kernel processes.
    This is necessary because without this, I'd have to keep changing the port
    or manually kill them myself which is a pain in the butt.
    """
    for conn in psutil.net_connections(kind="inet"):
        if conn.laddr.port == port:
            try:
                # Get the process using the port
                process = psutil.Process(conn.pid)
                process_name = process.name().lower()
                process_cmdline = " ".join(process.cmdline())

                # Skip Colab kernel-related processes
                if "colab" in process_name or "python" in process_name and "kernel" in process_cmdline:
                    print(f"Skipping Colab or kernel-related process: {process_name} (PID {conn.pid})")
                    continue

                # Kill the process
                print(f"Killing process using port {port}: {process_name} (PID {conn.pid})")
                os.kill(conn.pid, signal.SIGKILL)
                print(f"Successfully killed process {process_name} (PID {conn.pid}).")
            except psutil.NoSuchProcess:
                print(f"No such process exists for PID {conn.pid}.")
            except psutil.AccessDenied:
                print(f"Permission denied to kill process using port {port}: PID {conn.pid}")
            except Exception as e:
                print(f"Failed to kill process on port {port}: {e}")
            return
    print(f"Port {port} is not in use.")

def stop_all_tunnels():
  """
  This was written to try to programatically stop all active NGROK tunnels programatically
  instead of me having to go to my NGROK account and manually stopping it the active tunnels/agents.
  However, this didn't really work. I've kept the code here just in case I want to try again later
  but this is not currently in use anywhere on this file.
  """
  try:
    # Fetch all active tunnels
    response = requests.get(f"http://127.0.0.1:{port}/api/tunnels")
    tunnels = response.json().get("tunnels", [])

    # Terminate each tunnel
    for tunnel in tunnels:
        tunnel_name = tunnel["name"]
        delete_url = f"http://127.0.0.1:{port}/api/tunnels/{tunnel_name}"
        requests.delete(delete_url)
        print(f"Terminated tunnel: {tunnel_name}")
  except Exception as e:
    print(f"Failed to terminate tunnels: {e}")

def monitor_logs():
  '''
  For statements to be logged to the console, this loop needs to be present and run forever.
  However, this loop will slow the Magi Model 10x.
  So, if one page would take on average 10 seconds, -
  it would now take around 100 seconds to compute.
  Only use this in case of an emergency where I encounter an error and absolutely
  must know where it's coming from or what caused it.
  The logger should be able to tell me what happened - it'll just be super slow.
  '''
  while True:
    pass


if __name__ == '__main__':
  # Check and free the port if in use
  force_free_port(port)

  # Start Flask in a thread to prevent blocking.
  # This is necessary because once I start the server, I need to expose the server to the internet use Ngrok.
  # However, if the server is running on the main thread first, then the code block will run forever.
  # And the problem with that is that it'll stop at that Flask line where it starts the server -
  # - and never go to the next line of code where ngrok connects the port so I never get to have access to the Flask server publicly
  # which means that my "main.py" local file can't send requests to the Colab Flask server unless I put it in a background thread
  # so it doesn't block ngrok.
  thread = threading.Thread(target=run_flask)
  thread.start()

  # Expose the Flask app to the internet using ngrok
  public_url = ngrok.connect(port)
  print("Public URL:", public_url)

  # This is where the monitor logs portion would come in if I need to monitor logs
  # BUT this should only be uncommented out in case of emergency!
  # monitor_logs()

Skipping Colab or kernel-related process: python3 (PID 4280)
Port 8888 is not in use.
 * Serving Flask app '__main__'
 * Debug mode: on


Address already in use
Port 8888 is in use by another program. Either identify and stop that program, or start the server with a different port.


Public URL: NgrokTunnel: "https://93c1-34-125-70-89.ngrok-free.app" -> "http://localhost:8888"
