In [1]:
import glob
import time
from tkinter.filedialog import askdirectory, askopenfilename

import cv2
import numpy as np
import torch
import yaml
from mlflow import pytorch
from PIL import Image
from preprocessing.transformer import ScaleTransformer
from torchvision.transforms import Compose, Normalize, Resize, ToTensor

In [2]:
class Estimator:
    """Callable wrapper around an MLflow-logged PyTorch model.

    The instance owns the model, the image transform and the
    ``ScaleTransformer`` used to map raw network outputs back through
    ``inverse_transform``.
    """

    def __init__(self, mode, ds_num, transform, run_id=None):
        """
        Args:
            mode: Input mode. 0 (falsy) = real-time frames given as
                np.ndarray, 1 (truthy) = images already loaded as PIL images.
            ds_num: Dataset number forwarded to ``ScaleTransformer``.
            transform: Callable applied to a PIL image; its result must
                support ``.unsqueeze(0)`` (i.e. be a tensor).
            run_id: MLflow run ID. The model is loaded from
                ``mlruns/2/<run_id>/artifacts/model``. When ``None`` or empty,
                a directory chooser is opened and the selected directory is
                used as the run directory.

        Raises:
            ValueError: If no run ID is given and the directory chooser is
                cancelled.
        """
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.transform = transform
        self.scaler = ScaleTransformer(ds_num)
        self.mode = mode

        if run_id:
            model_uri = "mlruns/2/" + run_id + "/artifacts/model"
        else:
            # askdirectory returns the full path of the chosen directory (or an
            # empty value on cancel), so it must not be spliced into the
            # "mlruns/2/<run_id>" template as if it were a bare run ID.
            run_dir = askdirectory(initialdir="mlruns")
            if not run_dir:
                raise ValueError("No run ID given and no run directory selected")
            model_uri = run_dir.rstrip("/") + "/artifacts/model"

        self.model = pytorch.load_model(model_uri)

        self.model.to(self.device)
        self.model.eval()

    def __call__(self, src):
        """Run the model on one image and return the rescaled prediction.

        Args:
            src: In real-time mode (``mode`` falsy) an image array; a
                3-channel array is assumed to be in OpenCV's BGR order
                (TODO confirm against the capture source). Otherwise an
                object accepted by ``self.transform``.

        Returns:
            The result of ``self.scaler.inverse_transform`` applied to the
            model output for a batch of one.
        """
        if not self.mode:
            # The transform pipeline is fed PIL images in movie mode, so convert
            # the raw frame to a PIL image here to share the same path.
            frame = np.asarray(src)
            if frame.ndim == 3 and frame.shape[2] == 3:
                frame = frame[:, :, ::-1]  # BGR -> RGB
            # The reversed view has negative strides; make it contiguous
            # before handing it to PIL.
            src = Image.fromarray(np.ascontiguousarray(frame))

        with torch.no_grad():
            img = self.transform(src).unsqueeze(0).to(self.device)
            pred = self.model(img).detach().cpu()
            return self.scaler.inverse_transform(pred)


#             cv2.drawMarker(im, (x_2d, y_2d),
#                            color=(255, 255, 0),
#                            markerType=cv2.MARKER_TILTED_CROSS,
#                            thickness=3)

#             txt0 = 'x: {:.2f} mm'.format(p[0][0])
#             txt1 = 'y: {:.2f} mm'.format(p[0][1])
#             txt2 = 'z: {:.2f} mm'.format(p[0][2])
#             txt3 = 'joint: {:.2f} deg'.format(p[2])

#             im = cv2.putText(im, txt0, (50, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (
#                 255,
#                 255,
#                 0,
#             ), 2)
#             im = cv2.putText(im, txt1, (50, 120), cv2.FONT_HERSHEY_SIMPLEX, 1, (
#                 255,
#                 255,
#                 0,
#             ), 2)
#             im = cv2.putText(im, txt2, (50, 170), cv2.FONT_HERSHEY_SIMPLEX, 1, (
#                 255,
#                 255,
#                 0,
#             ), 2)
#             im = cv2.putText(im, txt3, (50, 220), cv2.FONT_HERSHEY_SIMPLEX, 1, (
#                 255,
#                 255,
#                 0,
#             ), 2)

#             cv2.imshow('image', im)
#             cv2.waitKey(0)
#             cv2.destroyAllWindows()

  and should_run_async(code)


In [3]:
# MLflow run ID of a logged model. main() prompts for its own run ID, so this
# module-level value is only a convenience for constructing Estimator by hand.
run_id = "f1fd2eea388148da8bc7ea3bed378b7a"

In [4]:
# Preprocessing applied to every PIL image before it is fed to the model:
# resize, convert to a tensor, then normalise each channel.
# NOTE(review): the mean/std values match the widely used ImageNet channel
# statistics - confirm they are what the model was trained with.
resize_shape = (224, 224)
cmean = [0.485, 0.456, 0.406]
cstd = [0.229, 0.224, 0.225]
transform = Compose(
    [
        Resize(resize_shape),
        ToTensor(),
        Normalize(mean=cmean, std=cstd),
    ]
)

In [5]:
class Camera:
    """Capture device 0 with lens-distortion correction applied to each frame."""

    def __init__(self):
        """Load the calibration files, then open capture device 0.

        The configuration and calibration files are read before the device is
        opened, so a missing or invalid file does not leave the capture device
        open with no handle for the caller to release.

        Raises:
            OSError: If a configuration or calibration file cannot be read.
            KeyError: If the YAML config lacks ``image_size.w`` / ``image_size.h``.
        """
        with open("./evaluation/camera_config/camera_config.yaml") as f:
            cam_config = yaml.load(f, Loader=yaml.SafeLoader)
        w = cam_config["image_size"]["w"]
        h = cam_config["image_size"]["h"]
        self.mtx = np.load("evaluation/camera_config/intrinsic_parameter.npy")
        self.dist = np.load("evaluation/camera_config/distortion_parameter.npy")
        # The ROI returned alongside the new camera matrix is not used.
        self.cam_param, _ = cv2.getOptimalNewCameraMatrix(
            self.mtx, self.dist, (w, h), 1, (w, h)
        )
        # Opened last: everything above can fail without touching the device.
        self.video = cv2.VideoCapture(0)

    def get_frame(self):
        """Read one frame and undistort it.

        Returns:
            The undistorted frame, or ``None`` when the device did not
            deliver a frame.
        """
        ret, frame = self.video.read()
        if not ret:
            return None
        return cv2.undistort(frame, self.mtx, self.dist, None, self.cam_param)

    def terminate(self):
        """Release the capture device."""
        self.video.release()

In [10]:
def main():
    """Prompt for the run settings on stdin and print model estimates.

    Mode 0 (real-time) reads frames from ``Camera`` until interrupted and
    always releases the capture device on exit. Any other mode (movie)
    runs the estimator over every ``*.jpg`` in ``./evaluation/eval_dataset``
    in sorted filename order.

    Raises:
        ValueError: If the mode or dataset number entered is not an integer.
    """
    print(
        """
    --- Estimation Mode ---
    0: Real-time
    1: Movie (Please convert the video into the images (with a software such as ffmpeg), and they must be on './evaluation/eval_dataset')
    """
    )
    mode = int(input("Mode: "))
    run_id = input("Run ID: ").strip()
    ds_num = int(input("Dataset: #"))
    if not mode:
        print("Initializing camera...")
        cam = Camera()
        try:
            print("Loading model...")
            # Estimator requires ds_num and transform in every mode.
            estimator = Estimator(
                mode=0, ds_num=ds_num, transform=transform, run_id=run_id
            )

            print(
                """
        --- Operation ---
        Interrupt the kernel (Ctrl+C) to stop.
        """
            )
            while True:
                frame = cam.get_frame()
                # `not frame` is ambiguous for an ndarray; get_frame signals
                # "no frame" with None.
                if frame is None:
                    print("No signal")
                    time.sleep(0.1)  # avoid flooding the output in a tight loop
                else:
                    print(estimator(frame))
        except KeyboardInterrupt:
            print("Stopped")
        finally:
            cam.terminate()

    else:  # Movie
        eval_dir = "./evaluation/eval_dataset/*.jpg"
        print("Loading model...")
        estimator = Estimator(
            mode=1, ds_num=ds_num, transform=transform, run_id=run_id
        )

        # glob returns files in arbitrary order; sort so frames are processed
        # in filename order.
        for img_path in sorted(glob.glob(eval_dir)):
            with Image.open(img_path) as img:
                # Force 3 channels so grayscale/CMYK JPEGs match the
                # 3-channel normalisation.
                out = estimator(img.convert("RGB"))
            print(out)

  and should_run_async(code)


In [11]:
# Run the interactive estimation prompt when this cell/module is executed directly.
if __name__ == "__main__":
    main()


    --- Estimation Mode ---
    0: Real-time
    1: Movie (Please convert the video into the images (with a software such as ffmpeg), and they must be on './evaluation/eval_dataset')
    


Mode:  1
Run ID:  f1fd2eea388148da8bc7ea3bed378b7a
Dataset: # 24


Loading model...
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]
[-5.97047210e-01 -1.48915239e-01  5.25291014e+01  9.53263641e+02
  5.40531619e+02  5.16831875e-03 -3.04484367e-03  6.25660425e-01
 -7.02816310e+01  2.22153157e+01]