Update renderer_video.ipynb #47

Merged · 7 commits · Feb 2, 2022
11 changes: 11 additions & 0 deletions .gitignore
@@ -127,3 +127,14 @@ dmypy.json

# Pyre type checker
.pyre/

# dataset
xgaze/

# logs
results/
runs_fine_tuning/
.vscode/

# vim ctags file
tags
8 changes: 5 additions & 3 deletions README.md
@@ -7,12 +7,14 @@ geometric and neural radiance fields for view synthesis, Moreover, if dense imag

## Installation

#### Tested on Ubuntu 16.04 + PyTorch 1.8 + PyTorch Lightning 1.3.5
#### Tested on Ubuntu 20.04 + PyTorch 1.10.1 + PyTorch Lightning 1.3.5

Install environment:
```
pip install pytorch-lightning, inplace_abn
pip install imageio, pillow, scikit-image, opencv-python, config-argparse, lpips
conda create -n mvsnerf python=3.8
conda activate mvsnerf
pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio==0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip install pytorch-lightning==1.3.5 imageio pillow scikit-image opencv-python configargparse lpips kornia warmup_scheduler matplotlib test-tube imageio-ffmpeg
```
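
A quick way to confirm the pinned versions installed correctly (a minimal sanity check, not part of the diff; the version strings assume the cu113 wheels above):

```python
# Sanity check for the environment above: verify the pinned versions
# imported correctly and that the cu113 build can see a GPU.
import torch
import pytorch_lightning as pl

print(torch.__version__)          # expected: 1.10.1+cu113
print(pl.__version__)             # expected: 1.3.5
print(torch.cuda.is_available())  # True if the CUDA 11.3 wheel matches your driver
```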


Binary file modified configs/pairs.th
Binary file not shown.
12 changes: 10 additions & 2 deletions data/llff.py
@@ -72,8 +72,10 @@ def center_poses(poses, blender2opencv):
np.concatenate([poses, last_row], 1) # (N_images, 4, 4) homogeneous coordinate

poses_centered = np.linalg.inv(pose_avg_homo) @ poses_homo # (N_images, 4, 4)
# Damn, the original code here did not multiply by this blender2opencv; doing this effectively transforms the cameras into another coordinate system, different from the usual NeRF convention
poses_centered = poses_centered @ blender2opencv
poses_centered = poses_centered[:, :3] # (N_images, 3, 4)
print('center in center_poses',poses_centered[:, :3, 3].mean(0))

return poses_centered, np.linalg.inv(pose_avg_homo) @ blender2opencv
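
The `blender2opencv` multiplication added above flips the camera's y and z axes, converting from the Blender/OpenGL convention (x right, y up, z backward) to OpenCV (x right, y down, z forward). A minimal sketch of the effect (illustrative, not part of the diff):

```python
# Right-multiplying a camera-to-world pose by diag(1, -1, -1, 1) negates the
# y and z columns of the rotation, i.e. flips those two camera axes.
import numpy as np

blender2opencv = np.array([[1, 0, 0, 0],
                           [0, -1, 0, 0],
                           [0, 0, -1, 0],
                           [0, 0, 0, 1]])
pose = np.eye(4)              # hypothetical camera-to-world pose
print(pose @ blender2opencv)  # columns 1 and 2 come out negated
```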

@@ -170,13 +172,15 @@ def __init__(self, args, split='train', spheric_poses=True, load_ref=False):
self.define_transforms()

self.blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
# self.blender2opencv = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]])

if not load_ref:
self.read_meta()
self.white_back = False

def read_meta(self):
poses_bounds = np.load(os.path.join(self.root_dir, 'poses_bounds.npy')) # (N_images, 17)
self.image_paths = sorted(glob.glob(os.path.join(self.root_dir, 'images_4/*')))
self.image_paths = sorted(glob.glob(os.path.join(self.root_dir, 'images/*')))
# load full resolution image then resize
if self.split in ['train', 'val']:
print(len(poses_bounds) , len(self.image_paths),self.root_dir)
@@ -196,12 +200,14 @@ def read_meta(self):
poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1)
# (N_images, 3, 4) exclude H, W, focal
self.poses, self.pose_avg = center_poses(poses, self.blender2opencv)
# print('pose_avg in read_meta', self.pose_avg)
# self.poses = poses @ self.blender2opencv

# Step 3: correct scale so that the nearest depth is at a little more than 1.0
# See https://github.com/bmild/nerf/issues/34
near_original = self.bounds.min()
scale_factor = near_original * 0.75 # 0.75 is the default parameter
print('scale_factor', scale_factor)
# the nearest depth is at 1/0.75=1.33
self.bounds /= scale_factor
self.poses[..., 3] /= scale_factor
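
The rescaling above can be checked with a toy example (hypothetical depth values, not from the dataset):

```python
# Toy check of the near-bound rescaling: after dividing by
# scale_factor = near * 0.75, the nearest depth lands at 1/0.75 ~= 1.33.
import numpy as np

bounds = np.array([1.5, 40.0])      # hypothetical near/far bounds
scale_factor = bounds.min() * 0.75  # 0.75 is the default parameter
bounds /= scale_factor
print(bounds)                       # [1.333..., 35.555...]
```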
@@ -267,7 +273,7 @@ def read_meta(self):

def read_source_views(self, pair_idx=None, device=torch.device("cpu")):
poses_bounds = np.load(os.path.join(self.root_dir, 'poses_bounds.npy')) # (N_images, 17)
image_paths = sorted(glob.glob(os.path.join(self.root_dir, 'images_4/*')))
image_paths = sorted(glob.glob(os.path.join(self.root_dir, 'images/*')))
# load full resolution image then resize
if self.split in ['train', 'val']:
assert len(poses_bounds) == len(image_paths), \
@@ -278,8 +284,10 @@ def read_source_views(self, pair_idx=None, device=torch.device("cpu")):

# Step 1: rescale focal length according to training resolution
H, W, focal = poses[0, :, -1] # original intrinsics, same for all images
print('original focal', focal)

focal = [focal* self.img_wh[0] / W, focal* self.img_wh[1] / H]
print('processed focal', focal)

# Step 2: correct poses
poses = np.concatenate([poses[..., 1:2], -poses[..., :1], poses[..., 2:4]], -1)
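
The concatenation above permutes the rotation columns from LLFF's (down, right, back) order to the (right, up, back) order the rest of the code expects; a sketch of the same step with named columns (hypothetical input, not part of the diff):

```python
# LLFF's poses_bounds.npy stores rotation columns as (down, right, back);
# NeRF-style code wants (right, up, back), so swap the first two columns
# and negate "down" to get "up".
import numpy as np

poses = np.random.randn(11, 3, 4)  # hypothetical (N_images, 3, 4) poses
down, right = poses[..., 0:1], poses[..., 1:2]
converted = np.concatenate([right, -down, poses[..., 2:4]], -1)
# equivalent to the one-liner above
```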
146 changes: 78 additions & 68 deletions renderer.ipynb
@@ -7,7 +7,7 @@
"outputs": [],
"source": [
"import sys,os,imageio,lpips\n",
"root = '/mnt/new_disk2/anpei/code/MVS-NeRF'\n",
"root = '/home/hengfei/Desktop/research/mvsnerf'\n",
"os.chdir(root)\n",
"sys.path.append(root)\n",
"\n",
@@ -37,8 +37,8 @@
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"torch.cuda.set_device(2)\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '2'"
"torch.cuda.set_device(1)\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '1'"
]
},
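
One caveat with the device selection in the cell above: `CUDA_VISIBLE_DEVICES` only takes effect if it is set before PyTorch initializes CUDA, so setting it after `torch.cuda.set_device` may be ignored. A safer ordering (a sketch, not part of the diff):

```python
# Restrict visible GPUs before any CUDA call; device 0 then maps to physical GPU 1.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

import torch
torch.cuda.set_device(0)
```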
{
@@ -50,8 +50,22 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]\n",
"Loading model from: /home/anpei/anaconda3/lib/python3.7/site-packages/lpips/weights/v0.1/vgg.pth\n"
"Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading: \"https://download.pytorch.org/models/vgg16-397923af.pth\" to /home/hengfei/.cache/torch/hub/checkpoints/vgg16-397923af.pth\n",
"100%|██████████| 528M/528M [02:05<00:00, 4.42MB/s] \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model from: /home/hengfei/miniconda3/envs/mvsnerf/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth\n"
]
}
],
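
The log above comes from constructing the LPIPS metric; a minimal sketch of that setup, assuming the standard `lpips` package API (inputs are expected in [-1, 1]):

```python
# Minimal LPIPS setup matching the "Setting up [LPIPS]..." log above;
# VGG weights are downloaded to the torch hub cache on first use.
import lpips
import torch

loss_fn_vgg = lpips.LPIPS(net='vgg')
img0 = torch.rand(1, 3, 64, 64) * 2 - 1  # hypothetical images, scaled to [-1, 1]
img1 = torch.rand(1, 3, 64, 64) * 2 - 1
print(loss_fn_vgg(img0, img1).item())
```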
@@ -99,14 +113,52 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found ckpts ['./ckpts/mvsnerf-v0.tar']\n",
"Reloading from ./ckpts/mvsnerf-v0.tar\n",
"============> rendering dataset <===================\n",
"11 11 /home/hengfei/Desktop/research/mvsnerf/xgaze/xgaze_11images_cropped_colmapCODE\n",
"===> valing index: [0 5]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hengfei/miniconda3/envs/mvsnerf/lib/python3.8/site-packages/torch/functional.py:445: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2157.)\n",
" return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]\n",
" 0%| | 0/2 [00:00<?, ?it/s]/tmp/ipykernel_7078/3929512484.py:111: FutureWarning: `multichannel` is a deprecated argument name for `structural_similarity`. It will be removed in version 1.0.Please use `channel_axis` instead.\n",
" ssim.append( structural_similarity(rgb_rays, img, multichannel=True))\n",
"100%|██████████| 2/2 [00:25<00:00, 12.72s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"=====> scene: xgaze_11images_cropped_colmapCODE mean psnr 20.593928452628838 ssim: 0.8184963464736938 lpips: 0.5154071301221848\n",
"=====> all mean psnr 20.593928452628838 ssim: 0.8184963464736938 lpips: 0.5154071301221848\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"psnr_all,ssim_all,LPIPS_vgg_all = [],[],[]\n",
"for i_scene, scene in enumerate([ 'room','fortress', 'flower','orchids','leaves','horns','trex','fern']):#\n",
"for i_scene, scene in enumerate([ 'xgaze_11images_cropped_colmapCODE']):#\n",
" psnr,ssim,LPIPS_vgg = [],[],[]\n",
" cmd = f'--datadir /mnt/new_disk_2/anpei/Dataset/MVSNeRF/nerf_llff_data/{scene} \\\n",
" cmd = f'--datadir /home/hengfei/Desktop/research/mvsnerf/xgaze/{scene} \\\n",
" --dataset_name llff \\\n",
" --net_type v0 --ckpt ./ckpts/mvsnerf-v0.tar '\n",
"\n",
@@ -137,7 +189,7 @@
" val_idx = dataset.img_idx\n",
" \n",
" save_as_image = True\n",
" save_dir = f'results/test3'\n",
" save_dir = f'results/xgaze_11images_cropped_colmapCODE'\n",
" os.makedirs(save_dir, exist_ok=True)\n",
" MVSNet.train()\n",
" MVSNet = MVSNet.cuda()\n",
@@ -227,6 +279,13 @@
"print(f'=====> all mean psnr {np.mean(psnr_all)} ssim: {np.mean(ssim_all)} lpips: {np.mean(LPIPS_vgg_all)}') "
]
},
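
The FutureWarning in the cell output above flags the deprecated `multichannel` argument; a hedged sketch of the updated call for scikit-image >= 0.19 (hypothetical arrays, not the notebook's data):

```python
# `channel_axis` replaces the deprecated `multichannel=True`;
# `data_range` should be given explicitly for float images.
import numpy as np
from skimage.metrics import structural_similarity

rgb_rays = np.random.rand(64, 64, 3).astype(np.float32)  # hypothetical render
img = np.random.rand(64, 64, 3).astype(np.float32)       # hypothetical ground truth
score = structural_similarity(rgb_rays, img, channel_axis=-1, data_range=1.0)
```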
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### for room"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -1063,84 +1122,35 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import json,torch\n",
"import sys,os\n",
"import numpy as np\n",
"root = '/mnt/new_disk2/anpei/code/MVS-NeRF'\n",
"root = '/home/hengfei/Desktop/research/mvsnerf'\n",
"os.chdir(root)\n",
"sys.path.append(root)\n",
"pairs = torch.load('./configs/pairs.th')\n",
"\n",
"# llff\n",
"root_dir = '/mnt/new_disk_2/anpei/Dataset/MVSNeRF/nerf_llff_data/'\n",
"for scene in ['horns','leaves', 'room', 'fortress', 'trex', 'orchids','fern']:#\n",
" poses_bounds = np.load(os.path.join(root_dir, scene, 'poses_bounds.npy')) # (N_images, 17)\n",
"root_dir = '/home/hengfei/Desktop/research/mvsnerf/xgaze/'\n",
"for scene in ['xgaze_11images_cropped_colmapCODE']:#\n",
" poses_bounds = np.load(os.path.join(root_dir, scene, 'poses_bounds.npy')) # (N_images, 11)\n",
" poses = poses_bounds[:, :15].reshape(-1, 3, 5) # (N_images, 3, 5)\n",
" poses = np.concatenate([poses[..., 1:2], - poses[..., :1], poses[..., 2:4]], -1)\n",
"\n",
" ref_position = np.mean(poses[..., 3],axis=0, keepdims=True)\n",
" dist = np.sum(np.abs(poses[..., 3] - ref_position), axis=-1)\n",
" pair_idx = np.argsort(dist)[:20]\n",
" pair_idx = np.argsort(dist)[:11]\n",
"# pair_idx = torch.randperm(len(poses))[:20].tolist()\n",
"\n",
" pairs[f'{scene}_test'] = pair_idx[::6]\n",
" pairs[f'{scene}_val'] = pair_idx[::6]\n",
" pairs[f'{scene}_train'] = np.delete(pair_idx, range(0,20,6))\n",
"\n",
"\n",
"# nerf\n",
"center_view = {'lego':6,'ship':80,'drums':22,'mic':20,'chair':8,'materials':36,'hotdog':26,'ficus':38}\n",
"blender2opencv = np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])\n",
"for i, scene in enumerate(['ship','drums','mic','chair','materials','lego','hotdog','ficus']):\n",
" with open(f'/mnt/new_disk_2/anpei/Dataset/nerf_synthetic/{scene}/transforms_train.json', 'r') as f:\n",
" meta = json.load(f)\n",
" \n",
" poses = []\n",
" ref_idx = torch.randint(0,len(meta['frames']),(1,))\n",
" ref_idx = center_view[f'{scene}']\n",
" for frame in meta['frames']:\n",
" pose = np.array(frame['transform_matrix']) @ blender2opencv\n",
" poses += [pose]\n",
" poses = np.stack(poses)\n",
" \n",
" # find nearest image idx\n",
" viewing_dir = poses[:,:3,2]\n",
" dis = np.sum(viewing_dir * poses[[ref_idx],:3,2], axis=-1)\n",
" pair_idx = np.argsort(dis)[::-1][:20]\n",
" \n",
" pairs[f'{scene}_train'] = np.delete(pair_idx, range(0,20,6))\n",
" pairs[f'{scene}_test'] = pair_idx[::6]\n",
" pairs[f'{scene}_val'] = pair_idx[::6]\n",
" pairs[f'{scene}_train'] = np.delete(pair_idx, range(0,11,6))\n",
"\n",
"for i, scene in enumerate(['drums','lego']):\n",
" train_view = list(list(torch.load(f'/mnt/new_disk_2/anpei/Dataset/MVSNeRF/Nerf/{scene}/img_pair_train_local2.th').values())[0]) + \\\n",
" list(torch.load(f'/mnt/new_disk_2/anpei/Dataset/MVSNeRF/Nerf/{scene}/img_pair_train_local2.th').keys())\n",
" pairs[f'{scene}_train'] = train_view[:3] + list(np.unique(train_view[3:]))[:13]\n",
" pairs[f'{scene}_test'] = list(list(torch.load(f'/mnt/new_disk_2/anpei/Dataset/MVSNeRF/Nerf/{scene}/img_pair_val_local2.th').values())[0]) + \\\n",
" list(torch.load(f'/mnt/new_disk_2/anpei/Dataset/MVSNeRF/Nerf/{scene}/img_pair_val_local2.th').keys())\n",
" pairs[f'{scene}_val'] = pairs[f'{scene}_test']\n",
" \n",
"# pairs[f'dtu_train'] = list(torch.load('/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Cameras/img_pair_train_local3.th').values())[0] + \\\n",
"# list(torch.load('/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Cameras/img_pair_train_local3.th').keys())\n",
"# pairs[f'dtu_val'] = pairs[f'dtu_test'] = list(torch.load('/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Cameras/img_pair_val_local3.th').values())[0] + \\\n",
"# list(torch.load('/mnt/data/new_disk/sungx/data/mvs_dataset/DTU/mvs_training/dtu/Cameras/img_pair_val_local3.th').keys())\n",
" \n",
"# dtu\n",
"# 0-4\n",
"# 10 - 5 \n",
"# 11 - 18\n",
"# 27 xx 19\n",
"# 28 x 38\n",
"#48 - 39\n",
"pairs[f'dtu_train'] = [25,21,33,22,14,15,26,30,31,35,34,43,46,29,16,36]\n",
"pairs[f'dtu_val'] = [32,24,23,44]\n",
"pairs[f'dtu_test'] = [32,24,23,44]\n",
"\n",
"torch.save(pairs,'/mnt/new_disk_2/anpei/code/MVS-NeRF/configs/pairs.th')"
"torch.save(pairs,'/home/hengfei/Desktop/research/mvsnerf/configs/pairs.th')"
]
},
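
The cell above ranks cameras by L1 distance from the mean camera position and carves every sixth index out for val/test; a condensed sketch of that split for the 11-view case (hypothetical positions, not the xgaze data):

```python
# Rank views by L1 distance to the mean camera center, then split by stride 6:
# ranks 0 and 6 become val/test, the remaining 9 become train.
import numpy as np

positions = np.random.randn(11, 3)            # hypothetical camera centers
ref = positions.mean(axis=0, keepdims=True)
dist = np.abs(positions - ref).sum(axis=-1)
pair_idx = np.argsort(dist)[:11]

val_test = pair_idx[::6]                      # indices at ranks 0 and 6
train = np.delete(pair_idx, range(0, 11, 6))  # the other 9 views
```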
{
@@ -1720,7 +1730,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.8.12"
}
},
"nbformat": 4,