diff --git a/.gitignore b/.gitignore
index c82f059..0f32944 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,8 +3,12 @@
 *.swo
 *.swm
 
+*.pth
+
 *.lprof
 .cache
 __pycache__
 src/__pycache__
 
+
+
diff --git a/README.md b/README.md
index 24af544..fa9b688 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,8 @@ from the paper [*Quo Vadis, Action Recognition? A New Model and the Kinetics Dat
 
 The original (and official!) tensorflow code can be found [here](https://github.com/deepmind/kinetics-i3d/).
 
+To install the required libraries: `pip install -r requirements.txt`
+
 The heart of the transfer is the `i3d_tf_to_pt.py` script
 
 Launch it with `python i3d_tf_to_pt.py --rgb` to generate the rgb checkpoint weight pretrained from ImageNet inflated initialization.
@@ -14,6 +16,8 @@ To generate the flow weights, use `python i3d_tf_to_pt.py --flow`.
 
 You can also generate both in one run by using both flags simultaneously `python i3d_tf_to_pt.py --rgb --flow`.
 
+If you have errors regarding Protobuf versions, try `PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python python i3d_tf_to_pt.py --rgb --flow`.
+
 Note that the master version requires PyTorch 0.3 as it relies on the recent addition of ConstantPad3d that has been included in this latest release.
 
 If you want to use pytorch 0.2 checkout the branch pytorch-02 which contains a simplified model with even padding on all sides (and the corresponding pytorch weight checkpoints).
diff --git a/i3d_pt_demo.py b/i3d_pt_demo.py
index 74aa461..2b54678 100644
--- a/i3d_pt_demo.py
+++ b/i3d_pt_demo.py
@@ -9,10 +9,13 @@
 
 
 def run_demo(args):
+
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+
     kinetics_classes = [x.strip() for x in open(args.classes_path)]
 
     def get_scores(sample, model):
-        sample_var = torch.autograd.Variable(torch.from_numpy(sample).cuda())
+        sample_var = torch.autograd.Variable(torch.from_numpy(sample).to(device))
         out_var, out_logit = model(sample_var)
         out_tensor = out_var.data.cpu()
 
@@ -30,7 +33,7 @@ def get_scores(sample, model):
     i3d_rgb = I3D(num_classes=400, modality='rgb')
     i3d_rgb.eval()
     i3d_rgb.load_state_dict(torch.load(args.rgb_weights_path))
-    i3d_rgb.cuda()
+    i3d_rgb.to(device)
 
     rgb_sample = np.load(args.rgb_sample_path).transpose(0, 4, 1, 2, 3)
     out_rgb_logit = get_scores(rgb_sample, i3d_rgb)
@@ -40,7 +43,7 @@ def get_scores(sample, model):
     i3d_flow = I3D(num_classes=400, modality='flow')
     i3d_flow.eval()
     i3d_flow.load_state_dict(torch.load(args.flow_weights_path))
-    i3d_flow.cuda()
+    i3d_flow.to(device)
 
     flow_sample = np.load(args.flow_sample_path).transpose(0, 4, 1, 2, 3)
     out_flow_logit = get_scores(flow_sample, i3d_flow)
diff --git a/i3d_pt_profiling.py b/i3d_pt_profiling.py
index 494a502..b413f0c 100644
--- a/i3d_pt_profiling.py
+++ b/i3d_pt_profiling.py
@@ -10,15 +10,17 @@
 
 # Install using `pip install line_profiler`
 # Launch `kernprof -lv i3d_pt_profiling.py`
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
 
 @profile
 def run(model, dataloader, criterion, optimizer, frame_nb):
+    # Load data
     for i, (input_2d, target) in enumerate(dataloader):
-        optimizer.zero_grad
+        optimizer.zero_grad()
 
         # Prepare data for pytorch forward pass
         input_3d = input_2d.clone().unsqueeze(2).repeat(1, 1, frame_nb, 1, 1)
-        input_3d_var = torch.autograd.Variable(input_3d.cuda())
+        input_3d_var = torch.autograd.Variable(input_3d.to(device))
 
         # Pytorch forward pass
         out_pt, _ = model(input_3d_var)
@@ -54,7 +56,7 @@ def run_profile(args):
     i3nception_pt.eval()
     i3nception_pt.load_state_dict(torch.load(args.rgb_weights_path))
     i3nception_pt.train()
-    i3nception_pt.cuda()
+    i3nception_pt.to(device)
 
     l1_loss = torch.nn.L1Loss()
     sgd = torch.optim.SGD(i3nception_pt.parameters(), lr=0.001, momentum=0.9)
diff --git a/model/model_flow.pth b/model/model_flow.pth
index 5d552d2..7395976 100644
Binary files a/model/model_flow.pth and b/model/model_flow.pth differ
diff --git a/model/model_rgb.pth b/model/model_rgb.pth
index 2a7c22e..f3bb29d 100644
Binary files a/model/model_rgb.pth and b/model/model_rgb.pth differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5332a30
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,33 @@
+absl-py==2.1.0
+astor==0.8.1
+certifi==2022.12.7
+charset-normalizer==2.1.1
+dm-sonnet==1.13
+gast==0.2.2
+google-pasta==0.2.0
+grpcio==1.62.2
+h5py==2.10.0
+idna==3.4
+importlib-metadata==6.7.0
+Keras-Applications==1.0.8
+Keras-Preprocessing==1.1.2
+Markdown==3.4.4
+MarkupSafe==2.1.5
+numpy==1.18.5
+opt-einsum==3.3.0
+Pillow==9.3.0
+protobuf==4.24.4
+requests==2.28.1
+rope==0.18.0
+six==1.16.0
+tensorboard==1.15.0
+tensorflow==1.15.5
+tensorflow-estimator==1.15.1
+termcolor==2.3.0
+torch==1.13.1+cpu
+torchvision==0.14.1+cpu
+typing_extensions==4.7.1
+urllib3==1.26.13
+Werkzeug==2.2.3
+wrapt==1.16.0
+zipp==3.15.0
diff --git a/src/test_first_block.py b/src/test_first_block.py
index c652f03..c397c02 100644
--- a/src/test_first_block.py
+++ b/src/test_first_block.py
@@ -6,6 +6,7 @@
 from src import inflate
 
 def test_input_block():
+    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225])
     dataset = datasets.ImageFolder('/sequoia/data1/yhasson/datasets/test-dataset',
@@ -29,7 +30,7 @@ def test_input_block():
     loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=False)
     frame_nb = 4
     for i, (input_2d, target) in enumerate(loader):
-        target = target.cuda()
+        target = target.to(device)
         target_var = torch.autograd.Variable(target)
         input_2d_var = torch.autograd.Variable(input_2d)
         out2d = seq2d(input_2d_var)
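All of the source changes above apply one pattern: select a torch.device once (CUDA when available, otherwise CPU) and move models and inputs with .to(device) instead of hard-coding .cuda(). Below is a minimal, self-contained sketch of that pattern; the dummy clip size (one clip of 16 RGB frames at 224x224) and the skipped weight loading are illustrative assumptions rather than anything this diff requires, while the I3D constructor arguments and the (out, logits) return pair follow i3d_pt_demo.py.

# Minimal sketch of the CUDA/CPU fallback pattern introduced by this diff.
# Assumes it is run from the repository root so that `src.i3dpt` is importable;
# the clip dimensions below are placeholders, not values the model requires.
import torch

from src.i3dpt import I3D

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = I3D(num_classes=400, modality='rgb')
model.eval()
model.to(device)  # replaces the former hard-coded model.cuda()
# Weight loading (model.load_state_dict(torch.load('model/model_rgb.pth'))) is
# omitted in this sketch, so the network runs with random weights.

# Dummy clip in the (batch, channels, frames, height, width) layout that the
# demo obtains via transpose(0, 4, 1, 2, 3).
clip = torch.zeros(1, 3, 16, 224, 224).to(device)

with torch.no_grad():  # inference only, no gradients needed
    out, logits = model(clip)
print(out.shape, logits.shape)

With this fallback the scripts no longer require a GPU, which matches the CPU-only torch==1.13.1+cpu wheel pinned in requirements.txt.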