When using Caffe::set_mode(Caffe::GPU), the program doesn't work correctly? #55

Closed
lifeiteng opened this issue Jan 24, 2014 · 2 comments
@lifeiteng

To get the probability output of the softmax layer, I use the code below.
Shell command:
GLOG_logtostderr=1 ../examples/TestLiFT.bin TestLiFT.prototxt lenet_iter_5000 10 "GPU" t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
It works correctly if I use Caffe::set_mode(Caffe::CPU):
E0124 16:31:13.968103 8064 TestLiFT.cpp:114] result size = 1 result channel = 10
E0124 16:31:13.968116 8064 TestLiFT.cpp:117] label = 5
E0124 16:31:13.968125 8064 TestLiFT.cpp:119] 1.53695e-05
E0124 16:31:13.968137 8064 TestLiFT.cpp:119] 4.50447e-06
E0124 16:31:13.968147 8064 TestLiFT.cpp:119] 4.77117e-05
E0124 16:31:13.968157 8064 TestLiFT.cpp:119] 0.000178817
E0124 16:31:13.968166 8064 TestLiFT.cpp:119] 1.75801e-05
E0124 16:31:13.968176 8064 TestLiFT.cpp:119] 0.958509
E0124 16:31:13.968185 8064 TestLiFT.cpp:119] 0.00345705
E0124 16:31:13.968196 8064 TestLiFT.cpp:119] 2.1301e-06
E0124 16:31:13.968205 8064 TestLiFT.cpp:119] 0.0376869
E0124 16:31:13.968215 8064 TestLiFT.cpp:119] 8.0598e-05
E0124 16:31:13.968225 8064 TestLiFT.cpp:121] ---------------------
E0124 16:31:13.969791 8064 TestLiFT.cpp:114] result size = 1 result channel = 10
E0124 16:31:13.969805 8064 TestLiFT.cpp:117] label = 9
E0124 16:31:13.969813 8064 TestLiFT.cpp:119] 0.0042518
E0124 16:31:13.969825 8064 TestLiFT.cpp:119] 0.000264884
E0124 16:31:13.969835 8064 TestLiFT.cpp:119] 0.000825241
E0124 16:31:13.969844 8064 TestLiFT.cpp:119] 0.0278211
E0124 16:31:13.969854 8064 TestLiFT.cpp:119] 0.0487807
E0124 16:31:13.969863 8064 TestLiFT.cpp:119] 0.0230153
E0124 16:31:13.969873 8064 TestLiFT.cpp:119] 0.000117688
E0124 16:31:13.969883 8064 TestLiFT.cpp:119] 0.209391
E0124 16:31:13.969893 8064 TestLiFT.cpp:119] 0.150328
E0124 16:31:13.969902 8064 TestLiFT.cpp:119] 0.535204
E0124 16:31:13.969913 8064 TestLiFT.cpp:121] ---------------------

If not, i.e. with Caffe::set_mode(Caffe::GPU), the softmax output is the same for every image (a possible workaround is sketched after the log below):
E0124 16:32:27.603427 8087 TestLiFT.cpp:114] result size = 1 result channel = 10
E0124 16:32:27.603441 8087 TestLiFT.cpp:117] label = 5
E0124 16:32:27.604112 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604125 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604135 8087 TestLiFT.cpp:119] 1
E0124 16:32:27.604145 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604154 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604163 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604173 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604182 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604192 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604202 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.604212 8087 TestLiFT.cpp:121] ---------------------
E0124 16:32:27.604346 8087 TestLiFT.cpp:114] result size = 1 result channel = 10
E0124 16:32:27.604360 8087 TestLiFT.cpp:117] label = 9
E0124 16:32:27.605039 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605062 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605077 8087 TestLiFT.cpp:119] 1
E0124 16:32:27.605087 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605097 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605106 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605115 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605125 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605134 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605144 8087 TestLiFT.cpp:119] 0
E0124 16:32:27.605154 8087 TestLiFT.cpp:121] ---------------------
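
One workaround I may try (just a sketch expanding the commented-out memcpy in TestLiFT.cc below; I have not verified that it fixes the GPU-mode issue): fill the input blob through mutable_cpu_data() and let Caffe's SyncedMemory copy the data to the GPU when a layer asks for gpu_data(), so the same code path works in both CPU and GPU mode.

// Sketch: device-agnostic way to feed the input blob. Writing through
// mutable_cpu_data() marks the host copy as up to date; Caffe's
// SyncedMemory then transfers it to the GPU lazily when a GPU layer
// requests gpu_data(), so no explicit cudaMemcpy is needed.
Blob<float>* input = caffe_test_net.input_blobs()[0];
memcpy(input->mutable_cpu_data(), pixels, sizeof(float) * rows * cols);
const vector<Blob<float>*>& result = caffe_test_net.ForwardPrefilled();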

TestLiFT.cc
// Copyright 2013 Yangqing Jia
//
// This is a simple script that allows one to quickly test a network whose
// structure is specified by text format protocol buffers, and whose parameter
// are loaded from a pre-trained network.
// Usage:
// test_net net_proto pretrained_net_proto iterations [CPU/GPU]

#include <cuda_runtime.h>

#include <cstring>

#include "caffe/caffe.hpp"

#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <vector>
using namespace caffe;

// Convert a 32-bit big-endian value (as stored in the MNIST file headers) to host byte order.
uint32_t swap_endian(uint32_t val) {
  val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
  return (val << 16) | (val >> 16);
}

int main(int argc, char** argv) {
if (argc < 7) {
LOG(ERROR) << "TestLiFT net_proto pretrained_net_proto iterations [CPU/GPU] image-data lable-data";
return 0;
}

cudaSetDevice(0);
Caffe::set_phase(Caffe::TEST);

if (argc == 7 && strcmp(argv[4], "GPU") == 0) {
    LOG(ERROR) << "Using GPU";
    Caffe::set_mode(Caffe::GPU);
} else {
    LOG(ERROR) << "Using CPU";
    Caffe::set_mode(Caffe::CPU);
}
// If I don't force CPU mode here, the softmax output is the same for every image. Why?
Caffe::set_mode(Caffe::CPU);
// Caffe::set_phase(Caffe::TEST);

NetParameter test_net_param;
ReadProtoFromTextFile(argv[1], &test_net_param);
Net<float> caffe_test_net(test_net_param);
NetParameter trained_net_param;
ReadProtoFromBinaryFile(argv[2], &trained_net_param);
caffe_test_net.CopyTrainedLayersFrom(trained_net_param);

int total_iter = atoi(argv[3]);
LOG(ERROR) << "Running " << total_iter << "Iterations.";

// Open files
char* image_filename = argv[5];
char* label_filename = argv[6];

std::ifstream image_file(argv[5], std::ios::in | std::ios::binary);
std::ifstream label_file(argv[6], std::ios::in | std::ios::binary);
CHECK(image_file) << "Unable to open file " << image_filename;
CHECK(label_file) << "Unable to open file " << label_filename;
// Read the magic and the meta data
uint32_t magic;
uint32_t num_items;
uint32_t num_labels;
uint32_t rows;
uint32_t cols;

image_file.read((char*)(&magic), 4);
magic = swap_endian(magic);
CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
label_file.read((char*)(&magic), 4);
magic = swap_endian(magic);
CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
image_file.read((char*)(&num_items), 4);
num_items = swap_endian(num_items);
label_file.read((char*)(&num_labels), 4);
num_labels = swap_endian(num_labels);
CHECK_EQ(num_items, num_labels);
image_file.read((char*)(&rows), 4);
rows = swap_endian(rows);
image_file.read((char*)(&cols), 4);
cols = swap_endian(cols);

char label;
float* pixels = new float[rows * cols]; 

LOG(INFO) << "A total of " << num_items << " items.";
LOG(INFO) << "Rows: " << rows << " Cols: " << cols;

uint8_t pixel;

for (int itemid = 0; itemid < total_iter; ++itemid) {
    // char -> float
    for (int i = 0; i < rows; ++i) {
        for (int j = 0; j < cols; ++j)
            {
                image_file.read((char*)&pixel, 1);  
                *(pixels+i*cols+j) = 0.00390625*static_cast<float>(pixel);
            }    
    }

    label_file.read(&label, 1);
    int label2 = (int)label;

    cudaMemcpy(caffe_test_net.input_blobs()[0]->mutable_gpu_data(), pixels, sizeof(float)*rows*cols, cudaMemcpyHostToDevice);
    //memcpy(caffe_test_net.input_blobs()[0]->mutable_cpu_data(), pixels, sizeof(float)*rows*cols);

    const vector<Blob<float>*>& result = caffe_test_net.ForwardPrefilled();

    LOG(ERROR) << "result size = " << result.size()
            << " result channel = " << result[0]->channels();
    Blob<float>* prob = result[0];
    LOG(ERROR) << "label = " << label2;
    for (int t = 0; t < prob->count(); ++t) {
        LOG(ERROR) << *(prob->cpu_data()+t) << " ";
    }
    LOG(ERROR) << "---------------------";
}

delete []pixels;

return 0;

}
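
One more thing I plan to check (a sketch, using the same cudaMemcpy call as above): whether the copy into the input blob is failing silently in GPU mode, since every forward pass would then see the same stale buffer and could produce identical softmax outputs.

cudaError_t copy_status = cudaMemcpy(
    caffe_test_net.input_blobs()[0]->mutable_gpu_data(), pixels,
    sizeof(float) * rows * cols, cudaMemcpyHostToDevice);
// Abort with the CUDA error string if the host-to-device copy did not succeed.
CHECK_EQ(copy_status, cudaSuccess)
    << "cudaMemcpy failed: " << cudaGetErrorString(copy_status);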

@lifeiteng
Author

TestLiFT.prototxt looks like this (a small shape check against it is sketched after the config):

input: "data"
input_dim: 1
input_dim: 1
input_dim: 28
input_dim: 28
layers {
  layer {
    name: "conv1"
    type: "conv"
    num_output: 20
    kernelsize: 5
    stride: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
    blobs_lr: 1.
    blobs_lr: 2.
  }
  bottom: "data"
  top: "conv1"
}
layers {
  layer {
    name: "pool1"
    type: "pool"
    kernelsize: 2
    stride: 2
    pool: MAX
  }
  bottom: "conv1"
  top: "pool1"
}
layers {
  layer {
    name: "conv2"
    type: "conv"
    num_output: 50
    kernelsize: 5
    stride: 1
    group: 1
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
    blobs_lr: 1.
    blobs_lr: 2.
  }
  bottom: "pool1"
  top: "conv2"
}
layers {
  layer {
    name: "pool2"
    type: "pool"
    kernelsize: 2
    stride: 2
    pool: MAX
  }
  bottom: "conv2"
  top: "pool2"
}
layers {
  layer {
    name: "ip1"
    type: "innerproduct"
    num_output: 500
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
    blobs_lr: 1.
    blobs_lr: 2.
  }
  bottom: "pool2"
  top: "ip1"
}
layers {
  layer {
    name: "relu1"
    type: "relu"
  }
  bottom: "ip1"
  top: "ip1"
}
layers {
  layer {
    name: "ip2"
    type: "innerproduct"
    num_output: 10
    weight_filler {
      type: "xavier"
    }
    bias_filler {
      type: "constant"
    }
  }
  bottom: "ip1"
  top: "ip2"
}
layers {
  layer {
    name: "prob"
    type: "softmax"
  }
  bottom: "ip2"
  top: "prob"
}
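
For what it's worth, a small consistency check I run before the copy loop (a sketch, using the variables from TestLiFT.cc above) to make sure the input blob really has the 1 x 1 x 28 x 28 shape this prototxt declares:

// Sketch: confirm the net's input blob matches the MNIST image size read
// from the file header before copying rows*cols floats into it.
Blob<float>* input = caffe_test_net.input_blobs()[0];
CHECK_EQ(input->num(), 1);
CHECK_EQ(input->channels(), 1);
CHECK_EQ(input->height(), static_cast<int>(rows));
CHECK_EQ(input->width(), static_cast<int>(cols));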

@shelhamer
Member

Sorry, I can't replicate this and nothing else like it has been reported. The results are correct using the bundled train_net.bin and test_net.bin, so please check your code and config.
