Large diffs are not rendered by default.

@@ -1,6 +1,6 @@
classes=1000
train = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.valid.list
backup = /home/pjreddie/backup/
labels = data/imagenet.labels.list
names = data/imagenet.shortnames.list

Large diffs are not rendered by default.

@@ -1,4 +1,8 @@
[net]
# Training
# batch=64
# subdivisions=2
# Testing
batch=1
subdivisions=1
width=416
@@ -12,10 +16,11 @@ exposure = 1.5
hue=.1

learning_rate=0.001
max_batches = 120000
burn_in=1000
max_batches = 500200
policy=steps
steps=-1,100,80000,100000
scales=.1,10,.1,.1
steps=400000,450000
scales=.1,.1

[convolutional]
batch_normalize=1
@@ -104,7 +109,7 @@ batch_normalize=1
size=3
stride=1
pad=1
filters=1024
filters=512
activation=leaky

[convolutional]
@@ -115,14 +120,14 @@ filters=425
activation=linear

[region]
anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
bias_match=1
classes=80
coords=4
num=5
softmax=1
jitter=.2
rescore=1
rescore=0

object_scale=5
noobject_scale=1
@@ -53,12 +53,12 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
char **paths = (char **)list_to_array(plist);
printf("%d\n", plist->size);
int N = plist->size;
clock_t time;
double time;

load_args args = {0};
args.w = net.w;
args.h = net.h;
args.threads = 32;
args.threads = 64;
args.hierarchy = net.hierarchy;

args.min = net.min_crop;
@@ -85,14 +85,14 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,

int epoch = (*net.seen)/N;
while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
time=clock();
time = what_time_is_it_now();

pthread_join(load_thread, 0);
train = buffer;
load_thread = load_data(args);

printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
time = what_time_is_it_now();

float loss = 0;
#ifdef GPU
@@ -106,7 +106,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus,
#endif
if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1;
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
printf("%ld, %.3f: %f, %f avg, %f rate, %lf seconds, %ld images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, *net.seen);
free_data(train);
if(*net.seen/N > epoch){
epoch = *net.seen/N;
@@ -670,7 +670,6 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
int *indexes = calloc(top, sizeof(int));
char buff[256];
char *input = buff;
int size = net.w;
while(1){
if(filename){
strncpy(input, filename, 256);
@@ -682,8 +681,8 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi
strtok(input, "\n");
}
image im = load_image_color(input, 0, 0);
image r = resize_min(im, size);
resize_network(&net, r.w, r.h);
image r = letterbox_image(im, net.w, net.h);
//resize_network(&net, r.w, r.h);
//printf("%d %d\n", r.w, r.h);

float *X = r.data;
@@ -342,7 +342,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, alphabet, 80);
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, 0, coco_classes, alphabet, 80);
save_image(im, "prediction");
show_image(im, "predictions");
free_image(im);
@@ -146,7 +146,7 @@ void oneoff(char *cfgfile, char *weightfile, char *outfile)
int c = net.layers[net.n - 2].c;
scal_cpu(oldn*c, .1, net.layers[net.n - 2].weights, 1);
scal_cpu(oldn, 0, net.layers[net.n - 2].biases, 1);
net.layers[net.n - 2].n = 9418;
net.layers[net.n - 2].n = 11921;
net.layers[net.n - 2].biases += 5;
net.layers[net.n - 2].weights += 5*c;
if(weightfile){
@@ -41,9 +41,8 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
//int N = plist->size;
char **paths = (char **)list_to_array(plist);

load_args args = {0};
args.w = net.w;
args.h = net.h;
load_args args = get_base_args(net);
args.coords = l.coords;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
@@ -52,13 +51,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
args.num_boxes = l.max_boxes;
args.d = &buffer;
args.type = DETECTION_DATA;
//args.type = INSTANCE_DATA;
args.threads = 8;

args.angle = net.angle;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;

pthread_t load_thread = load_data(args);
clock_t time;
int count = 0;
@@ -102,7 +97,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i
image im = float_to_image(net.w, net.h, 3, train.X.vals[zz]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[zz] + k*5);
box b = float_to_box(train.y.vals[zz] + k*5, 1);
printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
draw_bbox(im, b, 1, 1,0,0);
}
@@ -130,7 +125,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i

i = get_current_batch(net);
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs);
if(i%1000==0){
if(i%100==0){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
@@ -342,7 +337,7 @@ void validate_detector_flip(char *datacfg, char *cfgfile, char *weightfile, char
network_predict(net, input.data);
int w = val[t].w;
int h = val[t].h;
get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, map, .5, 0);
get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@@ -473,7 +468,7 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out
network_predict(net, X);
int w = val[t].w;
int h = val[t].h;
get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, map, .5, 0);
get_region_boxes(l, w, h, net.w, net.h, thresh, probs, boxes, 0, 0, map, .5, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
if (coco){
print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
@@ -508,7 +503,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
srand(time(0));

list *plist = get_paths("data/voc.2007.test");
list *plist = get_paths("data/coco_val_5k.list");
char **paths = (char **)list_to_array(plist);

layer l = net.layers[net.n-1];
@@ -537,7 +532,7 @@ void validate_detector_recall(char *cfgfile, char *weightfile)
image sized = resize_image(orig, net.w, net.h);
char *id = basecfg(path);
network_predict(net, sized.data);
get_region_boxes(l, sized.w, sized.h, net.w, net.h, thresh, probs, boxes, 1, 0, .5, 1);
get_region_boxes(l, sized.w, sized.h, net.w, net.h, thresh, probs, boxes, 0, 1, 0, .5, 1);
if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);

char labelpath[4096];
@@ -684,11 +679,11 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
}
set_batch_network(&net, 1);
srand(2222222);
clock_t time;
double time;
char buff[256];
char *input = buff;
int j;
float nms=.4;
float nms=.3;
while(1){
if(filename){
strncpy(input, filename, 256);
@@ -710,15 +705,20 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
float **masks = 0;
if (l.coords > 4){
masks = calloc(l.w*l.h*l.n, sizeof(float*));
for(j = 0; j < l.w*l.h*l.n; ++j) masks[j] = calloc(l.coords-4, sizeof(float *));
}

float *X = sized.data;
time=clock();
time=what_time_is_it_now();
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_region_boxes(l, im.w, im.h, net.w, net.h, thresh, probs, boxes, 0, 0, hier_thresh, 1);
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
get_region_boxes(l, im.w, im.h, net.w, net.h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1);
if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
//else if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, masks, names, alphabet, l.classes);
if(outfile){
save_image(im, outfile);
}
@@ -0,0 +1,25 @@
# Stupid python path shit.
# Instead just add darknet.py to somewhere in your python path
# OK actually that might not be a great idea, idk, work in progress
# Use at your own risk. or don't, i don't care

import sys, os
sys.path.append(os.path.join(os.getcwd(),'python/'))

import darknet as dn

net = dn.load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
meta = dn.load_meta("cfg/coco.data")
r = dn.detect(net, meta, "data/dog.jpg")
print r

# And then down here you could detect a lot more images like:
r = dn.detect(net, meta, "data/eagle.jpg")
print r
r = dn.detect(net, meta, "data/giraffe.jpg")
print r
r = dn.detect(net, meta, "data/horses.jpg")
print r
r = dn.detect(net, meta, "data/person.jpg")
print r

@@ -108,22 +108,6 @@ float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, si
return p;
}

void reset_rnn_state(network net, int b)
{
int i;
for (i = 0; i < net.n; ++i) {
#ifdef GPU
layer l = net.layers[i];
if(l.state_gpu){
fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1);
}
if(l.h_gpu){
fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1);
}
#endif
}
}

void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear, int tokenized)
{
srand(time(0));
@@ -194,7 +178,7 @@ void train_char_rnn(char *cfgfile, char *weightfile, char *filename, int clear,
if(rand()%64 == 0){
//fprintf(stderr, "Reset\n");
offsets[j] = rand_size_t()%size;
reset_rnn_state(net, j);
reset_network_state(net, j);
}
}

@@ -279,6 +263,54 @@ void test_char_rnn(char *cfgfile, char *weightfile, int num, char *seed, float t
printf("\n");
}

void test_tactic_rnn_multi(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file)
{
char **tokens = 0;
if(token_file){
size_t n;
tokens = read_tokens(token_file, &n);
}

srand(rseed);
char *base = basecfg(cfgfile);
fprintf(stderr, "%s\n", base);

network net = parse_network_cfg(cfgfile);
if(weightfile){
load_weights(&net, weightfile);
}
int inputs = net.inputs;

int i, j;
for(i = 0; i < net.n; ++i) net.layers[i].temperature = temp;
int c = 0;
float *input = calloc(inputs, sizeof(float));
float *out = 0;

while(1){
reset_network_state(net, 0);
while((c = getc(stdin)) != EOF && c != 0){
input[c] = 1;
out = network_predict(net, input);
input[c] = 0;
}
for(i = 0; i < num; ++i){
for(j = 0; j < inputs; ++j){
if (out[j] < .0001) out[j] = 0;
}
int next = sample_array(out, inputs);
if(c == '.' && next == '\n') break;
c = next;
print_symbol(c, tokens);

input[c] = 1;
out = network_predict(net, input);
input[c] = 0;
}
printf("\n");
}
}

void test_tactic_rnn(char *cfgfile, char *weightfile, int num, float temp, int rseed, char *token_file)
{
char **tokens = 0;
@@ -434,7 +466,7 @@ void vec_char_rnn(char *cfgfile, char *weightfile, char *seed)
int i;
char *line;
while((line=fgetl(stdin)) != 0){
reset_rnn_state(net, 0);
reset_network_state(net, 0);
for(i = 0; i < seed_len; ++i){
c = seed[i];
input[(int)c] = 1;
@@ -308,8 +308,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
//draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, 0, voc_names, alphabet, 20);
save_image(im, "predictions");
show_image(im, "predictions");

@@ -175,6 +175,7 @@ struct layer{
float coord_scale;
float object_scale;
float noobject_scale;
float mask_scale;
float class_scale;
int bias_match;
int random;
@@ -508,7 +509,7 @@ typedef struct{
} data;

typedef enum {
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA
CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA, TAG_DATA, OLD_CLASSIFICATION_DATA, STUDY_DATA, DET_DATA, SUPER_DATA, LETTERBOX_DATA, REGRESSION_DATA, SEGMENTATION_DATA, INSTANCE_DATA
} data_type;

typedef struct load_args{
@@ -530,6 +531,7 @@ typedef struct load_args{
int background;
int scale;
int center;
int coords;
float jitter;
float angle;
float aspect;
@@ -642,9 +644,10 @@ void save_weights_upto(network net, char *filename, int cutoff);
void load_weights_upto(network *net, char *filename, int start, int cutoff);

void zero_objectness(layer l);
void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int relative);
void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, float **masks, int only_objectness, int *map, float tree_thresh, int relative);
void free_network(network net);
void set_batch_network(network *net, int b);
void set_temp_network(network net, float t);
image load_image(char *filename, int w, int h, int c);
image load_image_color(char *filename, int w, int h);
image make_image(int w, int h, int c);
@@ -677,13 +680,15 @@ void random_distort_image(image im, float hue, float saturation, float exposure)
void fill_image(image m, float s);
image grayscale_image(image im);
void rotate_image_cw(image im, int times);
double what_time_is_it_now();
image rotate_image(image m, float rad);
void visualize_network(network net);
float box_iou(box a, box b);
void do_nms(box *boxes, float **probs, int total, int classes, float thresh);
data load_all_cifar10();
box_label *read_boxes(char *filename, int *n);
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes);
box float_to_box(float *f, int stride);
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float **masks, char **names, image **alphabet, int classes);

matrix network_predict_data(network net, data test);
image **load_alphabet();
@@ -694,6 +699,12 @@ float *network_predict_p(network *net, float *input);
int network_width(network *net);
int network_height(network *net);
float *network_predict_image(network *net, image im);
void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs);
int num_boxes(network *net);
box *make_boxes(network *net);

void reset_network_state(network net, int b);
void reset_network_state(network net, int b);

char **get_labels(char *filename);
void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
@@ -709,6 +720,7 @@ image get_image_from_stream(CvCapture *cap);
void free_image(image m);
float train_network(network net, data d);
pthread_t load_data_in_thread(load_args args);
void load_data_blocking(load_args args);
list *get_paths(char *filename);
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves, int stride);
void change_leaves(tree *t, char *leaf_list);
@@ -1,4 +1,25 @@
from ctypes import *
import math
import random

def sample(probs):
s = sum(probs)
probs = [a/s for a in probs]
r = random.uniform(0, 1)
for i in range(len(probs)):
r = r - probs[i]
if r <= 0:
return i
return len(probs)-1

def c_array(ctype, values):
return (ctype * len(values))(*values)

class BOX(Structure):
_fields_ = [("x", c_float),
("y", c_float),
("w", c_float),
("h", c_float)]

class IMAGE(Structure):
_fields_ = [("w", c_int),
@@ -10,61 +31,97 @@ class METADATA(Structure):
_fields_ = [("classes", c_int),
("names", POINTER(c_char_p))]

lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
#lib = CDLL("/home/pjreddie/documents/darknet/libdarknet.so", RTLD_GLOBAL)
lib = CDLL("libdarknet.so", RTLD_GLOBAL)
lib.network_width.argtypes = [c_void_p]
lib.network_width.restype = c_int
lib.network_height.argtypes = [c_void_p]
lib.network_height.restype = c_int

def load_meta(f):
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA
return lib.get_metadata(f)

def load_net(cfg, weights):
load_network = lib.load_network_p
load_network.argtypes = [c_char_p, c_char_p, c_int]
load_network.restype = c_void_p
return load_network(cfg, weights, 0)

def load_img(f):
load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE
return load_image(f, 0, 0)

def letterbox_img(im, w, h):
letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE
return letterbox_image(im, w, h)

def predict(net, im):
pred = lib.network_predict_image
pred.argtypes = [c_void_p, IMAGE]
pred.restype = POINTER(c_float)
return pred(net, im)
predict = lib.network_predict_p
predict.argtypes = [c_void_p, POINTER(c_float)]
predict.restype = POINTER(c_float)

make_boxes = lib.make_boxes
make_boxes.argtypes = [c_void_p]
make_boxes.restype = POINTER(BOX)

free_ptrs = lib.free_ptrs
free_ptrs.argtypes = [POINTER(c_void_p), c_int]

num_boxes = lib.num_boxes
num_boxes.argtypes = [c_void_p]
num_boxes.restype = c_int

make_probs = lib.make_probs
make_probs.argtypes = [c_void_p]
make_probs.restype = POINTER(POINTER(c_float))

detect = lib.network_predict_p
detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))]

reset_rnn = lib.reset_rnn
reset_rnn.argtypes = [c_void_p]

load_net = lib.load_network_p
load_net.argtypes = [c_char_p, c_char_p, c_int]
load_net.restype = c_void_p

free_image = lib.free_image
free_image.argtypes = [IMAGE]

letterbox_image = lib.letterbox_image
letterbox_image.argtypes = [IMAGE, c_int, c_int]
letterbox_image.restype = IMAGE

load_meta = lib.get_metadata
lib.get_metadata.argtypes = [c_char_p]
lib.get_metadata.restype = METADATA

load_image = lib.load_image_color
load_image.argtypes = [c_char_p, c_int, c_int]
load_image.restype = IMAGE

predict_image = lib.network_predict_image
predict_image.argtypes = [c_void_p, IMAGE]
predict_image.restype = POINTER(c_float)

network_detect = lib.network_detect
network_detect.argtypes = [c_void_p, IMAGE, c_float, c_float, c_float, POINTER(BOX), POINTER(POINTER(c_float))]

def classify(net, meta, im):
out = predict(net, im)
out = predict_image(net, im)
res = []
for i in range(meta.classes):
res.append((meta.names[i], out[i]))
res = sorted(res, key=lambda x: -x[1])
return res

def detect(net, meta, im):
out = predict(net, im)
def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45):
im = load_image(image, 0, 0)
boxes = make_boxes(net)
probs = make_probs(net)
num = num_boxes(net)
network_detect(net, im, thresh, hier_thresh, nms, boxes, probs)
res = []
for i in range(meta.classes):
res.append((meta.names[i], out[i]))
for j in range(num):
for i in range(meta.classes):
if probs[j][i] > 0:
res.append((meta.names[i], probs[j][i], (boxes[j].x, boxes[j].y, boxes[j].w, boxes[j].h)))
res = sorted(res, key=lambda x: -x[1])
free_image(im)
free_ptrs(cast(probs, POINTER(c_void_p)), num)
return res

if __name__ == "__main__":
net = load_net("cfg/densenet.cfg", "/home/pjreddie/trained/densenet201.weights")
im = load_img("data/wolf.jpg")
meta = load_meta("cfg/imagenet1k.data")
r = classify(net, meta, im)
print r[:10]
#net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0)
#im = load_image("data/wolf.jpg", 0, 0)
#meta = load_meta("cfg/imagenet1k.data")
#r = classify(net, meta, im)
#print r[:10]
net = load_net("cfg/tiny-yolo.cfg", "tiny-yolo.weights", 0)
meta = load_meta("cfg/coco.data")
r = detect(net, meta, "data/dog.jpg")
print r


@@ -0,0 +1,37 @@
from darknet import *

def predict_tactic(net, s):
prob = 0
d = c_array(c_float, [0.0]*256)
tac = ''
if not len(s):
s = '\n'
for c in s[:-1]:
d[ord(c)] = 1
pred = predict(net, d)
d[ord(c)] = 0
c = s[-1]
while 1:
d[ord(c)] = 1
pred = predict(net, d)
d[ord(c)] = 0
pred = [pred[i] for i in range(256)]
ind = sample(pred)
c = chr(ind)
prob += math.log(pred[ind])
if len(tac) and tac[-1] == '.':
break
tac = tac + c
return (tac, prob)

def predict_tactics(net, s, n):
tacs = []
for i in range(n):
reset_rnn(net)
tacs.append(predict_tactic(net, s))
tacs = sorted(tacs, key=lambda x: -x[1])
return tacs

net = load_net("cfg/coq.test.cfg", "/home/pjreddie/backup/coq.backup", 0)
t = predict_tactics(net, "+++++\n", 10)
print t
@@ -698,9 +698,6 @@ extern "C" void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int
int minw = (w1 < w2) ? w1 : w2;
int minh = (h1 < h2) ? h1 : h2;
int minc = (c1 < c2) ? c1 : c2;
assert(w1 == w2);
assert(h1 == h2);
assert(c1 == c2);

int stride = w1/w2;
int sample = w2/w1;
@@ -892,19 +889,21 @@ __global__ void softmax_tree_kernel(float *input, int spatial, int batch, int st

extern "C" void softmax_tree(float *input, int spatial, int batch, int stride, float temp, float *output, tree hier)
{
//int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
//int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
int *tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
int *tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
/*
static int *tree_groups_size = 0;
static int *tree_groups_offset = 0;
if(!tree_groups_size){
tree_groups_size = cuda_make_int_array(hier.group_size, hier.groups);
tree_groups_offset = cuda_make_int_array(hier.group_offset, hier.groups);
}
*/
int num = spatial*batch*hier.groups;
softmax_tree_kernel<<<cuda_gridsize(num), BLOCK>>>(input, spatial, batch, stride, temp, output, hier.groups, tree_groups_size, tree_groups_offset);
check_error(cudaPeekAtLastError());
//cuda_free((float *)tree_groups_size);
//cuda_free((float *)tree_groups_offset);
cuda_free((float *)tree_groups_size);
cuda_free((float *)tree_groups_offset);
}

__global__ void softmax_kernel(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
@@ -6,7 +6,6 @@ typedef struct{
float dx, dy, dw, dh;
} dbox;

box float_to_box(float *f, int stride);
float box_rmse(box a, box b);
dbox diou(box a, box b);
box decode_box(box b, box anchor);
@@ -130,7 +130,11 @@ void cudnn_convolutional_setup(layer *l)
cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
cudnnSetTensor4dDescriptor(l->normTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1);
cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
#if CUDNN_MAJOR >= 6
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);
#else
cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
#endif
cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
l->srcTensorDesc,
l->weightDesc,
@@ -295,6 +295,112 @@ void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int
free(boxes);
}

void load_rle(image im, int *rle, int n)
{
int count = 0;
int curr = 0;
int i,j;
for(i = 0; i < n; ++i){
for(j = 0; j < rle[i]; ++j){
im.data[count++] = curr;
}
curr = 1 - curr;
}
for(; count < im.h*im.w*im.c; ++count){
im.data[count] = curr;
}
}

void or_image(image src, image dest, int c)
{
int i;
for(i = 0; i < src.w*src.h; ++i){
if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1;
}
}

void exclusive_image(image src)
{
int k, j, i;
int s = src.w*src.h;
for(k = 0; k < src.c-1; ++k){
for(i = 0; i < s; ++i){
if (src.data[k*s + i]){
for(j = k+1; j < src.c; ++j){
src.data[j*s + i] = 0;
}
}
}
}
}

box bound_image(image im)
{
int x,y;
int minx = im.w;
int miny = im.h;
int maxx = 0;
int maxy = 0;
for(y = 0; y < im.h; ++y){
for(x = 0; x < im.w; ++x){
if(im.data[y*im.w + x]){
minx = (x < minx) ? x : minx;
miny = (y < miny) ? y : miny;
maxx = (x > maxx) ? x : maxx;
maxy = (y > maxy) ? y : maxy;
}
}
}
box b = {minx, miny, maxx-minx + 1, maxy-miny + 1};
//printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
return b;
}

void fill_truth_iseg(char *path, int num_boxes, float *truth, int classes, int w, int h, augment_args aug, int flip, int mw, int mh)
{
char labelpath[4096];
find_replace(path, "images", "mask", labelpath);
find_replace(labelpath, "JPEGImages", "mask", labelpath);
find_replace(labelpath, ".jpg", ".txt", labelpath);
find_replace(labelpath, ".JPG", ".txt", labelpath);
find_replace(labelpath, ".JPEG", ".txt", labelpath);
FILE *file = fopen(labelpath, "r");
if(!file) file_error(labelpath);
char buff[32788];
int id;
int i = 0;
image part = make_image(w, h, 1);
while((fscanf(file, "%d %s", &id, buff) == 2) && i < num_boxes){
int n = 0;
int *rle = read_intlist(buff, &n, 0);
load_rle(part, rle, n);
image sized = rotate_crop_image(part, aug.rad, aug.scale, aug.w, aug.h, aug.dx, aug.dy, aug.aspect);
if(flip) flip_image(sized);
box b = bound_image(sized);
if(b.w > 0){
image crop = crop_image(sized, b.x, b.y, b.w, b.h);
image mask = resize_image(crop, mw, mh);
truth[i*(4 + mw*mh + 1) + 0] = (b.x + b.w/2.)/sized.w;
truth[i*(4 + mw*mh + 1) + 1] = (b.y + b.h/2.)/sized.h;
truth[i*(4 + mw*mh + 1) + 2] = b.w/sized.w;
truth[i*(4 + mw*mh + 1) + 3] = b.h/sized.h;
int j;
for(j = 0; j < mw*mh; ++j){
truth[i*(4 + mw*mh + 1) + 4 + j] = mask.data[j];
}
truth[i*(4 + mw*mh + 1) + 4 + mw*mh] = id;
free_image(crop);
free_image(mask);
++i;
}
free_image(sized);
free(rle);
}
fclose(file);
free_image(part);
}


void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
{
char labelpath[4096];
@@ -443,7 +549,7 @@ matrix load_regression_labels_paths(char **paths, int n)
find_replace(labelpath, "JPEGImages", "targets", labelpath);
find_replace(labelpath, ".jpg", ".txt", labelpath);
find_replace(labelpath, ".png", ".txt", labelpath);

FILE *file = fopen(labelpath, "r");
fscanf(file, "%f", &(y.vals[i][0]));
fclose(file);
@@ -511,45 +617,6 @@ void free_data(data d)
}
}

void load_rle(image im, int *rle, int n)
{
int count = 0;
int curr = 0;
int i,j;
for(i = 0; i < n; ++i){
for(j = 0; j < rle[i]; ++j){
im.data[count++] = curr;
}
curr = 1 - curr;
}
for(; count < im.h*im.w*im.c; ++count){
im.data[count] = curr;
}
}

void or_image(image src, image dest, int c)
{
int i;
for(i = 0; i < src.w*src.h; ++i){
if(src.data[i]) dest.data[dest.w*dest.h*c + i] = 1;
}
}

void exclusive_image(image src)
{
int k, j, i;
int s = src.w*src.h;
for(k = 0; k < src.c-1; ++k){
for(i = 0; i < s; ++i){
if (src.data[k*s + i]){
for(j = k+1; j < src.c; ++j){
src.data[j*s + i] = 0;
}
}
}
}
}

image get_segmentation_image(char *path, int w, int h, int classes)
{
char labelpath[4096];
@@ -659,7 +726,7 @@ data load_data_seg(int n, char **paths, int m, int w, int h, int classes, int mi
return d;
}

data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int min, int max, float angle, float aspect, float hue, float saturation, float exposure, int div)
data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int boxes, int coords, int min, int max, float angle, float aspect, float hue, float saturation, float exposure)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
@@ -670,32 +737,22 @@ data load_data_iseg(int n, char **paths, int m, int w, int h, int classes, int m
d.X.vals = calloc(d.X.rows, sizeof(float*));
d.X.cols = h*w*3;


d.y.rows = n;
d.y.cols = h*w*classes/div/div;
d.y.vals = calloc(d.X.rows, sizeof(float*));
d.y = make_matrix(n, (coords+1)*boxes);

for(i = 0; i < n; ++i){
image orig = load_image_color(random_paths[i], 0, 0);
augment_args a = random_augment_args(orig, angle, aspect, min, max, w, h);
a.dx = 0;
a.dy = 0;
image sized = rotate_crop_image(orig, a.rad, a.scale, a.w, a.h, a.dx, a.dy, a.aspect);

int flip = rand()%2;
if(flip) flip_image(sized);
random_distort_image(sized, hue, saturation, exposure);
d.X.vals[i] = sized.data;
//show_image(sized, "image");

image mask = get_segmentation_image(random_paths[i], orig.w, orig.h, classes);
//image mask = make_image(orig.w, orig.h, classes+1);
image sized_m = rotate_crop_image(mask, a.rad, a.scale/div, a.w/div, a.h/div, a.dx, a.dy, a.aspect);

if(flip) flip_image(sized_m);
d.y.vals[i] = sized_m.data;
fill_truth_iseg(random_paths[i], boxes, d.y.vals[i], classes, orig.w, orig.h, a, flip, 14, 14);

free_image(orig);
free_image(mask);

/*
image rgb = mask_to_rgb(sized_m, classes);
@@ -950,6 +1007,8 @@ void *load_thread(void *ptr)
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == INSTANCE_DATA){
*a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == SEGMENTATION_DATA){
*a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale);
} else if (a.type == REGION_DATA){
@@ -20,6 +20,10 @@ static int demo_classes;
static float **probs;
static box *boxes;
static network net;
static network net2;
static float **probs2;
static box *boxes2;
static float **predictions2;
static image buff [3];
static image buff_letter[3];
static int buff_index = 0;
@@ -30,14 +34,11 @@ static float demo_thresh = 0;
static float demo_hier = .5;
static int running = 0;

static int demo_delay = 0;
static int demo_frame = 3;
static int demo_detections = 0;
static float **predictions;
static int demo_index = 0;
static int demo_done = 0;
static float *last_avg2;
static float *last_avg;
static float *avg;
double demo_time;

@@ -61,12 +62,11 @@ void *detect_in_thread(void *ptr)

memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
mean_arrays(predictions, demo_frame, l.outputs, avg);
l.output = last_avg2;
if(demo_delay == 0) l.output = avg;
l.output = avg;
if(l.type == DETECTION){
get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
} else if (l.type == REGION){
get_region_boxes(l, buff[0].w, buff[0].h, net.w, net.h, demo_thresh, probs, boxes, 0, 0, demo_hier, 1);
get_region_boxes(l, buff[0].w, buff[0].h, net.w, net.h, demo_thresh, probs, boxes, 0, 0, 0, demo_hier, 1);
} else {
error("Last layer must produce detections\n");
}
@@ -77,7 +77,7 @@ void *detect_in_thread(void *ptr)
printf("\nFPS:%.1f\n",fps);
printf("Objects:\n\n");
image display = buff[(buff_index+2) % 3];
draw_detections(display, demo_detections, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);
draw_detections(display, demo_detections, demo_thresh, boxes, probs, 0, demo_names, demo_alphabet, demo_classes);

demo_index = (demo_index + 1)%demo_frame;
running = 0;
@@ -97,12 +97,7 @@ void *display_in_thread(void *ptr)
show_image_cv(buff[(buff_index + 1)%3], "Demo", ipl);
int c = cvWaitKey(1);
if (c != -1) c = c%256;
if (c == 10){
if(demo_delay == 0) demo_delay = 60;
else if(demo_delay == 5) demo_delay = 0;
else if(demo_delay == 60) demo_delay = 5;
else demo_delay = 0;
} else if (c == 27) {
if (c == 27) {
demo_done = 1;
return 0;
} else if (c == 82) {
@@ -135,7 +130,6 @@ void *detect_loop(void *ptr)

void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen)
{
demo_delay = delay;
demo_frame = avg_frames;
predictions = calloc(demo_frame, sizeof(float*));
image **alphabet = load_alphabet();
@@ -179,8 +173,6 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
int j;

avg = (float *) calloc(l.outputs, sizeof(float));
last_avg = (float *) calloc(l.outputs, sizeof(float));
last_avg2 = (float *) calloc(l.outputs, sizeof(float));
for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));

boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
@@ -213,14 +205,96 @@ void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const ch
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
if(!prefix){
if(count % (demo_delay+1) == 0){
fps = 1./(get_wall_time() - demo_time);
demo_time = get_wall_time();
float *swap = last_avg;
last_avg = last_avg2;
last_avg2 = swap;
memcpy(last_avg, avg, l.outputs*sizeof(float));
}
fps = 1./(get_wall_time() - demo_time);
demo_time = get_wall_time();
display_in_thread(0);
}else{
char name[256];
sprintf(name, "%s_%08d", prefix, count);
save_image(buff[(buff_index + 1)%3], name);
}
pthread_join(fetch_thread, 0);
pthread_join(detect_thread, 0);
++count;
}
}

void demo_compare(char *cfg1, char *weight1, char *cfg2, char *weight2, float thresh, int cam_index, const char *filename, char **names, int classes, int delay, char *prefix, int avg_frames, float hier, int w, int h, int frames, int fullscreen)
{
demo_frame = avg_frames;
predictions = calloc(demo_frame, sizeof(float*));
image **alphabet = load_alphabet();
demo_names = names;
demo_alphabet = alphabet;
demo_classes = classes;
demo_thresh = thresh;
demo_hier = hier;
printf("Demo\n");
net = load_network(cfg1, weight1, 0);
set_batch_network(&net, 1);
pthread_t detect_thread;
pthread_t fetch_thread;

srand(2222222);

if(filename){
printf("video file: %s\n", filename);
cap = cvCaptureFromFile(filename);
}else{
cap = cvCaptureFromCAM(cam_index);

if(w){
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, w);
}
if(h){
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, h);
}
if(frames){
cvSetCaptureProperty(cap, CV_CAP_PROP_FPS, frames);
}
}

if(!cap) error("Couldn't connect to webcam.\n");

layer l = net.layers[net.n-1];
demo_detections = l.n*l.w*l.h;
int j;

avg = (float *) calloc(l.outputs, sizeof(float));
for(j = 0; j < demo_frame; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));

boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes+1, sizeof(float));

buff[0] = get_image_from_stream(cap);
buff[1] = copy_image(buff[0]);
buff[2] = copy_image(buff[0]);
buff_letter[0] = letterbox_image(buff[0], net.w, net.h);
buff_letter[1] = letterbox_image(buff[0], net.w, net.h);
buff_letter[2] = letterbox_image(buff[0], net.w, net.h);
ipl = cvCreateImage(cvSize(buff[0].w,buff[0].h), IPL_DEPTH_8U, buff[0].c);

int count = 0;
if(!prefix){
cvNamedWindow("Demo", CV_WINDOW_NORMAL);
if(fullscreen){
cvSetWindowProperty("Demo", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
} else {
cvMoveWindow("Demo", 0, 0);
cvResizeWindow("Demo", 1352, 1013);
}
}

demo_time = get_wall_time();

while(!demo_done){
buff_index = (buff_index + 1) %3;
if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");
if(!prefix){
fps = 1./(get_wall_time() - demo_time);
demo_time = get_wall_time();
display_in_thread(0);
}else{
char name[256];
@@ -77,6 +77,7 @@ void gemm_nn(int M, int N, int K, float ALPHA,
float *C, int ldc)
{
int i,j,k;
#pragma omp parallel for
for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[i*lda+k];
@@ -93,6 +94,7 @@ void gemm_nt(int M, int N, int K, float ALPHA,
float *C, int ldc)
{
int i,j,k;
#pragma omp parallel for
for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){
register float sum = 0;
@@ -110,6 +112,7 @@ void gemm_tn(int M, int N, int K, float ALPHA,
float *C, int ldc)
{
int i,j,k;
#pragma omp parallel for
for(i = 0; i < M; ++i){
for(k = 0; k < K; ++k){
register float A_PART = ALPHA*A[k*lda+i];
@@ -126,6 +129,7 @@ void gemm_tt(int M, int N, int K, float ALPHA,
float *C, int ldc)
{
int i,j,k;
#pragma omp parallel for
for(i = 0; i < M; ++i){
for(j = 0; j < N; ++j){
register float sum = 0;
@@ -190,15 +190,14 @@ image **load_alphabet()
return alphabets;
}

void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes)
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, float **masks, char **names, image **alphabet, int classes)
{
int i;

for(i = 0; i < num; ++i){
int class = max_index(probs[i], classes);
float prob = probs[i][class];
if(prob > thresh){

int width = im.h * .006;

if(0){
@@ -237,6 +236,15 @@ void draw_detections(image im, int num, float thresh, box *boxes, float **probs,
draw_label(im, top + width, left, label, rgb);
free_image(label);
}
if (masks){
image mask = float_to_image(14, 14, 1, masks[i]);
image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h);
image tmask = threshold_image(resized_mask, .5);
embed_image(tmask, im, left, top);
free_image(mask);
free_image(resized_mask);
free_image(tmask);
}
}
}
}
@@ -933,8 +941,8 @@ augment_args random_augment_args(image im, float angle, float aspect, int low, i

float dx = (im.w*scale/aspect - w) / 2.;
float dy = (im.h*scale - w) / 2.;
if(dx < 0) dx = 0;
if(dy < 0) dy = 0;
//if(dx < 0) dx = 0;
//if(dy < 0) dy = 0;
dx = rand_uniform(-dx, dx);
dy = rand_uniform(-dy, dy);

@@ -1419,10 +1427,13 @@ float get_pixel(image m, int x, int y, int c)
}
float get_pixel_extend(image m, int x, int y, int c)
{
if(x < 0 || x >= m.w || y < 0 || y >= m.h) return 0;
/*
if(x < 0) x = 0;
if(x >= m.w) x = m.w-1;
if(y < 0) y = 0;
if(y >= m.h) y = m.h-1;
*/
if(c < 0 || c >= m.c) return 0;
return get_pixel(m, x, y, c);
}
@@ -82,6 +82,27 @@ void reset_momentum(network net)
#endif
}

void reset_network_state(network net, int b)
{
int i;
for (i = 0; i < net.n; ++i) {
#ifdef GPU
layer l = net.layers[i];
if(l.state_gpu){
fill_gpu(l.outputs, 0, l.state_gpu + l.outputs*b, 1);
}
if(l.h_gpu){
fill_gpu(l.outputs, 0, l.h_gpu + l.outputs*b, 1);
}
#endif
}
}

void reset_rnn(network *net)
{
reset_network_state(*net, 0);
}

float get_current_rate(network net)
{
size_t batch_num = get_current_batch(net);
@@ -173,7 +194,7 @@ network make_network(int n)
network net = {0};
net.n = n;
net.layers = calloc(net.n, sizeof(layer));
net.seen = calloc(1, sizeof(int));
net.seen = calloc(1, sizeof(size_t));
net.t = calloc(1, sizeof(int));
net.cost = calloc(1, sizeof(float));
return net;
@@ -302,6 +323,15 @@ float train_network(network net, data d)
return (float)sum/(n*batch);
}

void set_temp_network(network net, float t)
{
int i;
for(i = 0; i < net.n; ++i){
net.layers[i].temperature = t;
}
}


void set_batch_network(network *net, int b)
{
net->batch = b;
@@ -464,6 +494,38 @@ float *network_predict(network net, float *input)
return net.output;
}

int num_boxes(network *net)
{
layer l = net->layers[net->n-1];
return l.w*l.h*l.n;
}

box *make_boxes(network *net)
{
layer l = net->layers[net->n-1];
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
return boxes;
}

float **make_probs(network *net)
{
int j;
layer l = net->layers[net->n-1];
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
return probs;
}

void network_detect(network *net, image im, float thresh, float hier_thresh, float nms, box *boxes, float **probs)
{
network_predict_image(net, im);
layer l = net->layers[net->n-1];
if(l.type == REGION){
get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, 0, 0, 0, hier_thresh, 0);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
}
}

float *network_predict_p(network *net, float *input)
{
return network_predict(*net, input);
@@ -296,6 +296,7 @@ layer parse_region(list *options, size_params params)
l.coord_scale = option_find_float(options, "coord_scale", 1);
l.object_scale = option_find_float(options, "object_scale", 1);
l.noobject_scale = option_find_float(options, "noobject_scale", 1);
l.mask_scale = option_find_float(options, "mask_scale", 1);
l.class_scale = option_find_float(options, "class_scale", 1);
l.bias_match = option_find_int_quiet(options, "bias_match",0);

@@ -1061,6 +1062,17 @@ void load_convolutional_weights(layer l, FILE *fp)
fill_cpu(l.n, 0, l.rolling_mean, 1);
fill_cpu(l.n, 0, l.rolling_variance, 1);
}
if(0){
int i;
for(i = 0; i < l.n; ++i){
printf("%g, ", l.rolling_mean[i]);
}
printf("\n");
for(i = 0; i < l.n; ++i){
printf("%g, ", l.rolling_variance[i]);
}
printf("\n");
}
}
fread(l.weights, sizeof(float), num, fp);
//if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
@@ -100,6 +100,15 @@ float delta_region_box(box truth, float *x, float *biases, int n, int index, int
return iou;
}

void delta_region_mask(float *truth, float *x, int n, int index, float *delta, int stride, int scale)
{
int i;
for(i = 0; i < n; ++i){
delta[index + i*stride] = scale*(truth[i] - x[index + i*stride]);
}
}


void delta_region_class(float *output, float *delta, int index, int class, int classes, tree *hier, float scale, int stride, float *avg_cat)
{
int i, n;
@@ -206,6 +215,7 @@ void forward_region_layer(const layer l, network net)
delta_region_class(l.output, l.delta, class_index, class, l.classes, l.softmax_tree, l.class_scale, l.w*l.h, &avg_cat);
if(l.output[obj_index] < .3) l.delta[obj_index] = l.object_scale * (.3 - l.output[obj_index]);
else l.delta[obj_index] = 0;
l.delta[obj_index] = 0;
++class_count;
onlyclass = 1;
break;
@@ -279,6 +289,10 @@ void forward_region_layer(const layer l, network net)

int box_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 0);
float iou = delta_region_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, l.delta, l.coord_scale * (2 - truth.w*truth.h), l.w*l.h);
if(l.coords > 4){
int mask_index = entry_index(l, b, best_n*l.w*l.h + j*l.w + i, 4);
delta_region_mask(net.truth + t*(l.coords + 1) + b*l.truths + 5, l.output, l.coords - 4, mask_index, l.delta, l.w*l.h, l.mask_scale);
}
if(iou > .5) recall += 1;
avg_iou += iou;

@@ -347,7 +361,7 @@ void correct_region_boxes(box *boxes, int n, int w, int h, int netw, int neth, i
}
}

void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, int only_objectness, int *map, float tree_thresh, int relative)
void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, float **probs, box *boxes, float **masks, int only_objectness, int *map, float tree_thresh, int relative)
{
int i,j,n,z;
float *predictions = l.output;
@@ -382,10 +396,16 @@ void get_region_boxes(layer l, int w, int h, int netw, int neth, float thresh, f
for(j = 0; j < l.classes; ++j){
probs[index][j] = 0;
}
int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords);
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords);
int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4);
float scale = l.background ? 1 : predictions[obj_index];
boxes[index] = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
if(masks){
for(j = 0; j < l.coords - 4; ++j){
masks[index][j] = l.output[mask_index + j*l.w*l.h];
}
}

int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background);
if(l.softmax_tree){
@@ -440,11 +460,18 @@ void forward_region_layer_gpu(const layer l, network net)
for(n = 0; n < l.n; ++n){
int index = entry_index(l, b, n*l.w*l.h, 0);
activate_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
if(l.coords > 4){
index = entry_index(l, b, n*l.w*l.h, 4);
activate_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC);
}
index = entry_index(l, b, n*l.w*l.h, l.coords);
if(!l.background) activate_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC);
}
}
if (l.softmax_tree){
int index = entry_index(l, 0, 0, l.coords + 1);
softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
/*
int mmin = 9000;
int mmax = 0;
int i;
@@ -453,59 +480,58 @@ void forward_region_layer_gpu(const layer l, network net)
if (group_size < mmin) mmin = group_size;
if (group_size > mmax) mmax = group_size;
}
printf("%d %d %d \n", l.softmax_tree->groups, mmin, mmax);
int index = entry_index(l, 0, 0, l.coords + 1);
softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
//printf("%d %d %d \n", l.softmax_tree->groups, mmin, mmax);
*/
/*
// TIMING CODE
int zz;
int number = 1000;
int count = 0;
int i;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
count += group_size;
int group_size = l.softmax_tree->group_size[i];
count += group_size;
}
printf("%d %d\n", l.softmax_tree->groups, count);
{
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int index = entry_index(l, 0, 0, 5);
softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
}
cudaDeviceSynchronize();
printf("Good GPU Timing: %f\n", what_time_is_it_now() - then);
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int index = entry_index(l, 0, 0, 5);
softmax_tree(net.input_gpu + index, l.w*l.h, l.batch*l.n, l.inputs/l.n, 1, l.output_gpu + index, *l.softmax_tree);
}
cudaDeviceSynchronize();
printf("Good GPU Timing: %f\n", what_time_is_it_now() - then);
}
{
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
int index = entry_index(l, 0, 0, count);
softmax_gpu(net.input_gpu + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
count += group_size;
}
}
cudaDeviceSynchronize();
printf("Bad GPU Timing: %f\n", what_time_is_it_now() - then);
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
int index = entry_index(l, 0, 0, count);
softmax_gpu(net.input_gpu + index, group_size, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output_gpu + index);
count += group_size;
}
}
cudaDeviceSynchronize();
printf("Bad GPU Timing: %f\n", what_time_is_it_now() - then);
}
{
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
count += group_size;
}
}
cudaDeviceSynchronize();
printf("CPU Timing: %f\n", what_time_is_it_now() - then);
double then = what_time_is_it_now();
for(zz = 0; zz < number; ++zz){
int i;
int count = 5;
for (i = 0; i < l.softmax_tree->groups; ++i) {
int group_size = l.softmax_tree->group_size[i];
softmax_cpu(net.input + count, group_size, l.batch, l.inputs, l.n*l.w*l.h, 1, l.n*l.w*l.h, l.temperature, l.output + count);
count += group_size;
}
*/
}
cudaDeviceSynchronize();
printf("CPU Timing: %f\n", what_time_is_it_now() - then);
}
*/
/*
int i;
int count = 5;
@@ -526,12 +552,6 @@ void forward_region_layer_gpu(const layer l, network net)
return;
}

float *truth_cpu = 0;
if(net.truth_gpu){
int num_truth = l.batch*l.truths;
truth_cpu = calloc(num_truth, sizeof(float));
cuda_pull_array(net.truth_gpu, truth_cpu, num_truth);
}
cuda_pull_array(l.output_gpu, net.input, l.batch*l.inputs);
forward_region_layer(l, net);
//cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
@@ -546,6 +566,10 @@ void backward_region_layer_gpu(const layer l, network net)
for(n = 0; n < l.n; ++n){
int index = entry_index(l, b, n*l.w*l.h, 0);
gradient_array_gpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC, l.delta_gpu + index);
if(l.coords > 4){
index = entry_index(l, b, n*l.w*l.h, 4);
gradient_array_gpu(l.output_gpu + index, (l.coords - 4)*l.w*l.h, LOGISTIC, l.delta_gpu + index);
}
index = entry_index(l, b, n*l.w*l.h, l.coords);
if(!l.background) gradient_array_gpu(l.output_gpu + index, l.w*l.h, LOGISTIC, l.delta_gpu + index);
}