## 1.Reliability Fusion Map (RFM)

<br>

1. `matrix` is global variable from csv file <br>
    `f1, f2, f3...` is CNN confidence vector; <br>
    `p` = CNN predict label; <br>
    `y` = label of patch; <br>
    `texture_quality` = patch texture quality factor, see formula(2) in paper
    <hr>
    
2. Fusion step: `filter_texture()`->`fushion_map()`->`fusion_denisty()`; <br>
    `filter_texture()`: filter low texture patch predictions of CNN, we set threshold_texture=0;<br>
    `fushion_map()`: fusion step1, see formula(3) in paper; <br>
    `fusion_denisty()`: fusion step2, see formula(4), (5) in paper; <br>
    <hr>

3. Output matrix structure is the same as input matrix <br>
    <hr>


## 2. Step-by-Step Clustering
   Clustering step: `k-means` -> `filter_denisty` -> `filter_texture`
   <hr>
    

## 3.About

This localization for step by step clustering algorithm proposed in [11] (see experiment 2-2 in paper).

<br>


## 4.Reference
[11] L. Bondi, S. Lameri, D. Gu ̈era, P. Bestagini, E. J. Delp, and S. Tubaro, “Tampering detection and localization through clus- tering of camera-based cnn features,” in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops, 2017, pp. 1855–1864.


<br>

> $matrix=[f1, f2, f3, f4, f5... p, y, texture_quality]

In [349]:
import os, math, seaborn as sns, numpy as np, pandas as pd, matplotlib.pyplot as plt, matplotlib.image as mpimg
from collections import Counter
sns.set()

# matrix=[f1, f2, f3, f4, f5, p, y, texture_quality]

In [325]:
root_path = "/data/experiment"
scope_name = "pre-train_64x64" # The input csv is 64x64 non-overlap patches

csv_root = os.path.join(root_path, scope_name)
ground_root = os.path.join(root_path, "ground")
splice_root = os.path.join(root_path, "splicing")

In [276]:
def kmeans(matrix, theta, label, w, h):
    '''
    k-mean clustering (see paper [11], formula (2), (3), (4))
    @data=[f1, f2, f3, f4, f5, p, y, quality]
    @theta∈[0.0, 1.0]
    '''
    # calculate index: same=(M=0), diff=(M!=0)
    same_index = (matrix[:, 5] == label)
    diff_index = (matrix[:, 5] != label)
    
    # f-mean confidence
    f_mean = 1.0 * np.sum(matrix[same_index, 0: 5], axis=0) / np.sum(same_index)
    
    # d(i, j) see paper [11], formula (3)
    tmp_data = matrix[:, 0:5].copy()
    tmp_data[same_index] = 0
    (tmp1, tmp2) = (tmp_data, tmp_data.copy())
    
    # tmp1=molecule; tmp2=denominator
    tmp1[diff_index] -= f_mean
    tmp2[diff_index] += f_mean
    tmp1 = np.sum(np.abs(tmp1), axis=1)
    tmp2 = np.sum(np.abs(tmp2), axis=1)
    tmp2[tmp2 == 0] = 1.0
    d = tmp1 / tmp2
    
    # calculate placeholder
    flag = (d < theta)
    matrix[flag, 5] = label
    matrix[flag, :5] = 0
    matrix[flag, label] = 1
    return matrix

def filter_denisty(matrix, label, w, h):    
    '''
    filter_denisty method in paper [11] Sec. 3.3 step4
    @matrix=[f1, f2, f3, f4, f5, p, y, texture_quality]
    '''
    flag = np.where(matrix[:, 5] != label)
    matrix = np.reshape(matrix, (h, w, -1))
    source_matrix = matrix.copy()
    offset = [
        [0, 1], [1, 0], [1, 1]
    ]
    for index in list(flag[0]):
        x = int(index % w)
        y = int(index / w)
        
        score, cnt = 0, 0
        for p in offset:
            _x = x + p[1]
            _y = y + p[0]
            
            # 检测是否在边界，如果不是边界要计算
            if (_x >= 0) and (_y >= 0) and (_x < w) and (_y < h):
                cnt += 1
                # 预测为篡改，得分+1
                if source_matrix[_y][_x][5] != label:
                    score += 1

        # update confidence & label
        if score != cnt:
            matrix[y, x, :5] = 0
            matrix[y, x, label] = 1
            matrix[y, x, 5] = label
    return np.reshape(matrix, (w*h, -1))

def filter_texture(matrix, label, threshold=0.4):
    '''
    Filter low texture patches, (see paper [11], formula (5))
    @matrix=[f1, f2, f3, f4, f5, p, y, texture_quality]
    '''
    # get placeholder
    flag = (matrix[:, 7] < threshold)
    matrix[flag, :5] = 0
    matrix[flag, label] = 1
    return matrix

def local_detect_region(data, label, w, h):
    '''
    Calculate forged region coordinates
    @return mark_x, mark_y
    @param mark_x=[min_x, max_x], mark_y=[min_y, max_y]
    '''
    data = np.reshape(data, (h, w, -1))
    mark_x, mark_y = [w-1, 0], [h-1, 0]
    flag = False
    
    for j in range(h):
        for i in range(w):
            if data[j, i, 6] != label:
                flag = True
                mark_x[0] = min(mark_x[0], i)
                mark_x[1] = max(i, mark_x[1])
                mark_y[0] = min(mark_y[0], j)
                mark_y[1] = max(j, mark_y[1])
    if not flag:
        mark_x, mark_y = (0, w-1), (0, h-1)
    return mark_x, mark_y

def local_accuracy(data, label, mark_x, mark_y, w, h):
    '''
    Calculate local accuracy
    '''
    area = (mark_x[1] - mark_x[0] + 1) * (mark_y[1] - mark_y[0] + 1)
    tmp_data = np.reshape(data, (h, w, -1))[mark_y[0]: mark_y[1], mark_x[0]: mark_x[1], 5:7].copy()
    tmp_data = np.reshape(tmp_data, (-1, 2))

    same_count = np.sum(np.equal(tmp_data[:, 0], tmp_data[:, 1]))
    return same_count, area

def show_map(matrix, w, h, label, threshold=0):
    '''
    show hotmap
    @matrix=[f1, f2, f3, f4, f5, p, y]
    '''
    label_list = np.ones([w*h], dtype=np.int32) * label
    predict_list = matrix[0: w*h, 5]
    
    diff_matrix = np.array(np.abs(label_list - predict_list), dtype=np.bool) * 1
    conf_matrix = np.max(matrix[:, 0:5], axis=1)
    
    list_matrix = np.multiply(conf_matrix, diff_matrix)
    list_matrix[list_matrix < threshold] = 0
    
    plt.clf()
    f, ax = plt.subplots(figsize=(16, 10))
    xmap = np.reshape(list_matrix, (h, w))
    return sns.heatmap(xmap, linewidths=.01, cmap='YlGnBu')

def show_binary(matrix, w, h):
    '''
    show binary hotmap (mask)
    @matrix=[f1, f2, f3, f4, f5, p, y]
    '''
    matrix = np.reshape(matrix, (h, w, -1))
    plt.clf()
    f, ax = plt.subplots(figsize=(16, 10))
    xmap = np.reshape(matrix[:, :, 6], (h, w)).astype(np.int32)
    return sns.heatmap(xmap, linewidths=.01, cmap='YlGnBu')

In [346]:
# Local & global accuracy of localization

def get_label(name):
    name_label = {
        "Agfa": 0,
        "Kodak": 1,
        "Nikon": 2,
        "Rollei": 3,
        "Sony": 4
    }
    name_list = name.split("_")
    return (name_label[name_list[0]], name_label[name_list[4]])    

cnt, score, threshold_1, threshold_2 = 0, 0, 0.7, 0.0
result = np.zeros([len(os.listdir(csv_root)), 4])

for name in os.listdir(csv_root):
    print(cnt + 1, name)
    
    (_w, _h) = (int(int(name.split("_")[-2])), int(int(name.split("_")[-1].split(".")[-2])))
    (w, h) = int(_w / 64), int(_h / 64)
    
    # @raw_data=[f1, f2, f3, f4, f5, p, y, texture]
    raw_data = pd.read_csv(csv_root + "/" + name).values[:w*h, 1:9]
    
    # get label
    label1, label2 = get_label(name)
    
    # f-L1 clustering1
    f_data = kmeans(raw_data, threshold_1, label2, w, h)
    # texture clustering2
    t_data = filter_texture(f_data, label2, threshold_2)
    # denisty clustering3
    d_data = filter_denisty(t_data, label2, w, h)
    
    d_data[d_data[:, 5] != label2] = label1
    
    # final map
    show_binary(raw_data, w, h)
    
    full_count = np.sum(np.equal(d_data[:, 5], t_data[:, 6]))
    full_area = w*h
    
    mark_x, mark_y = local_detect_region(d_data, label2, w, h)
    (local_count, local_area) = local_accuracy(d_data, label2, mark_x, mark_y, w, h)
    
    result[cnt, :] = np.array(list([local_count, float(local_count/local_area), full_count, float(full_count/full_area)]))
    print("=> cnt={:d} result={:.4f}, {:.4f}".format(cnt, result[cnt, 1], result[cnt, 3]))
    
    cnt += 1

1 Sony_DSC-H50_0_47035_Agfa_Sensor505-x_0_1674_1540036585_2592_1944.csv
=> cnt=0 result=0.5556, 0.9850
2 Agfa_Sensor505-x_0_1633_Kodak_M1063_0_9407_1540013622_3664_2748.csv
=> cnt=1 result=0.5714, 0.9962
3 Kodak_M1063_0_9457_Rollei_RCP-7325XS_0_42268_1540044934_3072_2304.csv
=> cnt=2 result=0.8799, 0.9890
4 Kodak_M1063_0_9583_Rollei_RCP-7325XS_0_42309_1540045731_3072_2304.csv
=> cnt=3 result=0.7394, 0.9826
5 Kodak_M1063_0_9367_Rollei_RCP-7325XS_0_42204_1540044982_3072_2304.csv
=> cnt=4 result=0.8000, 0.9977
6 Sony_DSC-H50_0_47009_Rollei_RCP-7325XS_0_42216_1540044405_3072_2304.csv
=> cnt=5 result=0.7971, 0.9838
7 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42306_1540022925_3072_2304.csv
=> cnt=6 result=0.7143, 0.9728
8 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42219_1540022859_3072_2304.csv
=> cnt=7 result=0.1845, 0.9242
9 Rollei_RCP-7325XS_0_42267_Sony_DSC-H50_0_47004_1540033882_3456_2592.csv
=> cnt=8 result=0.9426, 0.9958
10 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42194_1540023187_3072_2304.cs

=> cnt=107 result=0.6481, 0.9962
109 Kodak_M1063_0_9457_Rollei_RCP-7325XS_0_42218_1540045930_3072_2304.csv
=> cnt=108 result=0.8571, 0.9913
110 Rollei_RCP-7325XS_0_42252_Sony_DSC-H50_0_47004_1540028207_3456_2592.csv
=> cnt=109 result=0.8133, 0.9944
111 Kodak_M1063_0_9406_Nikon_D200_0_15008_1540018510_3872_2592.csv
=> cnt=110 result=0.6667, 0.9992
112 Nikon_D200_0_15026_Kodak_M1063_0_9470_1540040429_3664_2748.csv
=> cnt=111 result=0.8586, 0.9908
113 Nikon_D200_0_15137_Rollei_RCP-7325XS_0_42203_1540023220_3072_2304.csv
=> cnt=112 result=0.8529, 0.9919
114 Sony_DSC-H50_0_46912_Agfa_Sensor505-x_0_1620_1540036933_2592_1944.csv
=> cnt=113 result=0.7444, 0.9817
115 Agfa_Sensor505-x_0_1796_Kodak_M1063_0_9407_1540014147_3664_2748.csv
=> cnt=114 result=0.4167, 0.9975
116 Agfa_Sensor505-x_0_1695_Kodak_M1063_0_9385_1540014092_2748_3664.csv
=> cnt=115 result=0.5000, 1.0000
117 Sony_DSC-H50_0_46953_Agfa_Sensor505-x_0_1788_1540036890_2592_1944.csv
=> cnt=116 result=0.8571, 0.9975
118 Sony_DSC-H50_0_4

=> cnt=215 result=0.7443, 0.9821
217 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42222_1540022516_3072_2304.csv
=> cnt=216 result=0.0128, 0.9161
218 Kodak_M1063_0_9457_Nikon_D200_0_14984_1540106210_3872_2592.csv
=> cnt=217 result=0.8615, 0.9933
219 Sony_DSC-H50_0_47032_Agfa_Sensor505-x_0_1681_1540035625_2592_1944.csv
=> cnt=218 result=0.8170, 0.9850
220 Agfa_Sensor505-x_0_1667_Kodak_M1063_0_9409_1540013657_3664_2748.csv
=> cnt=219 result=0.5758, 0.9946
221 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42220_1540022267_3072_2304.csv
=> cnt=220 result=0.2615, 0.9508
222 Nikon_D200_0_15135_Rollei_RCP-7325XS_0_42362_1540022396_3072_2304.csv
=> cnt=221 result=0.7143, 0.9797
223 Kodak_M1063_0_9583_Rollei_RCP-7325XS_0_42319_1540045760_3072_2304.csv
=> cnt=222 result=0.8333, 0.9936
224 Rollei_RCP-7325XS_0_42337_Sony_DSC-H50_0_47012_1540034567_3456_2592.csv
=> cnt=223 result=0.9348, 0.9894
225 Nikon_D200_0_15026_Kodak_M1063_0_9531_1540040642_3664_2748.csv
=> cnt=224 result=0.8221, 0.9916
226 Nikon_D200_0_1

=> cnt=324 result=0.8541, 0.9889
326 Kodak_M1063_0_9557_Rollei_RCP-7325XS_0_42307_1540046132_3072_2304.csv
=> cnt=325 result=0.8091, 0.9792
327 Sony_DSC-H50_0_46951_Rollei_RCP-7325XS_0_42207_1540044278_3072_2304.csv
=> cnt=326 result=0.6875, 0.9884
328 Kodak_M1063_0_9579_Nikon_D200_0_14902_1540107165_3872_2592.csv
=> cnt=327 result=0.8262, 0.9867
329 Kodak_M1063_0_9457_Rollei_RCP-7325XS_0_42216_1540045419_3072_2304.csv
=> cnt=328 result=0.8611, 0.9907
330 Nikon_D200_0_15026_Rollei_RCP-7325XS_0_42236_1540022532_3072_2304.csv
=> cnt=329 result=0.1648, 0.9184
331 Nikon_D200_0_14908_Kodak_M1063_0_9457_1540040775_2748_3664.csv
=> cnt=330 result=0.7888, 0.9829
332 Kodak_M1063_0_9583_Rollei_RCP-7325XS_0_42322_1540046173_3072_2304.csv
=> cnt=331 result=0.8182, 0.9907
333 Sony_DSC-H50_0_47002_Agfa_Sensor505-x_0_1627_1540035024_2592_1944.csv
=> cnt=332 result=0.7097, 0.9967
334 Rollei_RCP-7325XS_0_42250_Sony_DSC-H50_0_46972_1540023710_3456_2592.csv
=> cnt=333 result=0.9356, 0.9787
335 Agfa_Senso

In [347]:
np.sum(result[:, 1]) / 400

0.733743428382813

In [348]:
np.sum(result[:, 3]) / 400

0.9829811912593985