# Model Performance Statistics
This example notebook shows statistics (box plots, mean, and standard deviation) for model performance. You can run an experiment multiple times and use this notebook to analyze the distribution of the resulting metrics.

## Specify Model Performance
You can train a model multiple times (say, 10 times) on the same synthetic data and copy the resulting metrics into the cells below. For each metric, copy the values into a list (each element is the metric value from one run).
1. In the baseline performance section, copy the baseline model's metrics.
2. In the new performance section, copy the metrics of the model you want to compare against the baseline.

## Baseline and New Model Statistics

In [73]:
import pandas as pd

indices = {0: "mAP V1", 1: "mAP V2", 2: "mAP50 V1", 3: "mAP50 V2", 4: "mAR V1", 5: "mAR V2"}


def collection(mean_ap_base, mean_ap_50_base, mean_ar_base, mean_ap_new, mean_ap_50_new, mean_ar_new):
    """Summarize baseline ("V1") and new ("V2") runs as a mean/std table.

    Args:
        mean_ap_base, mean_ap_50_base, mean_ar_base: Per-run metric values of
            the baseline model (any sequence accepted by ``pd.Series``).
        mean_ap_new, mean_ap_50_new, mean_ar_new: Per-run metric values of the
            model being compared against the baseline.

    Returns:
        pd.DataFrame with columns ``mean`` and ``std`` (sample standard
        deviation, ddof=1) and one row per entry of ``indices``, in the order
        mAP base, mAP new, mAP50 base, mAP50 new, mAR base, mAR new.
    """
    # Baseline and new runs are interleaved so the row positions line up with
    # the labels in ``indices`` (0 -> "mAP V1", 1 -> "mAP V2", ...).
    runs_in_row_order = (
        mean_ap_base, mean_ap_new,
        mean_ap_50_base, mean_ap_50_new,
        mean_ar_base, mean_ar_new,
    )
    rows = []
    for values in runs_in_row_order:
        series = pd.Series(values, dtype="float64")
        rows.append({"mean": series.mean(), "std": series.std()})
    # Build the frame in one go: DataFrame.append was removed in pandas 2.0.
    performance = pd.DataFrame(rows, columns=["mean", "std"])
    return performance.rename(index=indices)

def single_collection(mean_ap_base, mean_ap_50_base, mean_ar_base):
    """Summarize one model's runs as a mean/std table.

    Args:
        mean_ap_base: Per-run mAP values.
        mean_ap_50_base: Per-run mAP@IOU50 values.
        mean_ar_base: Per-run mAR values.

    Returns:
        pd.DataFrame with columns ``mean`` and ``std`` (sample standard
        deviation, ddof=1) and rows labelled ``mAP``, ``mAP@50`` and ``mAR``.
    """
    labelled_runs = (
        ("mAP", mean_ap_base),
        ("mAP@50", mean_ap_50_base),
        ("mAR", mean_ar_base),
    )
    rows = []
    for _label, values in labelled_runs:
        series = pd.Series(values, dtype="float64")
        rows.append({"mean": series.mean(), "std": series.std()})
    # Build the frame in one go: DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(
        rows,
        index=[label for label, _values in labelled_runs],
        columns=["mean", "std"],
    )

<IPython.core.display.Javascript object>

In [2]:
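# P value
# The p-value is the probability of observing a difference at least as large as the one
# measured if the two models actually had the same mean performance (the null hypothesis).
# P-values range from 0 to 1. A small p-value means the observed difference is unlikely to be
# explained by run-to-run noise alone.
# By convention, a p-value below 0.05 is treated as a statistically significant difference.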
from scipy import stats

def p_value_calculation(mean_ap_base, mean_ap_50_base, mean_ar_base, mean_ap_new, mean_ap_50_new, mean_ar_new):
    """Print the two-sample t-test p-value (base vs. new) for each metric.

    Each ``*_base`` sequence is compared with its ``*_new`` counterpart via
    ``scipy.stats.ttest_ind`` using its default settings. For every metric a
    heading line and a ``p_value = ...`` line (4 decimals) are printed.
    Nothing is returned.
    """
    comparisons = (
        ("mAP:", mean_ap_base, mean_ap_new),
        ("mAP@IOU50:", mean_ap_50_base, mean_ap_50_new),
        ("mAR:", mean_ar_base, mean_ar_new),
    )
    for heading, base_runs, new_runs in comparisons:
        # Only the p-value is reported; the t statistic is discarded.
        _statistic, p_value = stats.ttest_ind(base_runs, new_runs)
        print(heading)
        print(f"p_value = {p_value: .4f}")

In [61]:
import plotly.graph_objects as go

def performance_box_plot(title=None, data=None, y_range=[0, 1.0]):
    """Build a grouped box plot with one trace per model.

    Args:
        title: Figure title text.
        data: DataFrame read through its ``model``, ``metric`` and
            ``performance`` columns. Required in practice: the ``None``
            default cannot be indexed.
        y_range: Range applied to the y axis.

    Returns:
        The plotly figure (not shown; the caller displays it).
    """
    figure_layout = go.Layout(title=go.layout.Title(text=title))
    fig = go.Figure(layout=figure_layout)
    fig.update_yaxes(range=y_range)

    # One box trace per model, in order of first appearance in ``data``.
    for model_name in data["model"].unique():
        model_rows = data.loc[data["model"] == model_name]
        trace = go.Box(
            x=model_rows["metric"],
            y=model_rows["performance"],
            name=model_name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        boxmode="group",
        xaxis_tickangle=0,
        xaxis_title="Evaluation metrics",
        yaxis_title="Performance value",
        font={"size": 16},
    )
    return fig

def single_performance_box_plot(title=None, data=None, y_range=[0, 1.0]):
    """Build a box plot with one box per model and the legend hidden.

    Args:
        title: Figure title text.
        data: DataFrame read through its ``model`` and ``performance``
            columns. Required in practice: the ``None`` default cannot be
            indexed.
        y_range: Range applied to the y axis.

    Returns:
        The plotly figure (not shown; the caller displays it).
    """
    figure_layout = go.Layout(title=go.layout.Title(text=title))
    fig = go.Figure(layout=figure_layout)
    fig.update_yaxes(range=y_range)

    # One box per model, in order of first appearance in ``data``.
    for model_name in data["model"].unique():
        model_rows = data.loc[data["model"] == model_name]
        fig.add_trace(go.Box(y=model_rows["performance"], name=model_name))

    # NOTE(review): the x-axis title reads "Evaluation metrics" although each
    # box is named after a model -- confirm this label is intended.
    fig.update_layout(
        boxmode="group",
        xaxis_tickangle=0,
        xaxis_title="Evaluation metrics",
        yaxis_title="Performance value",
        font={"size": 16},
        showlegend=False,
    )
    return fig


def performance_plot(title, mean_ap_base, mean_ap_50_base, mean_ar_base, mean_ap_new, mean_ap_50_new, mean_ar_new, y_range=[0, 0.5]):
    """Build six box traces comparing baseline and new runs per metric.

    Traces are added in the order mAP base, mAP new, mAP50 base, mAP50 new,
    mAR base, mAR new and are named after the values of the module-level
    ``indices`` dict. Baseline boxes are drawn in 'indianred', new boxes in
    'lightseagreen'.

    Returns:
        The plotly figure (not shown; the caller displays it).
    """
    base_color, new_color = "indianred", "lightseagreen"
    trace_names = list(indices.values())
    runs_in_trace_order = (
        mean_ap_base, mean_ap_new,
        mean_ap_50_base, mean_ap_50_new,
        mean_ar_base, mean_ar_new,
    )

    fig = go.Figure(layout=go.Layout(title=go.layout.Title(text=title)))
    fig.update_yaxes(range=y_range)
    for position, values in enumerate(runs_in_trace_order):
        # Even positions hold baseline runs, odd positions the new runs.
        color = base_color if position % 2 == 0 else new_color
        fig.add_trace(go.Box(y=values, name=trace_names[position], marker_color=color))
    return fig

def single_performance_plot(title, mean_ap, mean_ap_50, mean_ar, y_range=[0.5, 1.0]):
    """Build a box plot with one box each for mAP, mAP@IOU50 and mAR.

    Args:
        title: Figure title text.
        mean_ap, mean_ap_50, mean_ar: Per-run values of each metric.
        y_range: Range applied to the y axis.

    Returns:
        The plotly figure (not shown; the caller displays it).
    """
    named_runs = (
        ("mAP", mean_ap),
        ("mAP@IOU50", mean_ap_50),
        ("mAR", mean_ar),
    )
    fig = go.Figure(layout=go.Layout(title=go.layout.Title(text=title)))
    fig.update_yaxes(range=y_range)
    for trace_name, values in named_runs:
        fig.add_trace(go.Box(y=values, name=trace_name))
    return fig

In [56]:
def improve(perf):
    """Print the relative change of each metric's mean, new vs. baseline.

    Args:
        perf: Table with a ``mean`` column whose rows are ordered
            mAP base, mAP new, mAP50 base, mAP50 new, mAR base, mAR new
            (the layout produced by ``collection``).

    Prints one line per metric with ``(new - base) / base`` as a percentage
    with two decimals. Nothing is returned. The baseline mean is used as the
    divisor without any zero check.
    """
    means = perf["mean"]
    # Rows are addressed by position: the table's index holds string labels,
    # and integer keys on such a Series are a deprecated positional fallback.
    row_pairs = ((0, 1), (2, 3), (4, 5))
    changes = [
        (means.iloc[new_pos] - means.iloc[base_pos]) / means.iloc[base_pos]
        for base_pos, new_pos in row_pairs
    ]
    i1, i2, i3 = changes
    print(f"mAP is improved by {i1 * 100:.2f}%")
    print(f"mAP@IOU50 is improved by {i2 * 100:.2f}%")
    print(f"mAR is improved by {i3 * 100:.2f}%")

In [47]:
columns = ["performance", "model", "metric"]


def add_data(data=None, model="", mean_ap=[], mean_ap_50=[], mean_ar=[]):
    """Append one row per run value to ``data``, in place.

    Args:
        data: DataFrame to extend; each new row is
            ``[value, model, metric name]``.
        model: Label stored in the second field of every added row.
        mean_ap: Values added with metric name ``"mAP"``.
        mean_ap_50: Values added with metric name ``"mAP@IOU50"``.
        mean_ar: Values added with metric name ``"mAR"``.

    Returns:
        None; ``data`` is modified directly.
    """
    runs_by_metric = (
        ("mAP", mean_ap),
        ("mAP@IOU50", mean_ap_50),
        ("mAR", mean_ar),
    )
    for metric_name, run_values in runs_by_metric:
        for value in run_values:
            # The new row is stored under the label len(data).
            # NOTE(review): presumably ``data`` has a default 0..n-1 index so
            # this always adds a row rather than overwriting one -- confirm.
            data.loc[len(data)] = [value, model, metric_name]

In [48]:
# Long-format table (one row per run and metric) feeding the grouped box plot.
data = pd.DataFrame(columns=columns)

# 1. Real-trained model.
mean_ap_real = [0.475, 0.460, 0.451, 0.423, 0.4391]
mean_ap_50_real = [0.748, 0.726, 0.717, 0.694, 0.711]
mean_ar_real = [0.586, 0.580, 0.569, 0.548, 0.566]
add_data(
    data=data,
    model="real-trained (760 real images)",
    mean_ap=mean_ap_real,
    mean_ap_50=mean_ap_50_real,
    mean_ar=mean_ar_real,
)

# 2. Synth-trained model.
mean_ap_synth = [0.3759, 0.3693, 0.3877, 0.4009, 0.3715]
mean_ap_50_synth = [0.5367, 0.5156, 0.5433, 0.5667, 0.5262]
mean_ar_synth = [0.4756, 0.4827, 0.5079, 0.5006, 0.4699]
add_data(
    data=data,
    model="synth-trained (400K synth images)",
    mean_ap=mean_ap_synth,
    mean_ap_50=mean_ap_50_synth,
    mean_ar=mean_ar_synth,
)

# 3. Fine-tuned model on synth + 10% of the real data.
mean_ap_fine_tune1 = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_fine_tune1 = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_fine_tune1 = [0.641, 0.631, 0.631, 0.638, 0.638]
add_data(
    data=data,
    model="fine-tuned (synth + 76 real images)",
    mean_ap=mean_ap_fine_tune1,
    mean_ap_50=mean_ap_50_fine_tune1,
    mean_ar=mean_ar_fine_tune1,
)

# 4. Fine-tuned model on synth + 50% of the real data.
mean_ap_fine_tune2 = [0.6394, 0.6424, 0.6417, 0.6503, 0.6463]
mean_ap_50_fine_tune2 = [0.8110, 0.8123, 0.8098, 0.8208, 0.8187]
mean_ar_fine_tune2 = [0.7318, 0.7270, 0.7355, 0.7371, 0.7299]
add_data(
    data=data,
    model="fine-tuned (synth + 380 real images)",
    mean_ap=mean_ap_fine_tune2,
    mean_ap_50=mean_ap_50_fine_tune2,
    mean_ar=mean_ar_fine_tune2,
)

# 5. Fine-tuned model on synth + 100% of the real data.
mean_ap_fine_tune3 = [0.6839, 0.6762, 0.6802, 0.6900, 0.6877]
mean_ap_50_fine_tune3 = [0.8547, 0.8451, 0.8507, 0.8611, 0.8595]
mean_ar_fine_tune3 = [0.7597, 0.7481, 0.7575, 0.7627, 0.7564]
add_data(
    data=data,
    model="fine-tuned (synth + 760 real images)",
    mean_ap=mean_ap_fine_tune3,
    mean_ap_50=mean_ap_50_fine_tune3,
    mean_ar=mean_ar_fine_tune3,
)

In [57]:
# Grouped box plot of every model in `data`, one group per metric.
performance_box_plot(
    title="Model Performance Comparison",
    data=data,
    y_range=[0.3, 0.9],
)

In [66]:
# mAP-only table: one box per model. "<br>" is part of each label string.
mean_ap_data = pd.DataFrame(columns=columns)
add_data(
    data=mean_ap_data,
    model="real-trained<br>(760 real images)",
    mean_ap=mean_ap_real,
)
add_data(
    data=mean_ap_data,
    model="synth-trained<br>(400K synth images)",
    mean_ap=mean_ap_synth,
)
add_data(
    data=mean_ap_data,
    model="fine-tuned<br>(synth + 76<br>real images)",
    mean_ap=mean_ap_fine_tune1,
)
add_data(
    data=mean_ap_data,
    model="fine-tuned<br>(synth + 380<br>real images)",
    mean_ap=mean_ap_fine_tune2,
)
add_data(
    data=mean_ap_data,
    model="fine-tuned<br>(synth + 760<br>real images)",
    mean_ap=mean_ap_fine_tune3,
)
single_performance_box_plot(
    title="Model Performance Comparison",
    data=mean_ap_data,
    y_range=[0.3, 0.9],
)

## Real-trained model vs fine-tuned model on 10%

In [8]:
# Baseline: real-trained model.
mean_ap_base = [0.475, 0.460, 0.451, 0.423, 0.4391]
mean_ap_50_base = [0.748, 0.726, 0.717, 0.694, 0.711]
mean_ar_base = [0.586, 0.580, 0.569, 0.548, 0.566]

# New: fine-tuned model on synth + 10% of the real data.
mean_ap_new = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_new = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_new = [0.641, 0.631, 0.631, 0.638, 0.638]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.44962,0.019831
mAP V2,0.5282,0.005975
mAP50 V1,0.7192,0.019892
mAP50 V2,0.7054,0.007956
mAR V1,0.5698,0.014636
mAR V2,0.6358,0.00455


In [9]:
# Side-by-side boxes for baseline and new runs of each metric.
fig = performance_plot(
    title="Real-trained model vs fine-tuned model on 10%",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.3, 0.9],
)
fig.show()

In [10]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.1877
mAR:
p_value =  0.0000


In [11]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 17.48%
mAP@IOU50 is improved by -1.92%
mAR is improved by 11.58%


## Real-trained model vs fine-tuned model on 50%

In [12]:
# Baseline: real-trained model.
mean_ap_base = [0.475, 0.460, 0.451, 0.423, 0.4391]
mean_ap_50_base = [0.748, 0.726, 0.717, 0.694, 0.711]
mean_ar_base = [0.586, 0.580, 0.569, 0.548, 0.566]

# New: fine-tuned model on synth + 50% of the real data.
mean_ap_new = [0.6394, 0.6424, 0.6417, 0.6503, 0.6463]
mean_ap_50_new = [0.8110, 0.8123, 0.8098, 0.8208, 0.8187]
mean_ar_new = [0.7318, 0.7270, 0.7355, 0.7371, 0.7299]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.44962,0.019831
mAP V2,0.64402,0.004301
mAP50 V1,0.7192,0.019892
mAP50 V2,0.81452,0.004912
mAR V1,0.5698,0.014636
mAR V2,0.73226,0.004104


In [13]:
# Side-by-side boxes for baseline and new runs of each metric.
fig = performance_plot(
    title="Real-trained model vs fine-tuned model on 50%",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.3, 0.9],
)
fig.show()

In [14]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.0000
mAR:
p_value =  0.0000


In [15]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 43.24%
mAP@IOU50 is improved by 13.25%
mAR is improved by 28.51%


## Real-trained model vs fine-tuned model on 100%

In [16]:
# Baseline: real-trained model.
mean_ap_base = [0.475, 0.460, 0.451, 0.423, 0.4391]
mean_ap_50_base = [0.748, 0.726, 0.717, 0.694, 0.711]
mean_ar_base = [0.586, 0.580, 0.569, 0.548, 0.566]

# New: fine-tuned model on synth + 100% of the real data.
mean_ap_new = [0.6839, 0.6762, 0.6802, 0.6900, 0.6877]
mean_ap_50_new = [0.8547, 0.8451, 0.8507, 0.8611, 0.8595]
mean_ar_new = [0.7597, 0.7481, 0.7575, 0.7627, 0.7564]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.44962,0.019831
mAP V2,0.6836,0.005572
mAP50 V1,0.7192,0.019892
mAP50 V2,0.85422,0.006539
mAR V1,0.5698,0.014636
mAR V2,0.75688,0.005466


In [17]:
# Side-by-side boxes for baseline and new runs of each metric.
# The title names the 100% comparison this section covers (it previously said 10%).
fig = performance_plot(
    "Real-trained model vs fine-tuned model on 100%",
    mean_ap_base,
    mean_ap_50_base,
    mean_ar_base,
    mean_ap_new,
    mean_ap_50_new,
    mean_ar_new,
    y_range=[0.3, 0.9],
)
fig.show()

In [18]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.0000
mAR:
p_value =  0.0000


In [19]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 52.04%
mAP@IOU50 is improved by 18.77%
mAR is improved by 32.83%


## Fine-tuned model on 10% vs fine-tuned model on 50%

In [20]:
# Baseline: 3. fine-tuned model on synth + 10% data.
mean_ap_base = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_base = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_base = [0.641, 0.631, 0.631, 0.638, 0.638]

# New: 4. fine-tuned model on synth + 50% data.
mean_ap_new = [0.6394, 0.6424, 0.6417, 0.6503, 0.6463]
mean_ap_50_new = [0.8110, 0.8123, 0.8098, 0.8208, 0.8187]
mean_ar_new = [0.7318, 0.7270, 0.7355, 0.7371, 0.7299]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.5282,0.005975
mAP V2,0.64402,0.004301
mAP50 V1,0.7054,0.007956
mAP50 V2,0.81452,0.004912
mAR V1,0.6358,0.00455
mAR V2,0.73226,0.004104


In [21]:
# Side-by-side boxes for baseline and new runs of each metric.
fig = performance_plot(
    title="Fine-tuned model on 10% vs 50%",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.3, 0.9],
)
fig.show()

In [22]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.0000
mAR:
p_value =  0.0000


In [23]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 21.93%
mAP@IOU50 is improved by 15.47%
mAR is improved by 15.17%


## Fine-tuned model on 10% vs 100%

In [24]:
# Baseline: 3. fine-tuned model on synth + 10% data.
mean_ap_base = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_base = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_base = [0.641, 0.631, 0.631, 0.638, 0.638]

# New: 5. fine-tuned model on synth + 100% data.
mean_ap_new = [0.6839, 0.6762, 0.6802, 0.6900, 0.6877]
mean_ap_50_new = [0.8547, 0.8451, 0.8507, 0.8611, 0.8595]
mean_ar_new = [0.7597, 0.7481, 0.7575, 0.7627, 0.7564]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.5282,0.005975
mAP V2,0.6836,0.005572
mAP50 V1,0.7054,0.007956
mAP50 V2,0.85422,0.006539
mAR V1,0.6358,0.00455
mAR V2,0.75688,0.005466


In [25]:
# Side-by-side boxes for baseline and new runs of each metric.
fig = performance_plot(
    title="Fine-tuned model on 10% vs 100%",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.3, 0.9],
)
fig.show()

In [26]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.0000
mAR:
p_value =  0.0000


In [27]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 29.42%
mAP@IOU50 is improved by 21.10%
mAR is improved by 19.04%


# Appendix

### Synth-trained model on different size of data

In [75]:
# Fresh long-format table for the synth-only comparison.
data = pd.DataFrame(columns=columns)

# 1. 40K synth-trained model.
mean_ap_small = [0.290, 0.336, 0.288, 0.310, 0.332]
mean_ap_50_small = [0.471, 0.519, 0.472, 0.498, 0.529]
mean_ar_small = [0.414, 0.452, 0.404, 0.441, 0.455]
add_data(
    data=data,
    model="synth-trained (40K synth images)",
    mean_ap=mean_ap_small,
    mean_ap_50=mean_ap_50_small,
    mean_ar=mean_ar_small,
)

# 2. 100K synth-trained model. These names are reassigned for the 400K runs
# further down, so the rows must be added before that happens.
mean_ap_synth = [0.360, 0.360, 0.362, 0.364, 0.372]
mean_ap_50_synth = [0.543, 0.541, 0.530, 0.532, 0.545]
mean_ar_synth = [0.476, 0.476, 0.471, 0.480, 0.480]
add_data(
    data=data,
    model="synth-trained (100K synth images)",
    mean_ap=mean_ap_synth,
    mean_ap_50=mean_ap_50_synth,
    mean_ar=mean_ar_synth,
)

# 3. 400K synth-trained model.
mean_ap_synth = [0.3759, 0.3693, 0.3877, 0.4009, 0.3715]
mean_ap_50_synth = [0.5367, 0.5156, 0.5433, 0.5667, 0.5262]
mean_ar_synth = [0.4756, 0.4827, 0.5079, 0.5006, 0.4699]
add_data(
    data=data,
    model="synth-trained (400K synth images)",
    mean_ap=mean_ap_synth,
    mean_ap_50=mean_ap_50_synth,
    mean_ar=mean_ar_synth,
)

performance_box_plot(
    title="Model Performance Comparison",
    data=data,
    y_range=[0.2, 0.6],
)

### Fine-tuned model on different size of synth data + 76 real data

In [78]:
# Fresh long-format table for this comparison.
data = pd.DataFrame(columns=columns)

# 1. Fine-tuned model on 40K synth + 76 real images.
mean_ap = [0.483, 0.468, 0.464, 0.464, 0.467]
mean_ap_50 = [0.702, 0.684, 0.685, 0.680, 0.688]
mean_ar = [0.600, 0.591, 0.583, 0.589, 0.592]
add_data(
    data=data,
    model="fine-tuned (40K synth + 76 real images)",
    mean_ap=mean_ap,
    mean_ap_50=mean_ap_50,
    mean_ar=mean_ar,
)

# 2. Fine-tuned model on 400K synth + 76 real images.
mean_ap_fine_tune1 = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_fine_tune1 = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_fine_tune1 = [0.641, 0.631, 0.631, 0.638, 0.638]
add_data(
    data=data,
    model="fine-tuned (400K synth + 76 real images)",
    mean_ap=mean_ap_fine_tune1,
    mean_ap_50=mean_ap_50_fine_tune1,
    mean_ar=mean_ar_fine_tune1,
)

performance_box_plot(
    title="Model Performance Comparison",
    data=data,
    y_range=[0.3, 0.9],
)

### Fine-tuned model on different size of synth data + 380 real data

In [80]:
# Fresh long-format table for this comparison.
data = pd.DataFrame(columns=columns)

# 1. Fine-tuned model on 40K synth + 380 real images.
mean_ap = [0.568, 0.552, 0.565, 0.568, 0.543]
mean_ap_50 = [0.787, 0.780, 0.791, 0.781, 0.766]
mean_ar = [0.670, 0.657, 0.666, 0.670, 0.659]
add_data(
    data=data,
    model="fine-tuned (40K synth + 380 real images)",
    mean_ap=mean_ap,
    mean_ap_50=mean_ap_50,
    mean_ar=mean_ar,
)

# 2. Fine-tuned model on synth + 50% data.
mean_ap_fine_tune2 = [0.6394, 0.6424, 0.6417, 0.6503, 0.6463]
mean_ap_50_fine_tune2 = [0.8110, 0.8123, 0.8098, 0.8208, 0.8187]
mean_ar_fine_tune2 = [0.7318, 0.7270, 0.7355, 0.7371, 0.7299]
add_data(
    data=data,
    model="fine-tuned (synth + 380 real images)",
    mean_ap=mean_ap_fine_tune2,
    mean_ap_50=mean_ap_50_fine_tune2,
    mean_ar=mean_ar_fine_tune2,
)

performance_box_plot(
    title="Model Performance Comparison",
    data=data,
    y_range=[0.3, 0.9],
)

### Fine-tuned model on different size of synth data + 760 real data

In [82]:
# Fresh long-format table for this comparison.
data = pd.DataFrame(columns=columns)

# 1. Fine-tuned model on 40K synth + 760 real images.
mean_ap = [0.589, 0.608, 0.606, 0.589, 0.608]
mean_ap_50 = [0.813, 0.832, 0.832, 0.816, 0.833]
mean_ar = [0.681, 0.689, 0.693, 0.682, 0.692]
# The label says 760 real images to match this section; it previously read
# "76", duplicating the label of the 76-image comparison.
add_data(data, "fine-tuned (40K synth + 760 real images)", mean_ap, mean_ap_50, mean_ar)

# 2. Fine-tuned model on synth + 100% data.
mean_ap_fine_tune3 = [0.6839, 0.6762, 0.6802, 0.6900, 0.6877]
mean_ap_50_fine_tune3 = [0.8547, 0.8451, 0.8507, 0.8611, 0.8595]
mean_ar_fine_tune3 = [0.7597, 0.7481, 0.7575, 0.7627, 0.7564]
add_data(
    data=data,
    model="fine-tuned (synth + 760 real images)",
    mean_ap=mean_ap_fine_tune3,
    mean_ap_50=mean_ap_50_fine_tune3,
    mean_ar=mean_ar_fine_tune3,
)
performance_box_plot("Model Performance Comparison", data, y_range=[0.3, 0.9])

In [28]:
# fine-tune medium
# NOTE(review): these values are identical to the "fine-tuned 10% vs 50%"
# comparison earlier in the notebook -- confirm they are the intended inputs.
mean_ap_base = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_base = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_base = [0.641, 0.631, 0.631, 0.638, 0.638]

mean_ap_new = [0.6394, 0.6424, 0.6417, 0.6503, 0.6463]
mean_ap_50_new = [0.8110, 0.8123, 0.8098, 0.8208, 0.8187]
mean_ar_new = [0.7318, 0.7270, 0.7355, 0.7371, 0.7299]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.5282,0.005975
mAP V2,0.64402,0.004301
mAP50 V1,0.7054,0.007956
mAP50 V2,0.81452,0.004912
mAR V1,0.6358,0.00455
mAR V2,0.73226,0.004104


In [29]:
# NOTE(review): the title describes a V1-vs-V2 comparison on 50% real data,
# while the inputs match the fine-tuned 10% and 50% runs -- confirm the title.
fig = performance_plot(
    title="Fine-tuned model (trained on V1 vs V2) on 50% real dataset",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.5, 1.0],
)
fig.show()

In [30]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.0000
mAR:
p_value =  0.0000


In [31]:
# Relative change of each metric's mean (new vs. baseline).
improve(perf=performance)

mAP is improved by 21.93%
mAP@IOU50 is improved by 15.47%
mAR is improved by 15.17%


# Appendix

In [32]:
# fine-tune full
# Baseline runs.
mean_ap_base = [0.6887, 0.6863, 0.6915, 0.6925, 0.6925]
mean_ap_50_base = [0.8801, 0.8701, 0.8744, 0.8796, 0.8814]
mean_ar_base = [0.7541, 0.7474, 0.7562, 0.7536, 0.7537]

# New runs (same values as the fine-tuned synth + 100% runs used earlier).
mean_ap_new = [0.6839, 0.6762, 0.6802, 0.6900, 0.6877]
mean_ap_50_new = [0.8547, 0.8451, 0.8507, 0.8611, 0.8595]
mean_ar_new = [0.7597, 0.7481, 0.7575, 0.7627, 0.7564]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.6903,0.002724
mAP V2,0.6836,0.005572
mAP50 V1,0.87712,0.004744
mAP50 V2,0.85422,0.006539
mAR V1,0.753,0.003304
mAR V2,0.75688,0.005466


In [33]:
# Side-by-side boxes for baseline and new runs of each metric.
fig = performance_plot(
    title="Fine-tuned model (trained on V1 vs V2) on 100% real dataset",
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
    y_range=[0.5, 1.0],
)
fig.show()

In [34]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0421
mAP@IOU50:
p_value =  0.0002
mAR:
p_value =  0.2114


In [35]:
# Inputs for the single-model summary and plot cells further down.
# These are the fine-tuned (synth + 10% real) runs. A first set of assignments
# to the same names (the real-trained runs) was overwritten immediately and
# never took effect, so it has been removed.
mean_ap = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50 = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar = [0.641, 0.631, 0.631, 0.638, 0.638]

In [36]:
# Baseline: real-trained.
mean_ap_base = [0.475, 0.460, 0.451, 0.423, 0.4391]
mean_ap_50_base = [0.748, 0.726, 0.717, 0.694, 0.711]
mean_ar_base = [0.586, 0.580, 0.569, 0.548, 0.566]

# New: fine-tuned 10%.
mean_ap_new = [0.528, 0.525, 0.521, 0.530, 0.537]
mean_ap_50_new = [0.703, 0.702, 0.696, 0.709, 0.717]
mean_ar_new = [0.641, 0.631, 0.631, 0.638, 0.638]

performance = collection(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)
performance

Unnamed: 0,mean,std
mAP V1,0.44962,0.019831
mAP V2,0.5282,0.005975
mAP50 V1,0.7192,0.019892
mAP50 V2,0.7054,0.007956
mAR V1,0.5698,0.014636
mAR V2,0.6358,0.00455


In [37]:
# The inputs are the real-trained (base) and fine-tuned 10% (new) runs, so the
# title names that comparison; it previously described a V1-vs-V2 comparison
# on the 100% real dataset.
fig = performance_plot(
    "Real-trained model vs fine-tuned model on 10%",
    mean_ap_base,
    mean_ap_50_base,
    mean_ar_base,
    mean_ap_new,
    mean_ap_50_new,
    mean_ar_new,
    y_range=[0.3, 1.0],
)
fig.show()

In [38]:
# t-test p-values, baseline vs. new, for each metric.
p_value_calculation(
    mean_ap_base=mean_ap_base,
    mean_ap_50_base=mean_ap_50_base,
    mean_ar_base=mean_ar_base,
    mean_ap_new=mean_ap_new,
    mean_ap_50_new=mean_ap_50_new,
    mean_ar_new=mean_ar_new,
)

mAP:
p_value =  0.0000
mAP@IOU50:
p_value =  0.1877
mAR:
p_value =  0.0000


In [39]:
# Mean/std summary for a single model.
# `mean_ap`, `mean_ap_50` and `mean_ar` are not defined in this cell: they are
# whatever an earlier cell last assigned to those names.
# Disabled alternative inputs (same values as the 400K synth-trained runs):
# mean_ap = [0.3759, 0.3693, 0.3877, 0.4009, 0.3715]
# mean_ap_50 = [0.5367, 0.5156, 0.5433, 0.5667, 0.5262]
# mean_ar = [0.4756, 0.4827, 0.5079, 0.5006, 0.4699]
performance = single_collection(mean_ap, mean_ap_50, mean_ar)
performance

Unnamed: 0,mean,std
mAP,0.5282,0.005975
mAP@50,0.7054,0.007956
mAR,0.6358,0.00455


In [40]:
# The y range spans the full 0-1 metric scale. The previous upper bound of 0.6
# cut off the mAP@IOU50 (~0.71) and mAR (~0.64) boxes shown in the summary
# table for the active inputs.
# NOTE(review): the title mentions SynthDet V2, but `mean_ap` / `mean_ap_50` /
# `mean_ar` come from an earlier cell -- confirm the title matches those runs.
single_performance_plot(
    "Model performance trained on SynthDet V2",
    mean_ap,
    mean_ap_50,
    mean_ar,
    y_range=[0, 1.0],
)