In [1]:
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
plotly.io.kaleido.scope.mathjax = None

# Passengership: 3
# Cargo: 1
# Tanker: 0
# Tug: 2

In [2]:
data = pd.read_csv("deepship_folds.csv")
data

Unnamed: 0,files,labels,folds
0,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,3,7
1,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,3,5
2,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,3,4
3,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,3,0
4,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,3,4
...,...,...,...
53497,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,0,7
53498,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,0,6
53499,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,0,1
53500,/home/thales/src/ajaac/datasets/DeepShip/DeepS...,0,3


In [3]:
data = data.drop(columns="files")
data.columns = ["class_i", "fold"]

label_mapping = {0: "Tanker", 1: "Cargo", 2: "Tug", 3: "Passengership"}
data["class"] = data["class_i"].map(label_mapping)
data = data.drop(columns="class_i")

In [4]:
data

Unnamed: 0,fold,class
0,7,Passengership
1,5,Passengership
2,4,Passengership
3,0,Passengership
4,4,Passengership
...,...,...
53497,7,Tanker
53498,6,Tanker
53499,1,Tanker
53500,3,Tanker


In [5]:
counts_per_fold = data.groupby(["fold", "class"]).size().reset_index(name="count")
counts_per_fold

Unnamed: 0,fold,class,count
0,0,Cargo,1232
1,0,Passengership,1354
2,0,Tanker,983
3,0,Tug,965
4,1,Cargo,1522
5,1,Passengership,1731
6,1,Tanker,1885
7,1,Tug,360
8,2,Cargo,1177
9,2,Passengership,1291


In [6]:
darker_colors = {
    "Cargo": "#1f77b4",
    "Passengership": "#ff7f0e",
    "Tanker": "#2ca02c",
    "Tug": "#d62728"
}

custom_colors = {
    "Cargo": "#357ABD",        # Slightly deeper blue
    "Passengership": "#FF7F4E", # Mid-tone orange
    "Tanker": "#4CA64C",       # Balanced green
    "Tug": "#C64747"           # Moderate red
}

In [7]:
counts_per_fold_abbr = counts_per_fold.replace({
    "Cargo": "C", 
    "Passengership": "P", 
    "Tanker": "Ta", 
    "Tug": "Tu"
})

custom_colors_abbr = {
    "C": "#357ABD",        # Slightly deeper blue
    "P": "#FF7F4E", # Mid-tone orange
    "Ta": "#4CA64C",       # Balanced green
    "Tu": "#C64747"           # Moderate red
}

In [8]:

bar = px.bar(counts_per_fold, 
    x="class",
    y="count", 
    color="class",
    facet_col="fold",  # Facet by the 'fold' column
    labels={"fold": "Fold", "count": "Number of samples", "class": "Class"},
    # title="Counts of samples across 10-fold split by class",
    color_discrete_map=custom_colors,
    facet_col_wrap=5,
    facet_row_spacing=0.06
)

bar.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
bar.for_each_xaxis(lambda x: x.update({'title': ''}))
bar.for_each_xaxis(lambda x: x.update(tickangle=90, ticks="outside", tickcolor="white", ticklen=7))
bar.update_layout(
    showlegend=False,
    width=600,
    height=800,
    margin=dict(
        t=30,
        b=170,
        r=20,
    )
)
bar.add_annotation(
    showarrow=False,
    xanchor='center',
    xref='paper', 
    x=0.5, 
    yref='paper',
    y=-.24,
    text='Class',
    font=dict(size=14)
)

bar.write_image("10_folds_counts_facet.pdf")
bar.show()

In [9]:
counts_per_fold

Unnamed: 0,fold,class,count
0,0,Cargo,1232
1,0,Passengership,1354
2,0,Tanker,983
3,0,Tug,965
4,1,Cargo,1522
5,1,Passengership,1731
6,1,Tanker,1885
7,1,Tug,360
8,2,Cargo,1177
9,2,Passengership,1291


In [10]:
wide_df = counts_per_fold.pivot(index="fold", columns="class", values="count").reset_index()
wide_df

class,fold,Cargo,Passengership,Tanker,Tug
0,0,1232,1354,983,965
1,1,1522,1731,1885,360
2,2,1177,1291,1513,1360
3,3,1512,1253,1955,3006
4,4,1082,2533,1610,584
5,5,1482,918,879,2554
6,6,1400,1265,1564,838
7,7,1436,1709,1408,1057
8,8,1206,1232,1450,670
9,9,488,965,1367,706


In [11]:
data

Unnamed: 0,fold,class
0,7,Passengership
1,5,Passengership
2,4,Passengership
3,0,Passengership
4,4,Passengership
...,...,...
53497,7,Tanker
53498,6,Tanker
53499,1,Tanker
53500,3,Tanker


In [12]:
class_counts = data.groupby(["class", "fold"]).size().reset_index(name="count")
class_counts

Unnamed: 0,class,fold,count
0,Cargo,0,1232
1,Cargo,1,1522
2,Cargo,2,1177
3,Cargo,3,1512
4,Cargo,4,1082
5,Cargo,5,1482
6,Cargo,6,1400
7,Cargo,7,1436
8,Cargo,8,1206
9,Cargo,9,488


In [13]:
box = px.box(class_counts,
    x="count",
    y="class",
    color="class",
    labels={"fold": "Fold", "count": "Number of samples", "class": "Class"},
    # title="Spread of samples across 10-fold split by class",
    color_discrete_map=custom_colors
)

box = box.update_layout(
    margin=dict(
        t=20,
        b=30,
        r=20
    ),
    showlegend=False
)

box.show()

In [14]:
box.write_image("10_fold_spread.pdf")

In [15]:
class_counts

Unnamed: 0,class,fold,count
0,Cargo,0,1232
1,Cargo,1,1522
2,Cargo,2,1177
3,Cargo,3,1512
4,Cargo,4,1082
5,Cargo,5,1482
6,Cargo,6,1400
7,Cargo,7,1436
8,Cargo,8,1206
9,Cargo,9,488


In [16]:
class_counts["fold"] = class_counts["fold"].astype(str)

In [17]:
bar2 = px.bar(
    class_counts,
    x="fold",         
    y="count",         
    color="class",      
    labels={"fold": "Fold", "count": "Number of samples", "class": "Class"},
    color_discrete_map=custom_colors,
    facet_col="class",
    # title="Sample Counts per Class Across 10 Folds",
)

bar2.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
bar2.for_each_xaxis(lambda x: x.update({'title': ''}))
bar2.update_layout(
    showlegend=False,
    margin=dict(
        t=30,
        b=60,
        r=20
    )
)
bar2.add_annotation(
    showarrow=False,
    xanchor='center',
    xref='paper', 
    x=0.5, 
    yref='paper',
    y=-0.13,
    text='Fold',
    font=dict(size=14)
)

bar2.show()

In [18]:
bar2.write_image('10_class_counts_facet.pdf')

In [19]:
deepship_df = pd.read_csv("deepship.csv")
box = px.box(deepship_df,
    x="duration_min",
    y="class",
    color="class",
    labels={"class": "Class", "duration_min": "Vessel recording durations (min)"},
    color_discrete_map=custom_colors
)

box = box.update_layout(
    margin=dict(
        t=20,
        b=30,
        r=20
    ),
    showlegend=False
)

box.show()

In [20]:
box.write_image("deepship_duration_spread.pdf")

In [21]:
deepship_df[deepship_df["class"] == "Cargo"].sort_values(by="duration_min")

Unnamed: 0.1,Unnamed: 0,class,ship_name,num_files,duration_s,duration_min
24,24,Cargo,GLOVIS_CHORUS,1,184,3.066667
37,37,Cargo,MARGUERITE_ACE,1,186,3.100000
22,22,Cargo,GALLEON,1,194,3.233333
58,58,Cargo,ULTRA_EXCELLENCE,1,198,3.300000
1,1,Cargo,AFRICAN_KITE,1,199,3.316667
...,...,...,...,...,...,...
14,14,Cargo,CAPRICORNUS_LEADER,2,811,13.516667
39,39,Cargo,MORNING_MENAD,2,854,14.233333
52,52,Cargo,SEASPAN_SWIFT,10,2938,48.966667
51,51,Cargo,SEASPAN_RELIANT,12,3298,54.966667


In [22]:
deepship_df

Unnamed: 0.1,Unnamed: 0,class,ship_name,num_files,duration_s,duration_min
0,0,Cargo,ADVENTURE_1,1,250,4.166667
1,1,Cargo,AFRICAN_KITE,1,199,3.316667
2,2,Cargo,AMIS_FORTUNE,1,347,5.783333
3,3,Cargo,AMSTEL_OSPREY,1,354,5.900000
4,4,Cargo,ANASTASIA,2,729,12.150000
...,...,...,...,...,...,...
244,244,Tug,SEASPAN_EAGLE,6,3596,59.933333
245,245,Tug,SEASPAN_OSPREY,1,534,8.900000
246,246,Tug,SEASPAN_QUEEN,3,1599,26.650000
247,247,Tug,SEASPAN_RAVEN,15,8693,144.883333


In [23]:
deepship_subset_summary = {
    "class": ["Cargo", "Passengership", "Tanker", "Tug"],
    "ships": [62, 43, 128, 16],
    "recordings": [106, 180, 234, 62],
    "total_time": [628, 715, 735, 606]
}

subset = pd.DataFrame(deepship_subset_summary)
subset

Unnamed: 0,class,ships,recordings,total_time
0,Cargo,62,106,628
1,Passengership,43,180,715
2,Tanker,128,234,735
3,Tug,16,62,606


In [29]:
## Create figure
fig = go.Figure()

# Adding bars for ships (solid colors)
fig.add_trace(go.Bar(
    x=subset["class"],
    y=subset["ships"],
    name="Ships",
    marker=dict(
        color=[custom_colors[cls] for cls in subset["class"]],
    )
))

# Adding bars for recordings (lighter colors with hatch pattern)
fig.add_trace(go.Bar(
    x=subset["class"],
    y=subset["recordings"],
    name="Recordings",
    marker=dict(
        color=[custom_colors[cls] for cls in subset["class"]],
        opacity=0.9,
        pattern_shape="/",
    )
))

fig.update_layout(
    xaxis_title="Class",
    yaxis=dict(title="Number of Ships / Recordings"),
    barmode="group",
    legend=dict(
        yanchor="top",
        y=0.95,
        xanchor="left",
        x=0.01
    ),
    margin=dict(
        l=50,
        r=30,
        t=30,
        b=30
    )
)

fig.write_image("deepship_class_analysis.pdf")

fig.show()

In [51]:
WIDTH = 400
HEIGHT = 400

# Ships chart
fig1 = go.Figure()
fig1.add_trace(go.Bar(
    x=subset["class"],
    y=subset["ships"],
    name="Ships",
    marker=dict(
        color=[custom_colors[cls] for cls in subset["class"]],
    )
))
fig1.update_layout(
    width=WIDTH,
    height=HEIGHT,
    xaxis_title="Class",
    yaxis=dict(title="Number of Ships"),
    margin=dict(l=50, r=30, t=30, b=30)
)

fig1.show()
fig1.write_image("num_ships_per_class.pdf")

# Recordings chart
fig2 = go.Figure()
fig2.add_trace(go.Bar(
    x=subset["class"],
    y=subset["recordings"],
    name="Recordings",
    marker=dict(
        color=[custom_colors[cls] for cls in subset["class"]],
    )
))
fig2.update_layout(
    width=WIDTH,
    height=HEIGHT,
    xaxis_title="Class",
    yaxis=dict(title="Number of Recordings"),
    margin=dict(l=50, r=30, t=30, b=30)
)
fig2.show()
fig2.write_image("num_recordings.pdf")

