In [1]:
from google.colab import drive
drive.mount('/content/drive')
!cp "/content/drive/My Drive/sorted_single_face_elypse_results.csv" /tmp/sorted_single_face_elypse_results.csv
!ls /tmp/sorted_single_face_elypse_results.csv

Mounted at /content/drive
ls: cannot access '/tmp/results_avarages.csv': No such file or directory


In [2]:
import pandas as pd
import altair as alt
import numpy as np

import plotly.graph_objects as go
from scipy.interpolate import griddata

In [12]:
df = pd.read_csv('/tmp/sorted_single_face_elypse_results.csv')
df

Unnamed: 0,ID,Width,Height,Depth,TemporalPooling,Epochs,Seed,SpatialPooling,AvgClassificationRate,AvgDeviation
0,17475050673,3,3,2,2,800,6,8,65.8008,7.0887
1,17475047233,3,3,2,2,800,13,8,66.5317,6.0030
2,17475047233,3,3,2,2,800,42,8,67.9674,5.4885
3,17475050673,3,3,2,2,800,44,8,64.5194,5.6383
4,17475047233,3,3,2,2,800,45,8,64.8923,4.1440
...,...,...,...,...,...,...,...,...,...,...
292,17475993123,7,7,5,3,800,45,8,66.5902,6.9724
293,17476662393,7,7,5,3,800,59,8,67.6665,5.6394
294,17475993123,7,7,5,3,800,93,8,67.2358,4.2374
295,17475993123,7,7,5,3,800,96,8,64.6159,6.7638


In [None]:
chart = alt.Chart(df).mark_boxplot().encode(
    x='TemporalPooling:N',
    y='AvgClassificationRate:Q',
    color='TemporalPooling:N'
).properties(
    title='Distribution of AvgClassificationRate by TemporalPooling'
)
chart

<h2> Concluding Temporal Pooling </h2>

This chart indicates that the best results were achieved using temporal pooling.
<br>
This is demonstrated by the tightest interval of values and the highest median by far
<br>
The chart points towards using **4 as Temporal Pooling**


In [6]:
chart = alt.Chart(df).mark_boxplot().encode(
    x='Depth:N',
    y='AvgClassificationRate:Q',
    color='Depth:N'
).properties(
    title='Distribution of AvgClassificationRate by Depth'
)
chart

In [15]:

# Filter where TemporalPooling == 4
filtered_df = df[df['TemporalPooling'] == 4]

# Create the chart: Depth vs AvgClassificationRate
chart = alt.Chart(filtered_df).mark_boxplot().encode(
    x='Depth:N',
    y='AvgClassificationRate:Q',
    color='Depth:N'
).properties(
    title='Distribution of AvgClassificationRate by Depth (TemporalPooling = 4)'
)

chart

<h2>Concluding Depth</h2>

<p>
In the chart showing values where <strong>TemporalPooling = 4</strong>, there are too few data points for both depth 2 and 3. As a result, the conclusions drawn from these specific depths are somewhat inconclusive.
</p>

<p>
In the chart using all available values, the interval of averages across different depths is the most inconsistent and broadest. This suggests that, in general, using depth 2 does not yield the best results.
</p>

<p>
This leaves using <strong>3 as the recommended depth</strong>.
</p>


In [17]:
chart = alt.Chart(df).mark_boxplot().encode(
    x='Width:N',
    y='AvgClassificationRate:Q',
    color='Width:N'
).properties(
    title='Distribution of AvgClassificationRate by Filter Size'
)
chart

In [18]:
filtered_df = df[(df['Depth'] == 3) & (df['TemporalPooling'] == 4)]
chart = alt.Chart(filtered_df).mark_boxplot().encode(
    x='Width:N',
    y='AvgClassificationRate:Q',
    color='Width:N'
).properties(
    title='Distribution of AvgClassificationRate by Filter Size'
)
chart

In [None]:
# Filter TemporalPooling == 4
df_filtered = df[df['TemporalPooling'] == 4]

# Create a grid of Width x Depth (since TemporalPooling fixed)
width_grid = np.array([3, 5, 7])
depth_grid = np.array([2, 3, 4])
W, D = np.meshgrid(width_grid, depth_grid)

# Interpolate classification rates on grid
points = df_filtered[['Width', 'Depth']].values
values = df_filtered['AvgClassificationRate'].values

# Use 'nearest' to avoid NaNs in small discrete space
grid_rates = griddata(points, values, (W, D), method='nearest')

# Create 3D scatter plot
fig = go.Figure()

# Scatter points from your original data
fig.add_trace(go.Scatter3d(
    x=df_filtered['Width'],
    y=df_filtered['Depth'],
    z=df_filtered['AvgClassificationRate'],
    mode='markers',
    marker=dict(size=8, color=df_filtered['AvgClassificationRate'], colorscale='Viridis', colorbar=dict(title='AvgClassificationRate'))
))

# Surface plot from the interpolated grid (for visualization)
fig.add_trace(go.Surface(
    x=W,
    y=D,
    z=grid_rates,
    colorscale='Viridis',
    opacity=0.5,
    showscale=False
))

fig.update_layout(scene=dict(
    xaxis_title='Width',
    yaxis_title='Depth',
    zaxis_title='AvgClassificationRate'
))

fig.show()

<h2>3D Chart Interpretation</h2>

<p>
This 3D chart helps better visualize how the distribution of classification rates varies with kernel size and depth.
</p>

<p>
When considering the distribution of values:
<ul>
  <li>For a filter size of <strong>3</strong>, the data is the most evenly distributed.</li>
  <li>For a filter size of <strong>5</strong>, the distribution is the most uneven, but it achieves the highest result.</li>
  <li>For a filter size of <strong>7</strong>, the data is fairly evenly distributed, except for one outlier that lowers the average classification rate. Despite this, the overall results are strong, making filter size 7 a competitive option.</li>
</ul>
</p>

<p>
Except for the outlier, a filter size of <strong>7</strong> appears to be the best choice based on both distribution and performance.
</p>

**To conclude, the most fitting hyperparameters for the training would be Height = 7, Width = 7, Depth = 3 and Temporal Pooling = 4**