# Reduction of Dataset for Analysis

In [None]:
import sys
!{sys.executable} -m pip install matplotlib
!{sys.executable} -m pip install pandas
!{sys.executable} -m pip install scipy
!{sys.executable} -m pip install openpyxl

import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
import numpy as np
import shutil
from scipy import signal, stats



In [None]:
# Folder that holds the input PNG images.
path_in = Path("/run/media/wernerfeiler/Frei SSD/Ruan_ml_data/W3-6400-3200-nr25")
# Every "*.png" entry directly inside that folder (the pattern does not recurse).
list_paths = [*path_in.glob("*.png")]

In [None]:
# Sort in place so the positional selection below follows path order.
list_paths.sort()
n_paths = len(list_paths)
# Positions 0, 5, 10, ... i.e. every fifth entry of the sorted list.
range_list = list(range(0, n_paths, 5))
n_paths

In [None]:
# Reduced selection: the paths found at the positions listed in range_list.
list_paths_red = [list_paths[position] for position in range_list]
len(list_paths_red)

In [None]:
# Walk over the reduced selection of image paths. The copy call below is
# commented out, so the loop body is only `pass` and no files are touched.
for p in list_paths_red:
    pass
    # shutil.copy(p, Path("/home/wernerfeiler/run_ml_sandvision/data_reduced")/p.name)

# Analysis of the Output Excel Sheet

In [None]:
# Evaluation workbook, resolved relative to the current working directory.
file_name = Path.cwd() / "data_reduced/data_reduced__eval.xlsx"

# The first sheet column arrives with the header "Unnamed: 0"; rows are ordered
# by it and it then becomes the index of the frame.
index_column = "Unnamed: 0"
raw_table = pd.read_excel(file_name)
df = raw_table.sort_values(index_column).set_index(index_column)
df.head(5)

In [None]:
# Difference between the chamber-angle column and the sand-angle column.
df["diff_angle"] = df["angle_h_abs_chamber"] - df["angle_h_abs_sand"]

# Every index label is split on "_" and its last piece is parsed as an integer;
# that integer is used as the time stamp of the row.
list_time_progress = [int(x.split("_")[-1]) for x in df.index]

# Look up the earliest stamp once. Calling min() inside the comprehension
# re-scanned the whole list for every element (quadratic in the row count).
# `default=0` keeps an empty table working: the result is simply an empty list.
time_origin = min(list_time_progress, default=0)
list_time_progress_zero = [x - time_origin for x in list_time_progress]

# Time relative to the first stamp, so the series starts at 0.
df["rel_time"] = list_time_progress_zero

# Overview figure: the three angle columns drawn against relative time.
plt.style.use('seaborn-v0_8')

# (column, legend label, line colour) -- drawn in this order.
angle_traces = (
    ("diff_angle", "diff angle", "#75e0dd"),
    ("angle_h_abs_sand", "sand angle", "#dea645"),
    ("angle_h_abs_chamber", "chamber angle", "#e03636"),
)
for column_name, trace_label, trace_color in angle_traces:
    plt.plot(df["rel_time"], df[column_name], label=trace_label, color=trace_color)

plt.xlabel("time [ms]")
plt.ylabel("angle [deg]")
plt.legend()
# Write the figure to disk before showing it.
plt.savefig("all_angles__W3_example.png", bbox_inches="tight", dpi=150)
plt.show()

In [None]:
# Positions of the local maxima of the sand angle. find_peaks returns a
# (indices, properties) pair; only the indices are kept. Peaks must be at
# least 20 samples apart and have a prominence of at least 3.
peak_positions, _peak_properties = signal.find_peaks(
    df["angle_h_abs_sand"],
    distance=20,
    prominence=3,
)
max_signal = list(peak_positions)
max_signal

In [None]:
# Positions of the local minima of the sand angle. find_peaks only detects
# maxima, so the signal is mirrored first: each value is replaced by its
# distance to the global maximum, which turns valleys into peaks.
list_peaks = df["angle_h_abs_sand"].tolist()
max_val = max(list_peaks)
min_to_max = [abs(value - max_val) for value in list_peaks]

# Same spacing/prominence settings as used for the maxima search.
valley_positions, _valley_properties = signal.find_peaks(
    min_to_max,
    distance=20,
    prominence=3,
)
min_signal = list(valley_positions)
min_signal

In [None]:
# Sand-angle trace with the detected extrema marked on top of it.
plt.style.use('seaborn-v0_8')

# Plain arrays allow positional indexing with the peak index lists.
rel_time_values = np.array(df["rel_time"])
sand_trace_values = np.array(df["angle_h_abs_sand"])

# (positions, legend label, marker colour) -- maxima first, then minima.
extrema_markers = (
    (max_signal, "local maxima", "#d4396a"),
    (min_signal, "local minima", "#404078"),
)
for positions, marker_label, marker_color in extrema_markers:
    plt.scatter(
        rel_time_values[positions],
        sand_trace_values[positions],
        label=marker_label,
        marker="v",
        color=marker_color,
        zorder=2,  # markers sit above the line (zorder=1)
    )

plt.plot(df["rel_time"], df["angle_h_abs_sand"], label="sand angle", zorder=1, color="#dea645")
plt.xlabel("time [ms]")
plt.ylabel("angle [deg]")
plt.legend()
plt.savefig("sand_angle__W3_example.png", bbox_inches="tight", dpi=150)
plt.show()

In [None]:
def _describe_extrema(values):
    """Return ``(smallest, largest, mean, spread)`` for *values*.

    The mean is the plain sum divided by the element count; the spread is
    whatever ``scipy.stats.tstd`` reports for the same values.
    """
    return min(values), max(values), sum(values) / len(values), stats.tstd(values)


# The first two peaks originate from the initial sand packing: after the first
# sand-slide the sand loses robustness (it is clumpy and does not rejoin the
# "main sand-body"), so those two maxima are left out of the statistics.
arr_local_maxima = np.array(df["angle_h_abs_sand"])[max_signal][2:]
min_val, max_val, avg_val, std = _describe_extrema(arr_local_maxima)

# For the minima, the first and the last detected entry are dropped.
arr_local_minima = np.array(df["angle_h_abs_sand"])[min_signal][1:-1]
min_val_m, max_val_m, avg_val_m, std_m = _describe_extrema(arr_local_minima)

# Text report, every figure rounded to two decimals.
print(f"""
sand properties
---------------
    avg-maxima: {round(avg_val, 2)}±{round(std, 2)} deg
    min-maxima: {round(min_val, 2)} deg
    max-maxima: {round(max_val, 2)} deg

    avg-minima: {round(avg_val_m, 2)}±{round(std_m, 2)} deg
    min-minima: {round(min_val_m, 2)} deg
    max-minima: {round(max_val_m, 2)} deg
""")