In [1]:
import pandas as pd
import os

# Define the directory containing the .txt files
directory = '/home/julian/Documents/local-methane-data-diego/2ndVisit/txt/'

# Initialize an empty DataFrame to store all data
all_data = pd.DataFrame()

# Iterate over each file in the directory
for filename in os.listdir(directory):
    if filename.endswith('.txt'):
        file_path = os.path.join(directory, filename)
        # Read the .txt file into a DataFrame
        df = pd.read_csv(file_path)
        # Select only the date and measured level columns
        df = df.iloc[:, [0, 1]]
        # Append the data to the all_data DataFrame
        all_data = pd.concat([all_data, df])

# Convert the date column to datetime format
#all_data.iloc[:, 0] = pd.to_datetime(all_data.iloc[:, 0], format="%H:%M:%S.%f")

# Sort the DataFrame by the date column
all_data.sort_values(by=all_data.columns[0], inplace=True)

# Save the combined and sorted data to a new .txt file
all_data.to_csv('combined_sorted_data.txt', index=False, header=True)

In [2]:
import numpy as np
all_data['hour'] = all_data['DATE'].apply(lambda x: int(float((x.split(':')[0]))))
all_data['minute'] = all_data['DATE'].apply(lambda x: int(float(x.split(':')[1])))
all_data['second'] = all_data['DATE'].apply(lambda x: float(x.split(':')[2]))

all_data['hour_sin'] = np.sin(2 * np.pi * all_data['hour'] / 24)
all_data['hour_cos'] = np.cos(2 * np.pi * all_data['hour'] / 24)
all_data['minute_sin'] = np.sin(2 * np.pi * all_data['minute'] / 60)
all_data['minute_cos'] = np.cos(2 * np.pi * all_data['minute'] / 60)
all_data['second_sin'] = np.sin(2 * np.pi * all_data['second'] / 60)
all_data['second_cos'] = np.cos(2 * np.pi * all_data['second'] / 60)

all_data.drop(columns=['hour', 'minute', 'second'], inplace=True)

In [9]:
all_data = all_data.drop(columns=['DATE'])

In [11]:
all_data.to_csv('combined_sorted_data.txt', index=False, header=False)

In [None]:
import plotly.express as px

# Plot the data
fig = px.line(all_data, x='DATE', y='MEAS', title='Measured Levels Over Time')

# Show the plot
fig.show()

Example of one subsignal

In [44]:
start_signal_1 = '18:27:06.500'
stop_signal_1 = '18:29:20.000'

start_signal_2 = '18:21:21.500'
stop_signal_2 = '18:22:10.000'

In [45]:
df = all_data.DATE == start_signal_1
idx_start_1 = [cnt for cnt, i in enumerate(df.values) if i][0]
df = all_data.DATE == stop_signal_1
idx_stop_1 = [cnt for cnt, i in enumerate(df.values) if i][0]

df = all_data.DATE == start_signal_2
idx_start_2 = [cnt for cnt, i in enumerate(df.values) if i][0]
df = all_data.DATE == stop_signal_2
idx_stop_2 = [cnt for cnt, i in enumerate(df.values) if i][0]

In [None]:
print(idx_start_1, idx_stop_1)
print(idx_start_2, idx_stop_2)

In [None]:
print(all_data.iloc[idx_start_1,:2], all_data.iloc[idx_stop_1,:2])
print(all_data.iloc[idx_start_2,:2], all_data.iloc[idx_stop_2,:2])

In [48]:
signal_1 = all_data.iloc[idx_start_1:idx_stop_1, 1]
signal_2 = all_data.iloc[idx_start_2:idx_stop_2, 1]

In [None]:
signal_1.

In [None]:
signal_1.plot()

In [None]:
signal_2.plot()

# Plot best and worst predictions of Algorithms in TSA

In [1]:
import numpy as np

algo_name = "TiDE"
path_to_file = "/users/jdvillegas/slurm_files/results_job_88245/results/"
filename = f"long_term_forecast_methaneInterp_{algo_name}_custom_ftS_sl250_ll5_pl40_dm128_nh8_el2_dl2_df2048_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0_epochs_5000"
#filename = f"long_term_forecast_methaneInterp_{algo_name}_custom_ftS_sl130_ll5_pl30_dm128_nh8_el3_dl3_df2048_expand2_dc4_fc3_ebtimeF_dtTrue_Exp_0_epochs_1000"

pred = np.load(f"{path_to_file}/{filename}/pred.npy", allow_pickle=True)
true = np.load(f"{path_to_file}/{filename}/true.npy", allow_pickle=True)

In [9]:
import os
os.getcwd()
import sys
sys.path.append("/users/jdvillegas/repos/Time-Series-Library-Fork/utils")

In [None]:
from metrics import metric
mae, mse, rmse, mape, mspe = metric(pred, true)
print(f"mae: {mae}, mse: {mse}, rmse: {rmse}, mape: {mape}, mspe: {mse}")

mae: 7.286428451538086, mse: 153.2476806640625, rmse: 12.379324913024902, mape: 0.4391450583934784, mspe: 153.2476806640625


In [69]:
np.max(true, axis=1)

array([[ 8.27498  ],
       [ 8.000696 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.813562 ],
       [ 7.782083 ],
       [ 7.6722207],
       [ 7.431124 ],
       [ 7.027719 ],
       [ 6.5878797],
       [ 6.7771792],
       [ 6.876661 ],
       [ 6.9347167],
       [ 7.0003123],
       [ 7.0931373],
       [ 7.196668 ],
       [ 7.2762437],
       [ 7.308148 ],
       [ 7.308148 ],
       [ 7.308148 ],
       [ 7.308148 ],
       [ 7.347342 ],
       [ 7.407624 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.421573 ],
       [ 7.42

In [29]:
def n_lowest_values_with_indices(arr, n):
    if n <= 0:
        return np.array([]), np.array([])  # Return empty arrays if n is not positive
    indices = np.argsort(arr)[:n]  # Get indices of the n smallest elements
    values = arr[indices]          # Get the values using those indices
    return values, indices

In [60]:
def n_highest_values_with_indices(arr, n):
    if n <= 0:
        return np.array([]), np.array([])  # Return empty arrays if n is not positive
    indices = np.argsort(arr)[-n:][::-1]  # Get indices of the n largest elements
    values = arr[indices]                 # Get the values using those indices
    return values, indices

In [66]:
n_mins = 10
diff_vec = np.abs(pred - true)
vec_mae = np.mean(diff_vec.squeeze(), axis=1)
print(vec_mae.shape)

min_mae_vals, idx_min_mae_vals = n_lowest_values_with_indices(vec_mae, n_mins)
max_mae_vals, idx_max_mae_vals = n_highest_values_with_indices(vec_mae, n_mins)

print(f"Best Min {n_mins} MAE values: {min_mae_vals}")
print(f"Best Idxs of Min {n_mins} MAE values: {idx_min_mae_vals}")

print(f"Worst Min {n_mins} MAE values: {max_mae_vals}")
print(f"Worst Idxs of Min {n_mins} MAE values: {idx_max_mae_vals}")

(711,)
Best Min 10 MAE values: [0.53018534 0.58827955 0.6474005  0.7050573  0.7506792  0.77693367
 0.8644452  0.8791235  0.887962   0.8929523 ]
Best Idxs of Min 10 MAE values: [116 115 129 128 573 130 117 680 574 102]
Worst Min 10 MAE values: [39.894726 39.781    39.577457 39.290596 38.89747  38.559093 38.037544
 37.74037  37.231537 36.954655]
Worst Idxs of Min 10 MAE values: [205 206 204 207 203 208 202 209 201 210]


In [67]:
import plotly.graph_objects as go
import plotly.subplots as sp
import numpy as np

# What to plot:
best_or_worst_preds = "Worst"

# Define the grid layout
n_rows = 2
n_cols = 4
num_figures = n_rows * n_cols  # Ensure the grid can accommodate all figures

# Create a subplot figure
if best_or_worst_preds == "Best":
    fig = sp.make_subplots(rows=n_rows, cols=n_cols, 
                        subplot_titles=[f"Sequence # {i}" for i in idx_min_mae_vals],
                        horizontal_spacing=0.03, vertical_spacing=0.2)
elif best_or_worst_preds == "Worst":
    fig = sp.make_subplots(rows=n_rows, cols=n_cols, 
                        subplot_titles=[f"Sequence # {i}" for i in idx_max_mae_vals],
                        horizontal_spacing=0.03, vertical_spacing=0.2)

# Add traces to each subplot
for i in range(num_figures):
    row = (i // n_cols) + 1
    col = (i % n_cols) + 1
    
    #idx_meas = np.random.randint(low=0, high=pred.shape[0])
    if best_or_worst_preds == "Best":
        idx_meas = idx_min_mae_vals[i]
    elif best_or_worst_preds == "Worst":
        idx_meas = idx_max_mae_vals[i]

    fig.add_trace(go.Scatter(
        x=np.linspace(0, pred.shape[1] - 1, pred.shape[1]),
        y=pred[idx_meas, :, 0],
        name="Predicted",
        mode="lines",
        line=dict(color="blue"),
    ), row=row, col=col)
    
    fig.add_trace(go.Scatter(
        x=np.linspace(0, true.shape[1] - 1, true.shape[1]),
        y=true[idx_meas, :, 0],
        name="True",
        mode="lines",
        line=dict(color="red"),
    ), row=row, col=col)

      # Add individual legend annotations
    fig.add_annotation(
        x=20,
        y=1.15,
        xref=f"x{i + 1}",
        yref=f"y{i + 1}",
        showarrow=False,
        text="<b>Legend:</b> Blue: Predicted, Red: True",
        font=dict(size=10),
        align="center",
    )

# Update layout
fig.update_layout(
    showlegend=False,
    height=n_rows * 300,  # Adjust the height dynamically
    width=n_cols * 350,   # Adjust the width dynamically
    title_text=f"{best_or_worst_preds} {num_figures} predicted Methane Concentration Over Time",
    paper_bgcolor="lightgray",  # Background color outside the plots
    plot_bgcolor="whitesmoke"   # Background color inside the plots
)

# Update individual axes styles
for i in range(1, n_rows * n_cols + 1):
    fig['layout'][f'xaxis{i}'].update(
        title_text="Time step",  # Label for x-axis
        showgrid=True,  # Show gridlines
        gridcolor="lightgray",
        zerolinecolor="darkgray",  # Zero-line color
        linecolor="black",         # Axis line color
        tickcolor="black",         # Tick mark color
    )
    fig['layout'][f'yaxis{i}'].update(
        title_text="Methane concentration",
        showgrid=True,
        gridcolor="lightgray",
        zerolinecolor="darkgray",
        linecolor="black",
        tickcolor="black",
    )
# Show the figure
fig.show()

# Plot all time set data with the gaps

In [44]:
import os
os.getcwd()
import sys
sys.path.append("/users/jdvillegas/wearme-models-pujc-methane-concentration/classes")

In [45]:
from DataManager import DataManager

mydm = DataManager()
filled_nans_all_visits_meas = mydm.make_meas_with_nans(path_to_root_folder_all_meas="/users/jdvillegas/wearme-models-pujc-methane-concentration")


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.



In [46]:
len(filled_nans_all_visits_meas)

3

In [47]:
import plotly.express as px

# Plot using Plotly
fig = px.line(filled_nans_all_visits_meas[], x="date", y="METHANE", title="Time vs Value", markers=True)

# Show the plot
fig.show()

SyntaxError: invalid syntax. Perhaps you forgot a comma? (3971888143.py, line 4)