In [88]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
import bibtexparser
import json
import openpyxl

In [32]:
dataset_folder = "data"
file = "unsupervised_collection.json"

# Load the JSON file exported from Zotero. The encoding is set explicitly
# because JSON text is UTF-8; relying on the platform default can garble or
# reject non-ASCII characters in author names and titles.
with open(os.path.join(dataset_folder, file), 'r', encoding='utf-8') as f:
    data = json.load(f)

# Extract the relevant fields: one flat record per entry of data['items'].
records = []
for item in data['items']:
    record = {
        'Citation Key': item.get('citationKey', 'N/A'),
        'Title': item.get('title', 'N/A'),
        # Only creators that carry a 'lastName' key contribute to the string.
        'Author': ', '.join(
            creator['lastName']
            for creator in item.get('creators', [])
            if 'lastName' in creator
        ),
        # Raw 'date' value as exported; 'N/A' when the key is absent.
        'Publication Year': item.get('date', 'N/A'),
        # Kept exactly as exported; None when the item has no 'tags' key.
        'Tags': item.get('tags')
        # Add more fields as needed
    }
    records.append(record)

# Build a DataFrame with one row per record.
df = pd.DataFrame(records)

In [37]:
# Keep only the first run of four digits found in each value.
df['Publication Year'] = df['Publication Year'].str.extract(r'(\d{4})', expand=False)

In [34]:
# Convert the 'Tags' column from a list of {'tag': ...} dicts to a plain list.
# Elements that are not dicts are kept unchanged, so re-running this cell on
# already-flattened lists does not fail; any value that is not a list becomes
# an empty list.
df['Tags'] = df['Tags'].apply(
    lambda x: [tag['tag'] if isinstance(tag, dict) else tag for tag in x]
    if isinstance(x, list) else []
)

In [38]:
# Display the current contents of `df`.
df

Unnamed: 0,Citation Key,Title,Author,Publication Year,Tags
0,alnegheimishM2ADMultiSensorMultiSystem2025,M2AD: Multi-Sensor Multi-System Anomaly Detect...,"Alnegheimish, Chandrayan, He, Pradhan, Reimher...",2025,"[1D input, Anomaly Score: Dynamic, Arch type: ..."
1,arellano-espitiaDeepCompactClusteringBasedAnom...,Deep-Compact-Clustering Based Anomaly Detectio...,"Arellano-Espitia, Delgado-Prieto, Gonzalez-Abr...",2021,"[1D input, Anomaly detection, Anomaly Score: D..."
2,baidyaAnomalyDetectionTime2023,Anomaly Detection in Time Series Data Using Re...,"Baidya, Jeong",2023,"[Anomaly detection, Transformer, Autoencoder, ..."
3,chiBearingFaultDiagnosis2022,Bearing Fault Diagnosis for Time-Varying Syste...,"Chi, Yang, Shao, Zhang",2022,"[1D input, Anomaly detection, Anomaly Score: S..."
4,ellefsenOnlineFaultDetection2020,Online Fault Detection in Autonomous Ferries: ...,"Ellefsen, Han, Cheng, Holmeset, Aesoy, Zhang",2020,"[1D input, Anomaly detection, Anomaly Score: D..."
5,gargEvaluationAnomalyDetection2022,An Evaluation of Anomaly Detection and Diagnos...,"Garg, Zhang, Samaran, Savitha, Foo",2022,"[1D input, Anomaly detection, Anomaly Score: D..."
6,helsenFleetwideConditionMonitoring2018,Fleet-wide condition monitoring combining vibr...,"Helsen, Peeters, Verstraeten, Verbeke, Gioia, ...",2018,"[1D input, Anomaly detection, Anomaly Score: S..."
7,huNovelVehicleGearbox2021,A Novel Vehicle Gearbox Fault Diagnosis Approa...,"Hu, Huang, Rashed, Kheshti",2021,"[1D input, Anomaly detection, Anomaly Score: S..."
8,kangFaultAnomalyDetection2024,Fault anomaly detection method of aero-engine ...,"Kang, Chen, Wang, Sheng, Wei",2024,"[1D input, Aero engine, Anomaly detection, Ano..."
9,kimSemiSupervisedAutoencoderAuxiliary2020,A Semi-Supervised Autoencoder With an Auxiliar...,"Kim, Jo, Kim, Park, Jeong, Han, Kim, Youn",2020,"[1D input, Anomaly detection, Anomaly Score: D..."


In [24]:
dataset_folder = "data"
file = "unsupervised_collection.csv"

# Read the CSV from the dataset folder and bind the result to `df`.
# NOTE(review): this rebinds `df`, which the JSON-loading cell also assigns.
# Confirm which source is intended when the notebook is run top to bottom.
df = pd.read_csv(os.path.join(dataset_folder,file))

In [15]:
# Copy 'Tags' into 'Manual Tags', replacing missing values with an empty string.
df["Manual Tags"] = df["Tags"].fillna("")
# Split each value on ';' and keep the stripped, non-empty pieces as a list.
# NOTE(review): assumes 'Tags' holds ';'-separated strings at this point — confirm.
df["TagList"] = df["Manual Tags"].apply(lambda x: [tag.strip() for tag in x.split(";") if tag.strip()])

In [40]:
# Tag vocabularies, one list per category column of the summary table.
# Entries are compared against tags by exact string equality.
Method_type = [
    'Reconstruction',
    'Forecasting-based',
    'Representation',
    'Hybrid',
]
Architecture_type = [
    'Autoencoder',
    'GAN',
    'Variable Autoencoder',
    'U-net',
    'Normalizing Flow',
    'Arch type: Custom Transformer',
    'Arch type: Direct model',
    'Arch type: SL',
    'Autoencoder + Diffusion Model',
    'Autoencoder + Transformer',
]
Base_model = [
    'MLP',
    'CNN',
    'LSTM',
    'Transformer',
    'Ensemble',
    'Ensemble (CNN + Attention + LSTM)',
    'GMM',
    'IMMSC',
    'TCN',
    'MOPA',
]
Component_type = [
    'Generic / Benchmark data',
    'bearings',
    'Aero engine',
    '3D printer',
    'Chemical plant',
    'Diesel engine',
    'Engine',
    'Gearbox',
    'Marine engine',
    'Power plant',
    'ECG data',
    'IoT devices',
    'Vehicle',
    'Wind turbine',
]
Interpretability = [
    'Interpretability: Not addressed',
    'Output residuals',
    'Interpretability: Attention + residuals',
    'Interpretability: Latent + Output residuals',
    'Interpretability: Latent space',
    'Interpretability: Output residuals',
]
Data_input = ['1D input', '2D input']
Data_channels = ['Univariate', 'Multivariate']
Fleet_data = ['Fleet data', 'Single entity']
Anomaly_score = ['Anomaly Score: Static', 'Anomaly Score: Dynamic']


In [41]:
def find_tag(tags, category_list):
    """Return the first element of ``tags`` that appears in ``category_list``.

    The scan follows the order of ``tags``, not the order of
    ``category_list``.

    Args:
        tags: Iterable of tag values. A single string is treated as a
            ';'-separated list of tags (each piece is stripped). A
            non-iterable value such as ``None`` or a float NaN is treated
            as "no tags".
        category_list: Collection of the values accepted for one category.

    Returns:
        The first matching tag, or ``None`` when nothing matches.
    """
    if isinstance(tags, str):
        # Iterating a raw string would compare single characters against the
        # category values, so split it into individual tags first.
        tags = [piece.strip() for piece in tags.split(";")]
    try:
        candidates = iter(tags)
    except TypeError:
        # Missing tags (None / NaN) simply mean "no match".
        return None
    for tag in candidates:
        if tag in category_list:
            return tag
    return None

In [74]:
# Build a new DataFrame with one row per paper and one column per category.
new_data = []
missing_tags = []
for _, row in df.iterrows():
    tags = row['Tags']
    paper_data = {
        # Rendered as a LaTeX \cite{...} command around the citation key.
        'Paper title': f"\\cite{{{row['Citation Key']}}}",
        'Publication Year': row['Publication Year'],
        'Method_type': find_tag(tags, Method_type),
        'Architecture_type': find_tag(tags, Architecture_type),
        'Base_model': find_tag(tags, Base_model),
        'Component_type': find_tag(tags, Component_type),
        'Interpretability': find_tag(tags, Interpretability),
        'Data_input': find_tag(tags, Data_input),
        'Data_channels': find_tag(tags, Data_channels),
        'Fleet_data': find_tag(tags, Fleet_data),
        'Anomaly_score': find_tag(tags, Anomaly_score),
    }

    # Check whether any category is missing: warn and record it for the report.
    missing_categories = [key for key, value in paper_data.items() if value is None]
    if missing_categories:
        warnings.warn(f"El paper '{row['Title']}' no tiene tags para las categorías: {', '.join(missing_categories)}")
        missing_tags.append({'Paper title': row['Title'], 'Missing categories': missing_categories})

    new_data.append(paper_data)

# Create the new DataFrame.
new_df = pd.DataFrame(new_data)

# Show the papers that have missing categories.
if missing_tags:
    print("Artículos con categorías faltantes:")
    for item in missing_tags:
        # The records above store the title under 'Paper title'; reading
        # 'Title' here raised KeyError whenever there was something to report.
        print(f"- {item['Paper title']}: {', '.join(item['Missing categories'])}")


In [75]:
# Show the 'Paper title' value of the row labelled 0.
new_df.loc[0, 'Paper title']

'\\cite{alnegheimishM2ADMultiSensorMultiSystem2025}'

In [76]:
# Shorten the interpretability labels; values not listed stay unchanged.
new_df['Interpretability'] = new_df['Interpretability'].replace(
    to_replace=[
        'Interpretability: Not addressed',
        'Interpretability: Attention + residuals',
        'Interpretability: Latent + Output residuals',
        'Interpretability: Latent space',
        'Interpretability: Output residuals',
    ],
    value=[
        'Not addressed',
        'Attention + residuals',
        'Latent + residuals',
        'Latent space',
        'Output residuals',
    ],
)

In [77]:
# Shorten the architecture labels; values not listed stay unchanged.
new_df['Architecture_type'] = new_df['Architecture_type'].replace(
    to_replace=[
        'Arch type: Custom Transformer',
        'Arch type: Direct model',
        'Arch type: SL',
        'Autoencoder + Diffusion Model',
        'Autoencoder + Transformer',
    ],
    value=[
        'Custom Transformer',
        'Direct model',
        'SL',
        'AE + Diffusion',
        'AE + Transformer',
    ],
)

In [78]:
# Drop the 'Anomaly Score: ' prefix from the two anomaly-score labels.
new_df['Anomaly_score'] = new_df['Anomaly_score'].replace(
    ['Anomaly Score: Static', 'Anomaly Score: Dynamic'],
    ['Static', 'Dynamic'],
)

In [79]:
# Rename the generic benchmark label; every other component value is kept.
new_df['Component_type'] = new_df['Component_type'].replace(
    'Generic / Benchmark data', 'Benchmark data'
)

In [82]:
# Order the rows from the most recent publication year to the oldest.
new_df = new_df.sort_values('Publication Year', ascending=False)

In [83]:
# Preview the first five rows of the sorted table.
new_df.head(5)

Unnamed: 0,Paper title,Publication Year,Method_type,Architecture_type,Base_model,Component_type,Interpretability,Data_input,Data_channels,Fleet_data,Anomaly_score
0,\cite{alnegheimishM2ADMultiSensorMultiSystem2025},2025,Forecasting-based,Direct model,LSTM,Benchmark data,Output residuals,1D input,Multivariate,Fleet data,Dynamic
29,\cite{yanUnsupervisedLearningMachinery2024},2024,Reconstruction,Autoencoder,MLP,bearings,Not addressed,1D input,Multivariate,Single entity,Dynamic
18,\cite{namBreakingTimeFrequencyGranularity2024},2024,Reconstruction,Direct model,Transformer,Benchmark data,Not addressed,1D input,Multivariate,Fleet data,Static
14,\cite{milkovicFRAnomalyFlowbasedRapid2024},2024,Representation,Normalizing Flow,CNN,Benchmark data,Output residuals,2D input,Univariate,Single entity,Static
13,\cite{miaoReconstructionbasedAnomalyDetection2...,2024,Reconstruction,GAN,Transformer,Benchmark data,Output residuals,1D input,Multivariate,Fleet data,Static


In [89]:
# Write the table to an Excel workbook in the dataset folder, without the index.
new_df.to_excel(
    excel_writer=os.path.join(dataset_folder, 'unsupervised_collection.xlsx'),
    index=False,
)

In [85]:
# Keep only the columns that go into the LaTeX table.
latex_df = new_df.loc[
    :,
    ['Paper title', 'Publication Year', 'Method_type', 'Architecture_type', 'Base_model'],
]

In [90]:
# Export to LaTeX: the printed text is pandas' stand-alone `tabular` rendering.
latex_table = latex_df.to_latex(index=False)
print(latex_table)

# Only the data rows are reused in the file below. Writing the whole
# `tabular` environment inside the longtable would nest one table in another
# and repeat the header with unescaped underscores. The rows are the lines
# between pandas' \midrule and \bottomrule; `.index` raises ValueError if
# either rule is missing from the rendering.
table_lines = latex_table.splitlines()
body_rows = table_lines[table_lines.index(r'\midrule') + 1:table_lines.index(r'\bottomrule')]

# Optional: save to a .tex file as a landscape longtable whose header is
# repeated on every page. UTF-8 is set explicitly so the output does not
# depend on the platform default encoding.
header_row = r'Paper title & Publication Year & Method type & Architecture type & Base model \\ '
with open('table.tex', 'w', encoding='utf-8') as f:
    f.write(r'\begin{landscape}' + '\n')
    f.write(r'\begin{longtable}{lllll}' + '\n')  # Column specification of the table
    f.write(r'\caption{Tabla simplificada con columnas clave}\label{tab:my_table} \\ ' + '\n')
    # Header for the first page.
    f.write(r'\toprule' + '\n')
    f.write(header_row + '\n')
    f.write(r'\midrule' + '\n')
    f.write(r'\endfirsthead' + '\n')
    # Header for the continuation pages.
    f.write(r'\toprule' + '\n')
    f.write(header_row + '\n')
    f.write(r'\midrule' + '\n')
    f.write(r'\endhead' + '\n')
    f.write(r'\bottomrule' + '\n')
    f.write(r'\endfoot' + '\n')
    # The table content goes here, one data row per line.
    for body_row in body_rows:
        f.write(body_row + '\n')
    f.write(r'\end{longtable}' + '\n')
    f.write(r'\end{landscape}' + '\n')

\begin{tabular}{lllll}
\toprule
Paper title & Publication Year & Method_type & Architecture_type & Base_model \\
\midrule
\cite{alnegheimishM2ADMultiSensorMultiSystem2025} & 2025 & Forecasting-based & Direct model & LSTM \\
\cite{yanUnsupervisedLearningMachinery2024} & 2024 & Reconstruction & Autoencoder & MLP \\
\cite{namBreakingTimeFrequencyGranularity2024} & 2024 & Reconstruction & Direct model & Transformer \\
\cite{milkovicFRAnomalyFlowbasedRapid2024} & 2024 & Representation & Normalizing Flow & CNN \\
\cite{miaoReconstructionbasedAnomalyDetection2024} & 2024 & Reconstruction & GAN & Transformer \\
\cite{leeExplainableTimeSeries2024} & 2024 & Representation & Autoencoder & MLP \\
\cite{najafiAttentionAutoencoderHybrid2024} & 2024 & Forecasting-based & Autoencoder & Transformer \\
\cite{kangFaultAnomalyDetection2024} & 2024 & Representation & Custom Transformer & Transformer \\
\cite{yangSelfSupervisedLearningSignal2023} & 2023 & Reconstruction & Autoencoder & Transformer \\
\cite{