# Import libraries

In [1]:
import sys
from pathlib import Path
# Put the parent directory first on the module search path; the `from src...`
# imports in the next cell presumably rely on this -- TODO confirm.
sys.path.insert(0,'..')
import glob
# NOTE(review): openpyxl is presumably the engine pd.read_excel needs for the
# .xlsx file read below; uncomment to install it if it is missing -- confirm.
#!pip install openpyxl

In [2]:
import os
import pickle
import re
import sys
from functools import partial
from multiprocessing import Pool

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from lhcsmapi.Time import Time
from lhcsmapi.metadata import signal_metadata
from lhcsmapi.pyedsl.dbsignal.post_mortem.PmDbRequest import PmDbRequest
from lhcsmapi.analysis.RbCircuitQuery import RbCircuitQuery

from src.acquisitions.current_voltage_diode_leads_nxcals import CurrentVoltageDiodeLeadsNXCALS
from src.acquisitions.current_voltage_diode_leads_pm import CurrentVoltageDiodeLeadsPM
from src.acquisitions.ee_t_res_pm import EETResPM
from src.acquisitions.ee_u_dump_res_pm import EEUDumpResPM
from src.acquisitions.leads import Leads
from src.acquisitions.pc_pm import PCPM
from src.acquisitions.qh_pm import QHPM
from src.acquisitions.voltage_logic_iqps import VoltageLogicIQPS
from src.acquisitions.voltage_nqps import VoltageNQPS
from src.acquisitions.voltage_nxcals import VoltageNXCALS

from src.utils.utils import log_acquisition
from src.utils.hdf_tools import acquisition_to_hdf5, load_from_hdf_with_regex
from src.utils.mp3_excel_processing import get_fgc_timestamp, get_fgc_timestamp_missing

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
Deprecated in NumPy 1.20

# Read the (clean) MP3 file

In [3]:
# Load the MP3 Excel extract unmodified; later cells derive cleaned frames from it.
mp3_excel_path = "../data/RB_TC_extract_2021_11_22.xlsx"
mp3_fpa_df_raw = pd.read_excel(mp3_excel_path)

# Number of rows read from the sheet.
len(mp3_fpa_df_raw)

4409

In [4]:
# The first row of the sheet holds units and nine further rows hold only the
# "Before Notebooks" / "After Notebooks" markers. None of them has both a
# 'Date (FGC)' and a 'Circuit Name', so they are dropped here.
# .copy() makes mp3_fpa_df an independent frame, so adding or updating columns on
# it in later cells does not raise pandas' SettingWithCopyWarning.
mp3_fpa_df = mp3_fpa_df_raw.dropna(subset=['Date (FGC)', 'Circuit Name']).copy()

# Show the dropped rows as a visual sanity check.
mp3_fpa_df_raw.loc[~mp3_fpa_df_raw.index.isin(mp3_fpa_df.index)]

Unnamed: 0,Circuit Name,Circuit Family,Period,Date (FGC),Time (FGC),FPA Reason,Timestamp_PIC,Delta_t(FGC-PIC),Delta_t(EE_even-PIC),Delta_t(EE_odd-PIC),...,Outer cable type,I_Q_SM18,dI_Q_Acc,dI_Q_LHC,Comment,Analysis performed by,lhcsmapi version,lhcsm notebook version,Unnamed: 46,Unnamed: 47
0,,RB,,2008-04-01 00:00:00,13:14:15,,,[ms],[ms],[ms],...,,[A],[A],[A],,,,,,
1,RB.A12,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
169,RB.A23,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
264,RB.A34,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
545,RB.A45,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
897,RB.A56,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1146,RB.A67,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1268,RB.A78,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1430,RB.A81,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1581,,RB,After Notebooks,,,,,,,,...,,,,,,,After Notebooks,,,


# Find the FGC timestamp for each FPA event in the MP3 Excel file

In [5]:
# Evaluate get_fgc_timestamp once per row (axis=1) and store the result as a new
# 'timestamp_fgc' column on mp3_fpa_df.
# NOTE(review): presumably the helper yields NaN when no timestamp is found, since
# the following cells test this column with isna() -- confirm.
fgc_timestamps = mp3_fpa_df.apply(get_fgc_timestamp, axis=1)
mp3_fpa_df['timestamp_fgc'] = fgc_timestamps


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# Find missing FGC timestamps in the MP3 file

In [6]:
# Some FGC timestamps in the MP3 file have wrong hours, so the first lookup left
# them empty. Retry those rows with get_fgc_timestamp_missing, restricted to the
# first entry of each quench event ('Nr in Q event' == 1).
missing_primary_mask = mp3_fpa_df['timestamp_fgc'].isna() & (mp3_fpa_df['Nr in Q event'] == 1)

# .copy() detaches the selection from mp3_fpa_df, so assigning a column on it does
# not raise SettingWithCopyWarning.
mp3_fpa_df_primary_missing = mp3_fpa_df[missing_primary_mask].copy()
mp3_fpa_df_primary_missing['timestamp_fgc'] = mp3_fpa_df_primary_missing.apply(get_fgc_timestamp_missing, axis=1)

# Keep only the rows for which the retry produced a timestamp ...
found_fgc_timestamps_df = mp3_fpa_df_primary_missing['timestamp_fgc'].dropna()
# ... and write them back to the main frame at the same index labels.
mp3_fpa_df.loc[found_fgc_timestamps_df.index, 'timestamp_fgc'] = found_fgc_timestamps_df.values


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [7]:
# Report the size of the cleaned table and how many rows are the first entry of a
# quench event ('Nr in Q event' == 1).
# NOTE(review): earlier inline notes expected 841 and 834 here ("841 Primary
# quenches?", "834 primary quenches with correct notes"); the recorded output
# differs. The first figure is a plain row count taken before any de-duplication,
# so the "unique" label may be misleading -- confirm.
n_rows = len(mp3_fpa_df)
is_primary_entry = mp3_fpa_df['Nr in Q event'] == 1
n_primary_entries = len(mp3_fpa_df[is_primary_entry])

print(f"{n_rows} unique fgc events")
print(f"{n_primary_entries} mp3 primary quench entries")

4399 unique fgc events
918 mp3 primary quench entries


# Select events to download

In [8]:
# Keep a single row per ('timestamp_fgc', 'Circuit Name') pair; the first
# occurrence of each pair is the one retained (drop_duplicates default).
mp3_fpa_df_unique = mp3_fpa_df.drop_duplicates(
    subset=['timestamp_fgc', 'Circuit Name'],
)

In [9]:
# Lower bound of the download period as a Unix timestamp in nanoseconds.
# 1388530800000000000 ns corresponds to 2014-01-01 00:00:00+01:00.
# For reference, 2018-01-01 00:00:00+01:00 would be 1514761200000000000; values
# can be converted with Time.to_string(...) / Time.to_unix_timestamp(...).
PERIOD_START_NS = 1_388_530_800_000_000_000

# Keep only events at or after the period start; rows whose timestamp is NaN fail
# the comparison and are therefore excluded as well.
mp3_fpa_df_period = (
    mp3_fpa_df_unique[mp3_fpa_df_unique['timestamp_fgc'] >= PERIOD_START_NS]
    .reset_index(drop=True)
)
len(mp3_fpa_df_period)

828

In [10]:
# Root directory handed to acquisition_to_hdf5 as file_dir for every download.
file_dir = Path('/eos/project/m/ml-for-alarm-system/private/RB_signals')

# Acquisition classes instantiated for each FPA event, in this order.
signal_groups = [
    PCPM,
    VoltageNXCALS,
    VoltageNQPS,
    VoltageLogicIQPS,
    EEUDumpResPM,
    QHPM,
]

## Test query

In [11]:
# Single FPA event used to try the download pipeline end to end.
# (If interpreted as ns since the Unix epoch, the timestamp is about
# 2021-03-28 20:09:34 UTC -- TODO confirm the unit.)
fpa_identifier = {
    'circuit_type': 'RB',
    'circuit_name': 'RB.A78',
    'timestamp_fgc': 1616962174400000000,
}

# Location passed to log_acquisition as log_path.
# NOTE(review): this points at "data_test", while the test query below passes
# context_dir_name="context_test" -- confirm which directory is intended.
context_path = file_dir / "data_test"

In [None]:
# Sub-directory names used for this trial run, passed through to
# acquisition_to_hdf5 unchanged.
test_dir_names = {
    "context_dir_name": "context_test",
    "failed_queries_dir_name": "failed_test",
    "data_dir_name": "data_test",
}

# Build one acquisition per signal group for the test event and hand each one to
# acquisition_to_hdf5.
# NOTE(review): `spark` is not defined anywhere in the visible notebook; it
# presumably comes from the hosting environment's Spark session -- confirm.
for signal_group in signal_groups:
    group = signal_group(**fpa_identifier, spark=spark)
    acquisition_to_hdf5(acquisition=group, file_dir=file_dir, **test_dir_names)

# Record that all signal groups for this event have been processed.
log_acquisition(
    identifier=fpa_identifier,
    log_data={"download_complete": True},
    log_path=context_path,
)

## Load data

In [None]:
# Regex patterns, one per subplot, matched against the first column name of each
# loaded frame in the plotting cell.
signals = [
    'I_MEAS',
    'VoltageNQPS.*U_DIODE',
    'VoltageNXCALS.*U_DIODE',
    'I_EARTH_PCNT',
    'IEARTH.I_EARTH',
    'U_QS0',
    'U_1',
    'U_2',
    'I_HDS',
    'U_HDS',
    'EEUDumpResPM',
]

# HDF5 file of the test event: <circuit_type>_<circuit_name>_<timestamp_fgc>.hdf5
# inside the "data_test" directory.
file_name = f"{fpa_identifier['circuit_type']}_{fpa_identifier['circuit_name']}_{fpa_identifier['timestamp_fgc']}.hdf5"
file_path = file_dir / 'data_test' / file_name

len(signals)

In [None]:
# Load the stored signals of the test event from its HDF5 file. The plotting cell
# iterates over `data` and treats every element as a DataFrame.
# NOTE(review): the helper's name suggests it can filter by regex, but `signals`
# is not passed here -- confirm that loading everything is intended.
data = load_from_hdf_with_regex(file_path)

In [None]:
# One subplot per pattern in `signals`; every loaded frame whose first column name
# matches the pattern is drawn into that subplot, and the title shows the pattern
# together with the number of matching frames.
# Requires `re`, which is imported in the import cell at the top of the notebook.
n_cols = 3
# Ceiling division so the grid always has room for every pattern
# (4 rows for the current 11 patterns).
n_rows = max(1, -(-len(signals) // n_cols))

fig = plt.figure(figsize=(20, 15))
for i, s in enumerate(signals):
    ax = fig.add_subplot(n_rows, n_cols, i + 1)
    pattern = re.compile(s)  # compile once per subplot instead of once per frame
    n_signals = 0
    for df in data:
        if pattern.search(df.columns[0]):
            ax.plot(df.index.values, df.values)
            n_signals += 1
    ax.set_title(f"{s} ({n_signals})")
plt.show()

## Query all FGC events

In [None]:
# Download every signal group for each selected FPA event, then log the event as
# complete.
# NOTE(review): `spark` is not defined in the visible notebook, and log_path still
# uses `context_path`, which was set to the "data_test" directory for the test
# query -- confirm both are intended for the full run.
for index, row in mp3_fpa_df_period.iterrows():
    # Identifier of the current event, built from the MP3 row.
    fpa_identifier = {
        'circuit_type': row['Circuit Family'],
        'circuit_name': row['Circuit Name'],
        'timestamp_fgc': int(row['timestamp_fgc']),
    }

    for signal_group in signal_groups:
        group = signal_group(**fpa_identifier, spark=spark)
        # No directory names given: acquisition_to_hdf5 uses its own defaults.
        acquisition_to_hdf5(acquisition=group, file_dir=file_dir)

    log_acquisition(
        identifier=fpa_identifier,
        log_data={"download_complete": True},
        log_path=context_path,
    )