# Import libraries

In [1]:
import sys
from pathlib import Path
sys.path.insert(0,'..')
import glob
#!pip install openpyxl

In [2]:
import os
import sys
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import h5py

from functools import partial
from multiprocessing import Pool

# sigmon env: /eos/project/l/lhcsm/public/1.5.20.sh
from lhcsmapi.Time import Time
from lhcsmapi.metadata.SignalMetadata import SignalMetadata
from lhcsmapi.pyedsl.dbsignal.post_mortem.PmDbRequest import PmDbRequest
from lhcsmapi.analysis.RbCircuitQuery import RbCircuitQuery

from src.acquisitions.current_voltage_diode_leads_nxcals import CurrentVoltageDiodeLeadsNXCALS
from src.acquisitions.current_voltage_diode_leads_pm import CurrentVoltageDiodeLeadsPM
from src.acquisitions.ee_t_res_pm import EETResPM
from src.acquisitions.ee_u_dump_res_pm import EEUDumpResPM
from src.acquisitions.leads import Leads
from src.acquisitions.pc_pm import PCPM
from src.acquisitions.qh_pm import QHPM
from src.acquisitions.voltage_logic_iqps import VoltageLogicIQPS
from src.acquisitions.voltage_logic_nqps import VoltageNQPS
from src.acquisitions.voltage_nxcals import VoltageNXCALS

from src.utils.utils import log_acquisition
from src.utils.hdf_tools import acquisition_to_hdf5

# Read the (clean) MP3 file

In [3]:
mp3_fpa_df_raw = pd.read_excel("./RB_TC_extract_2021_11_22.xlsx")
len(mp3_fpa_df_raw)

4409

In [4]:
# First row contains units, 9 rows contain only "Before Notebooks" and "After Notebooks" information, which need to be droped
mp3_fpa_df = mp3_fpa_df_raw.dropna(subset = ['Date (FGC)', 'Circuit Name'])
mp3_fpa_df_raw.iloc[~mp3_fpa_df_raw.index.isin(mp3_fpa_df.index)] #show droped rows

Unnamed: 0,Circuit Name,Circuit Family,Period,Date (FGC),Time (FGC),FPA Reason,Timestamp_PIC,Delta_t(FGC-PIC),Delta_t(EE_even-PIC),Delta_t(EE_odd-PIC),...,Outer cable type,I_Q_SM18,dI_Q_Acc,dI_Q_LHC,Comment,Analysis performed by,lhcsmapi version,lhcsm notebook version,Unnamed: 46,Unnamed: 47
0,,RB,,2008-04-01 00:00:00,13:14:15,,,[ms],[ms],[ms],...,,[A],[A],[A],,,,,,
1,RB.A12,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
169,RB.A23,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
264,RB.A34,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
545,RB.A45,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
897,RB.A56,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1146,RB.A67,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1268,RB.A78,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1430,RB.A81,RB,Before Notebooks,,,,,,,,...,,,,,,,Before Notebooks,,,
1581,,RB,After Notebooks,,,,,,,,...,,,,,,,After Notebooks,,,


# Find FGC timestamp for each FPA event in MP3 excel

In [5]:
def find_real_fgc_timestamp(circuit_name, fgc_datetime):
    
    fgc_timestamp = Time.to_unix_timestamp(fgc_datetime)
    metadata_fgc = SignalMetadata.get_circuit_signal_database_metadata(
            'RB', circuit_name, 'PC', 'PM', timestamp_query=fgc_timestamp)

    one_sec_in_ns = 1e9 
    start_time = fgc_timestamp - one_sec_in_ns
    end_time = fgc_timestamp + one_sec_in_ns
 
    source_timestamp_fgc = PmDbRequest.find_events(metadata_fgc['source'],
                                                   metadata_fgc['system'],
                                                   metadata_fgc['className'],
                                                   t_start=start_time,
                                                   t_end=end_time)

    return [(circuit_name, el[1]) for el in source_timestamp_fgc]

# getting the list of timestamps
def get_fgc_timestamp(d):
    date_time_str = f"{d['Date (FGC)']} {d['Time (FGC)']}".replace("00:00:00 ","")
    
    real_fgc_timestamps = find_real_fgc_timestamp(d['Circuit Name'], date_time_str)
    if len(real_fgc_timestamps) > 1:
        print(f"Found more than one event for timestamp {date_time_str}")
    elif not real_fgc_timestamps:
        print(f"Found no event for timestamp {date_time_str}")
        return None
    _, real_fgc_timestamp = real_fgc_timestamps[0]
    return int(real_fgc_timestamp)

In [6]:
mp3_fpa_df['timestamp_fgc'] = mp3_fpa_df.apply(get_fgc_timestamp, axis=1)

Found no event for timestamp 2008-08-19 12:13:28
Found no event for timestamp 2008-08-19 18:53:12
Found no event for timestamp 2008-08-19 20:15:07
Found no event for timestamp 2008-08-19 20:15:07
Found no event for timestamp 2008-08-20 18:41:36
Found no event for timestamp 2008-08-22 09:59:25
Found no event for timestamp 2008-08-22 09:59:25
Found no event for timestamp 2008-08-22 09:59:25
Found no event for timestamp 2008-07-08 08:30:10
Found no event for timestamp 2008-07-08 14:39:16
Found no event for timestamp 2008-07-09 17:14:45
Found no event for timestamp 2008-07-11 14:42:00
Found no event for timestamp 2008-07-15 21:20:30
Found no event for timestamp 2008-07-15 21:20:30
Found no event for timestamp 2008-07-15 21:20:30
Found no event for timestamp 2008-07-15 21:20:30
Found no event for timestamp 2008-07-15 21:20:30
Found no event for timestamp 2008-08-09 02:19:48
Found no event for timestamp 2008-09-07 17:34:03
Found no event for timestamp 2009-12-04 11:19:48
Found no event for t

Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-07 17:53:50
Found no event for timestamp 2008-05-09 15:13:38
Found no event for timestamp 2008-05-09 15:13:38
Found no event for timestamp 2008-05-09 15:13:38
Found no event for timestamp 2008-05-09 15:13:38
Found no event for timestamp 2008-05-15 19:43:14
Found no event for timestamp 2008-05-15 19:43:14
Found no event for timestamp 2008-05-16 17:22:27
Found no event for timestamp 2008-05-16 17:22:27
Found no event for timestamp 2008-05-16 17:22:27
Found no event for timestamp 2008-05-16 17:22:27
Found no event for timestamp 2008-05-19 12:34:10
Found no event for timestamp 2008-05-19 12:34:10
Found no event for timestamp 2008-05-19 12:34:10
Found no event for timestamp 2008-05-19 12:34:10
Found no event for t

Found no event for timestamp 2021-06-15 00:10:53.600000
Found no event for timestamp 2021-06-15 00:10:53.600000
Found no event for timestamp 2021-06-15 00:10:53.600000
Found no event for timestamp 2021-06-15 00:10:53.600000
Found no event for timestamp 2021-06-15 00:10:53.600000
Found no event for timestamp 2021-06-19 00:25:03.400000
Found no event for timestamp 2021-06-19 00:25:03.400000
Found no event for timestamp 2021-06-19 00:25:03.400000
Found no event for timestamp 2021-06-19 00:25:03.400000
Found no event for timestamp 2021-06-19 00:25:03.400000
Found no event for timestamp 2021-06-19 00:25:03.400000



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# Find missing fgc timestamps in mp3 file

In [7]:
mp3_fpa_df_primary_missing = mp3_fpa_df[(mp3_fpa_df.timestamp_fgc.isna()) & (mp3_fpa_df['Nr in Q event']==1)]
print(f"missing primary events: {len(mp3_fpa_df_primary_missing)}")
mp3_fpa_df_primary_missing.Period.value_counts()

missing primary events: 88


HWC 2008    75
HWC 2007     6
HWC 2021     4
HWC 2015     2
HWC 2009     1
Name: Period, dtype: int64

In [8]:
def get_fgc_timestamp_missing(d):
    date_time_str = f"{d['Date (FGC)']} {d['Time (FGC)']}".replace("00:00:00","")
    for t in range(6,24):
        date_time_str_new = date_time_str.replace(" 00:",f" {t}:")
        real_fgc_timestamps = find_real_fgc_timestamp(d['Circuit Name'], date_time_str_new)
        if real_fgc_timestamps:
            print(f"Found event for timestamp {date_time_str_new}")
            _, real_fgc_timestamp = real_fgc_timestamps[0]
            return int(real_fgc_timestamp)

In [9]:
mp3_fpa_df_primary_missing['timestamp_fgc'] = mp3_fpa_df_primary_missing.apply(get_fgc_timestamp_missing, axis=1)

Found event for timestamp 2021-05-22  8:43:20.100000
Found event for timestamp 2021-05-22  9:58:40.300000
Found event for timestamp 2021-06-15  14:10:53.600000
Found event for timestamp 2021-06-19  7:25:03.400000



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [10]:
found_fgc_timestamps_df = mp3_fpa_df_primary_missing["timestamp_fgc"].dropna()
mp3_fpa_df.loc[found_fgc_timestamps_df.index, "timestamp_fgc"] = mp3_fpa_df_primary_missing["timestamp_fgc"].dropna().values

mp3_fpa_df_primary_missing = mp3_fpa_df[(mp3_fpa_df.timestamp_fgc.isna()) & (mp3_fpa_df['Nr in Q event']==1)]
print(f"missing primary events: {len(mp3_fpa_df_primary_missing)}")
mp3_fpa_df_primary_missing.Period.value_counts()

missing primary events: 84



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


HWC 2008    75
HWC 2007     6
HWC 2015     2
HWC 2009     1
Name: Period, dtype: int64

In [11]:
# dropping duplicates of timestamps
print(f"{len(mp3_fpa_df['timestamp_fgc'])} mp3 excel file entries")
mp3_fpa_df_unique = mp3_fpa_df.drop_duplicates(subset=['timestamp_fgc', 'Circuit Name']) ### you need timestamp + circuit !!! 

print(f"{len(mp3_fpa_df_unique)} unique fgc events") # 841 Primary quenches?
print(f"{len(mp3_fpa_df[mp3_fpa_df['Nr in Q event']==1])} mp3 primary quench entries") # 834 primary quenches with correct notes

4399 mp3 excel file entries
845 unique fgc events
918 mp3 primary quench entries


# Prepare csv for marvins notebooks

In [12]:
mp3_fpa_df["Date (FGC)"].iloc[0].strftime('%Y%m%d')

'20080819'

In [13]:
#mp3_fpa_df#[mp3_fpa_df["Date (FGC)"]=="2021-03-08 00:00:00"]
import datetime
mp3_fpa_df[mp3_fpa_df["Period"]=="HWC 2021"]["Date (FGC)"].iloc[0]
mp3_fpa_df[mp3_fpa_df["Date (FGC)"]==datetime.datetime(2021, 3, 28, 0, 0)]

def format_date(d):
    return str(d).replace(" 00:00:00","")
mp3_fpa_df["Date (FGC)"] = mp3_fpa_df["Date (FGC)"].apply(format_date)

mp3_fpa_df.loc[1691:1695].to_csv("RB.A78_FPA-2021-03-28-22h09.csv")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# Select events to download

In [14]:
# Time.to_string(1514761200000000000), Time.to_unix_timestamp('2018-01-01 00:00:00+01:00')
mp3_fpa_df_period = mp3_fpa_df_unique[mp3_fpa_df_unique['timestamp_fgc'] >= 1388530800000000000].reset_index(drop=True) 
len(mp3_fpa_df_period)

828

In [15]:
# use columns where download is not complete
context_path = Path('/eos/project/m/ml-for-alarm-system/private/RB_signals/context_data.csv')
if os.path.exists(context_path):
    df_context = pd.read_csv(context_path)
    dowloaded_fgc_ts = df_context[df_context.download_complete == True].timestamp_fgc.values

    mp3_fpa_df_to_download = mp3_fpa_df_period[~mp3_fpa_df_period.timestamp_fgc.isin(dowloaded_fgc_ts)]
else:
    mp3_fpa_df_to_download = mp3_fpa_df_period
len(mp3_fpa_df_to_download)

Columns (241) have mixed types.Specify dtype option on import or set low_memory=False.


282

In [16]:
signal_groups = [CurrentVoltageDiodeLeadsPM, CurrentVoltageDiodeLeadsNXCALS, EETResPM, EEUDumpResPM, Leads, PCPM, QHPM, VoltageLogicIQPS, VoltageNQPS, VoltageNXCALS]

file_dir = Path('/eos/project/m/ml-for-alarm-system/private/RB_signals')

In [17]:
# what is wrong with this one?
mp3_fpa_df_to_download.drop( mp3_fpa_df_to_download[mp3_fpa_df_to_download.timestamp_fgc == 1423519110160000000].index, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [2]:
for index, row in mp3_fpa_df_to_download.iterrows():
    fpa_identifier = {'circuit_type': row['Circuit Family'],
                      'circuit_name': row['Circuit Name'],
                      'timestamp_fgc': int(row['timestamp_fgc'])}

    for signal_group in signal_groups:
        group = signal_group(**fpa_identifier, spark=spark)
        acquisition_to_hdf5(acquisition=group, file_dir=file_dir)
        
    log_acquisition(identifier=fpa_identifier, log_data={"download_complete": True}, log_path=context_path)

NameError: name 'mp3_fpa_df_to_download' is not defined

In [20]:
context_path

PosixPath('/eos/project/m/ml-for-alarm-system/private/RB_signals/context_data.csv')

In [22]:
log_acquisition(identifier=fpa_identifier, log_data={"download_complete": False}, log_path=context_path)

Columns (241) have mixed types.Specify dtype option on import or set low_memory=False.
