# Basic Information

### Interferometer:
Livingston
### Time Frame:
Jan 01, 2017, 00:00:00 - Dec 31, 2017, 23:59:59
### Channels Group:
LSC
### Selected Channels: 
LSC:DARM_OUT_DQ

LSC:MCL_IN1_DQ

LCS:MCL_OUT_DQ

LSC:MICH_IN1_DQ

LSC:MICH_OUT_DQ

LSC:POP_A_LF_OUT_DQ

LSC:POP_A_RF45_I_ERR_DQ
### Glitches Under Study:
Blip

Koi Fish

In [5]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

If any of the libraries above didn't work you can run the command below for a native install within Jupyter Notebooks:

~~~
import sys
!{sys.executable} -m pip install <library_name> --user
~~~

Alternately - if you already have them downloaded but still didn't work - you can try to update them:

~~~
import sys
!{sys.executable} -m pip install --upgrade <library_name>
~~~

### Data Pre-Processing

df: contains the glitches from the selected channels under study for the given time range

calib: contains the glitches caught on CALIB_STRAIN on the same time range

In [32]:
df = pd.read_csv("trigger_data_2017.csv")
df = df[df["ifo"] == 'L1']
df["event_id"] = [int(s.split("sngl_burst:event_id:")[1]) for s in df["event_id"]]
df = df.drop({"process_id", "confidence", "chisq", "chisq_dof"}, axis=1)

calib = pd.read_csv("calib_strain_data.csv")
calib = calib[calib["ifo"] == 'L1']
calib["event_id"] = calib["event_id"].astype(int) 
calib = calib[(calib["label"] == 'Blip') | (calib["label"] == 'Koi_Fish')]
calib = calib[(calib["start_time_ns"] >= np.min(df["start_time_ns"])) & (calib["start_time_ns"] <= np.max(df["start_time_ns"]))]
calib = calib.drop({'sample_type', 'url1', 'url2', 'url3', 'url4', "process_id", "confidence", "chisq", "chisq_dof"}, axis=1)
calib = calib.sort_values(by=['start_time'], ascending=True)

In [44]:
calib.describe()

Unnamed: 0,event_time,peak_time,peak_time_ns,start_time,start_time_ns,duration,event_id,peak_frequency,central_freq,bandwidth,amplitude,snr,param_one_value
count,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0,556.0
mean,1131695000.0,1131695000.0,540079800.0,1131695000.0,436061900.0,0.837493,255.100719,198.396599,1389.274392,2712.641907,1.394422e-20,71.728759,-0.099043
std,3132823.0,3132823.0,263006900.0,3132823.0,273414400.0,1.034087,1423.208786,119.466614,1216.155653,2444.914976,5.054419e-20,82.207811,1.850619
min,1126410000.0,1126410000.0,976085.0,1126410000.0,0.0,0.03125,0.0,30.68652,83.4133,102.826599,9.150000000000001e-23,8.55515,-3.14013
25%,1128818000.0,1128818000.0,315429000.0,1128818000.0,218750000.0,0.1875,25.0,111.128151,377.964439,668.939667,2.7425e-22,19.198432,-1.78081
50%,1131752000.0,1131752000.0,553222400.0,1131752000.0,437500000.0,0.35864,55.5,170.6539,968.817474,1865.503479,8.22e-22,35.764845,-0.34394
75%,1134386000.0,1134386000.0,768676200.0,1134386000.0,656250000.0,1.25,92.0,262.064606,2217.773499,4384.390015,4.5725e-21,83.537041,1.564468
max,1137247000.0,1137247000.0,998291000.0,1137247000.0,961914100.0,7.375,16981.0,1166.041504,4001.470459,7944.340332,8.0199999999999995e-19,463.239899,3.13277


In [43]:
df.describe()

Unnamed: 0,year,peak_time,peak_time_ns,start_time,start_time_ns,duration,event_id,peak_frequency,central_freq,bandwidth,amplitude,snr,param_one_value
count,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0,672.0
mean,2017.0,1170608000.0,523491600.0,1170608000.0,344285100.0,14.803593,829.089286,526.784536,2307.247566,4600.334758,331.046595,13531.892797,-0.087317
std,0.0,18943580.0,306291500.0,18943580.0,318064800.0,17.400419,4176.128242,1592.505344,1514.788011,3026.300969,2090.252608,61638.427946,1.811026
min,2017.0,1135689000.0,61035.0,1135689000.0,0.0,0.4375,0.0,4.58109,17.30175,23.950951,0.0,1005.351013,-3.12912
25%,2017.0,1165061000.0,249755700.0,1165061000.0,0.0,5.25,18.0,24.547934,489.835114,970.523193,0.209815,1320.29773,-1.631302
50%,2017.0,1179614000.0,554687000.0,1179614000.0,324218500.0,9.796875,52.0,34.753849,2555.064209,5098.607422,0.878879,1953.573791,-0.155855
75%,2017.0,1183225000.0,768310100.0,1183225000.0,620239100.0,16.203125,310.25,79.938129,3900.092773,7792.185547,3.728187,3612.211181,1.48024
max,2017.0,1194682000.0,998047100.0,1194682000.0,979491900.0,167.972656,64195.0,7715.249512,4015.164551,7792.185547,23383.40039,935976.9375,3.13934


### Data Exploration

#### 1) Amplitude for top 10% loudest glitches on calib_strain vs. studied channels

In [46]:
fig = go.Figure()
x = 0.1 # snr cutoff (get x% loudest, i.e. 0.1 to get 10% loudest glitches)
snr_list = np.sort(calib["snr"])
snr_cutoff = (snr_list[int((1-x)*len(snr_list)):])[0] # find cutoff point
hist_calib = go.Histogram(x=calib[calib["snr"] >= snr_cutoff]["amplitude"], name="Calib Strain")
fig.add_trace(hist_calib)

snr_list = np.sort(df["snr"])
snr_cutoff = (snr_list[int((1-x)*len(snr_list)):])[0] # find cutoff point
hist_df = go.Histogram(x=df[df["snr"] >= snr_cutoff]["amplitude"], name="Studied Channels")
fig.add_trace(hist_df)

fig.update_layout(title=f"Amplitude Distribution CALIB_STRAIN vs. Studied Channels ({100*x}% loudest glitches)")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
feature_to_plot = 'amplitude'
x = 0.9 # snr cutoff

range_x = 5e-19
fig = go.Figure()

# Calib_Strain Glitches
df_g = calib[['label'] == g]
snr_list = np.sort(df_g["snr"])
snr_cutoff = (snr_list[int(x*len(snr_list)):])[0] # find 'x'th percentile cutoff point
hist = go.Histogram(x=df_g[(df_g["snr"] >= snr_cutoff) & (df_g[feature_to_plot] <= range_x)][feature_to_plot], nbinsx=20, name=str(g))
fig.add_trace(hist)

    
fig.update_layout(title=f"{glitches} {feature_to_plot}, range: [0, {range_x}], {100-(100*x)}% Loudest SNRs only")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

range_x = 5e-21
fig = go.Figure()
for g in glitches:
    df_g = df[df['label'] == g]
    snr_list = np.sort(df_g["snr"])
    snr_cutoff = (snr_list[int(x*len(snr_list)):])[0] # find 'x'th percentile cutoff point
    hist = go.Histogram(x=df_g[(df_g["snr"] >= snr_cutoff) & (df_g[feature_to_plot] <= range_x)][feature_to_plot], nbinsx=50, name=str(g))
    fig.add_trace(hist)
fig.update_layout(title=f"{glitches} {feature_to_plot}, range: [0, {range_x}], {100-(100*x)}% Loudest SNRs only")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

In [None]:
glitches = ['Blip', 'Koi_Fish']
feature_to_plot = 'amplitude'
x = 0.9 # snr cutoff

range_x = 5e-19
fig = go.Figure()
for g in glitches:
    df_g = df[df['label'] == g]
    snr_list = np.sort(df_g["snr"])
    snr_cutoff = (snr_list[int(x*len(snr_list)):])[0] # find 'x'th percentile cutoff point
    hist = go.Histogram(x=df_g[(df_g["snr"] >= snr_cutoff) & (df_g[feature_to_plot] <= range_x)][feature_to_plot], nbinsx=20, name=str(g))
    fig.add_trace(hist)
fig.update_layout(title=f"{glitches} {feature_to_plot}, range: [0, {range_x}], {100-(100*x)}% Loudest SNRs only")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

range_x = 5e-21
fig = go.Figure()
for g in glitches:
    df_g = df[df['label'] == g]
    snr_list = np.sort(df_g["snr"])
    snr_cutoff = (snr_list[int(x*len(snr_list)):])[0] # find 'x'th percentile cutoff point
    hist = go.Histogram(x=df_g[(df_g["snr"] >= snr_cutoff) & (df_g[feature_to_plot] <= range_x)][feature_to_plot], nbinsx=50, name=str(g))
    fig.add_trace(hist)
fig.update_layout(title=f"{glitches} {feature_to_plot}, range: [0, {range_x}], {100-(100*x)}% Loudest SNRs only")
fig.update_layout(barmode='overlay')
fig.update_traces(opacity=0.75)
fig.show()

### Channel Coupling with CALIB_STRAIN

In [23]:
tol = 10 # delay tolerance in millisecs
tol *= 1e6 # transfrom millisecs -> nanosecs
matched_time = list()
count = 0
for i in range(df.shape[0]): # for every glitch in the channels being analysed
    g = df.iloc[i] # specific trigger to be compared with calib_strain
    for time in calib["start_time_ns"]:
        if np.abs(g["start_time_ns"] - time) <= tol:  # difference in time recorded on both channels is within tolerance
            count += 1
            matched_time.append(g["start_time_ns"])
            break # no need to continue searching if a match was already found
print(f"{count} glitches ({np.round(100*count/len(df), 1)}%) were successfully matched")

match_df = df[df["start_time_ns"].isin(matched_time)]
match_df.head()

662 glitches (98.5%) were successfully matched


Unnamed: 0,year,month,ifo,peak_time,peak_time_ns,start_time,start_time_ns,duration,search,event_id,peak_frequency,central_freq,bandwidth,channel,amplitude,snr,param_one_name,param_one_value
0,2017,January,L1,1135689234,875000000,1135689227,250000000,12.75,Omicron,36,18.52425,3820.903076,7628.500977,LSC-DARM_OUT_DQ,1.61928,1051.424927,phase,1.67692
1,2017,January,L1,1135753175,375243902,1135753169,660156011,11.4043,Omicron,13,7715.249512,3901.419189,7789.533203,LSC-DARM_OUT_DQ,0.014877,309551.6563,phase,-1.51519
2,2017,January,L1,1135847011,179687023,1135847008,500000000,8.0,Omicron,14,26.94878,3820.726318,7628.85498,LSC-DARM_OUT_DQ,7.03537,1179.638794,phase,0.63051
3,2017,January,L1,1135848462,519531011,1135848460,0,5.5,Omicron,20,44.819481,3205.633057,6400.94873,LSC-DARM_OUT_DQ,9.14201,1888.194336,phase,-3.11213
4,2017,January,L1,1136015489,687500000,1136015474,750000000,15.25,Omicron,1,40.943748,420.936676,828.568237,LSC-DARM_OUT_DQ,0.819338,1300.982422,phase,0.58563


In [26]:
fig = px.bar(x=match_df["channel"].value_counts().values, y=match_df["channel"].value_counts().index, orientation='h')
fig.update_layout(title_text = f"Coupling for ASC channels with Calib_Strain", yaxis=dict(title_text="channel"), xaxis=dict(title_text="frequency of coupling", tick0=0, dtick=50.0))
fig.update_yaxes(showticklabels=True)

### Channel Coupling with Each Other

In [29]:
verbose = False

couples = dict()
for i in range(len(df)):
    channel = df.iloc[i]["channel"]
    time = df.iloc[i]["start_time"]
    coupled_channels = df[(df["start_time"] >= time - 1) & (df["start_time"] <= time + 1)]["channel"]
    coupled_channels = list(coupled_channels[coupled_channels != channel]) # remove couplings with itself
    if channel in couples.keys():
        curr_val = couples[channel]
        update_val = curr_val + coupled_channels
        couples[channel] = update_val
    else:
        couples[channel] = coupled_channels

if verbose:
    for k in couples.keys():
        print(f"{k}: {couples[k]}\n")

In [30]:
for k in couples.keys():
    series = pd.Series(couples[k])
    fig = px.bar(x=series.value_counts().values, y=series.value_counts().index, orientation='h')
    fig.update_layout(title_text = f"Coupling for {k}", yaxis=dict(title_text="coupled channels"), xaxis=dict(title_text="frequency of coupling", tick0=0, dtick=4.0))
    fig.update_yaxes(showticklabels=True)
    fig.show()