# Load and Prepare Dataset

In [1]:
import os
import pandas as pd
import numpy as np

Load dataset, including metadata info on the cavity frequency and length of the run.

In [2]:
def load_dataset(file):
    """Read one run workbook and return its spectra together with the run metadata.

    Parameters
    ----------
    file : str or path-like
        Excel workbook with three sheets: metadata (sheet 0, read without a
        header row), frequencies (sheet 1) and power (sheet 2).

    Returns
    -------
    data : pandas.DataFrame
        Column ``freq`` followed by one ``fft{i}`` column per subrun.
    center
        Cavity frequency, read from row 3 of metadata column 1.
    length
        Number of files in each slice, read from row 8 of metadata column 1.
    """
    meta = pd.read_excel(file, sheet_name=0, header=None)
    freq = pd.read_excel(file, sheet_name=1)   # frequencies
    fft = pd.read_excel(file, sheet_name=2)    # power

    data = pd.DataFrame({'freq': freq[1]})

    # The first column of the power sheet is skipped; every remaining column
    # is one subrun and is stored as fft0, fft1, ...
    for idx, col_name in enumerate(fft.columns[1:]):
        data[f'fft{idx}'] = fft[col_name]

    # Cavity frequency and number of files in each slice
    info = meta[1]
    center, length = info[3], info[8]

    return data, center, length

Prepare data to be analyzed:
- Select only a 200 bins window around the cavity frequency
- Rescale data to yoctowatt ($10^{-24}$ W): in general, the average measured power should be known and set by the noise temperature of the system, so we can rescale the data so that the power at the cavity frequency is $T_{noise} \cdot k_B \cdot \Delta\nu_{bin}$ $[W]$
- Compute weights, i.e. the errors associated with each bin; the error is assumed to be equal to $y_{bin}$, with an additional factor $\frac{1}{\sqrt{N}}$ because the bin values are obtained as the average over $N = 2731 \cdot length$ runs

In [3]:
def prep_data(alldata, center, mean=False, subrun=0, length=500, bin_width=651, nbins=100):
    """Cut a window around the cavity frequency, rescale the power and compute weights.

    Parameters
    ----------
    alldata : pandas.DataFrame
        Table with a ``"freq"`` column first, followed by the power columns
        ``fft0``, ``fft1``, ...
    center : float
        Cavity frequency; the selected window is centred on it.
    mean : bool, default False
        If True, use the row-wise mean of every column after the first one;
        ``subrun`` is then ignored.
    subrun : int, default 0
        Index of the ``fft{subrun}`` column to use when ``mean`` is False.
    length : int, default 500
        Number of files in the slice; the number of averages is ``N = length * 2731``.
    bin_width : float, default 651
        Width of one frequency bin (651 Hz by default). Used both for the
        window size and for the power rescaling.
    nbins : int, default 100
        Half-width of the window in bins (the window spans ``2 * nbins`` bins).

    Returns
    -------
    freq : pandas.Series
        Frequencies inside the window.
    fft : pandas.Series
        Rescaled power inside the window.
    weights : pandas.Series
        Per-bin errors, ``fft / sqrt(N)``.
    ref : float
        Scale factor that was applied to the raw power.
    N : int
        Number of averaged runs.
    """
    N = length * 2731  # N = 1365500 if length = 500

    # Keep only the bins strictly inside center +/- nbins * bin_width
    # (default: 200 bins of 651 Hz).
    half_span = bin_width * nbins
    in_window = ((alldata["freq"] > center - half_span) &
                 (alldata["freq"] < center + half_span))
    cavdata = alldata[in_window].reset_index(drop=True)

    freq = cavdata["freq"]
    if mean:
        fft = cavdata.iloc[:, 1:].mean(axis=1)
    else:
        fft = cavdata[f'fft{subrun}']

    # Scale data to yoctowatt (1e-24 W).
    # The minimum power in the window is mapped to T_noise * k_B * bin_width,
    # with T_noise = 3.5 and k_B = 1.38e-23. The bin width comes from the
    # `bin_width` argument (previously hard-coded to 651) so the scaling stays
    # consistent with the window selection for non-default bin widths.
    minW = np.min(fft)  # minimum power in the window
    ref = minW**(-1) * 3.5 * 1.38e-23 * bin_width / 1e-24
    fft = ref * fft  # y'

    # Weights: y' / sqrt(N). (A sqrt(y)-based weighting was considered as an
    # alternative but is not used.)
    weights = calc_weights(fft, N)

    return freq, fft, weights, ref, N

In [4]:
def calc_weights(data, N=1365500):
    """Return the per-bin errors ``data / sqrt(N)``.

    Parameters
    ----------
    data : scalar, array or pandas.Series
        Rescaled power values y'.
    N : int, default 1365500
        Number of runs that were averaged to obtain each bin.
    """
    root_n = np.sqrt(N)
    return data / root_n  # -> y'/sqrt(N)

## Load Runs

In [6]:
def list_files(path):
    """Return the paths of all files found under ``path``, subdirectories included.

    Each entry is the containing directory joined with the file name, in the
    order produced by ``os.walk``.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(path)
        for filename in filenames
    ]

def Load(run=None, path='db/', subrun=0, bin_width=651, nbins=100):
    """Load and prepare one run, or every file found under ``path``.

    Parameters
    ----------
    run : optional
        Run identifier. If None, every file under ``path`` is loaded and the
        subruns of each file are averaged; otherwise only
        ``AnalyzedDataFFT_Run_{run}_sliced.xlsx`` is loaded and the single
        subrun ``subrun`` is used.
    path : str, default 'db/'
        Directory holding the run files (with or without a trailing separator).
    subrun : int, default 0
        Subrun index used when a specific run is requested.
    bin_width : float, default 651
        Width of one frequency bin, forwarded to ``prep_data``.
    nbins : int, default 100
        Half-width of the selected window in bins, forwarded to ``prep_data``.

    Returns
    -------
    list of dict
        One entry per file with the keys ``name``, ``length``, ``center``,
        ``ref``, ``freq``, ``fft`` and ``weights`` (the last three as arrays).
    """
    if run is None:
        # get list of all the files in the directory
        file_list = list_files(path)
        mean = True
    else:
        # get specific run; os.path.join makes this work whether or not
        # `path` ends with a separator
        file_list = [os.path.join(path, f'AnalyzedDataFFT_Run_{run}_sliced.xlsx')]
        mean = False

    InfoDataset = []
    for file in file_list:
        data, center, length = load_dataset(file)
        freq, fft, weights, ref, N = prep_data(data, center, mean=mean, subrun=subrun,
                                               length=length, bin_width=bin_width, nbins=nbins)

        InfoDataset.append({
            "name": file, "length": length, "center": center, "ref": ref,
            "freq": freq.values, "fft": fft.values, "weights": weights.values,
        })

    return InfoDataset