# Load and Prepare Dataset

In [1]:
import pandas as pd
import numpy as np

Load dataset, including metadata info on the cavity frequency and length of the run.

In [2]:
def load_dataset(run, path='db/'):
    """Load one sliced FFT run from its Excel workbook.

    The workbook ``{path}AnalyzedDataFFT_Run_{run}_sliced.xlsx`` is read
    three times, once per sheet:

    - sheet 0 (no header row): run metadata,
    - sheet 1: frequencies (the column labelled ``1`` is used),
    - sheet 2: power spectra, one column per subrun after the first column.

    Parameters
    ----------
    run : run identifier interpolated into the file name.
    path : prefix prepended verbatim to the file name (no separator is
        inserted, so it must end with one if it is a directory).

    Returns
    -------
    data : DataFrame with a ``freq`` column and ``fft0``, ``fft1``, ...
        columns holding the subruns.
    center : metadata cell at column 1, row 3 (the cavity frequency).
    length : metadata cell at column 1, row 8 (number of files per slice).
    """
    file = f'{path}AnalyzedDataFFT_Run_{run}_sliced.xlsx'

    meta_sheet = pd.read_excel(file, sheet_name=0, header=None)
    freq_sheet = pd.read_excel(file, sheet_name=1)   # frequencies
    power_sheet = pd.read_excel(file, sheet_name=2)  # power

    data = pd.DataFrame({'freq': freq_sheet[1]})

    # The first column of the power sheet is skipped; every following
    # column is one subrun, numbered from zero.
    for subrun, col_name in enumerate(power_sheet.columns[1:]):
        data[f'fft{subrun}'] = power_sheet[col_name]

    # Cavity frequency and number of files in each slice.
    meta_values = meta_sheet[1]
    center = meta_values[3]
    length = meta_values[8]

    print(f'Dataset loaded: {file}\nCavity frequency: {center} Hz\nfft subrun:{len(data.columns)-1}')

    return data, center, length

Prepare data to be analyzed:
- Select only a 200 bins window around the cavity frequency
- Rescale data to yoctowatts ($10^{-24}$ W): in general, the average measured power is expected to be known and set by the noise temperature of the system, so we can rescale the data so that the power at the cavity frequency is $T_{noise} \cdot k_B \cdot \Delta\nu_{bin}$ $[W]$
- Compute weights, i.e. the errors associated with each bin; the error is assumed to be Poissonian, so they are computed as $\sqrt{y_{bin}}$. An additional factor $\frac{1}{\sqrt{N}}$ is applied because the bin values are obtained as the average over $N = 2731 \cdot length$ runs

In [3]:
def prep_data(alldata, center, subrun=0, length=500, bin_width=651, nbins=100):
    """Select a window around the cavity frequency, rescale it and weight it.

    Parameters
    ----------
    alldata : DataFrame with a ``freq`` column and an ``fft{subrun}`` column.
        It is not modified; the selected window is copied.
    center : frequency around which the window is taken.
    subrun : index of the ``fft{subrun}`` column to process.
    length : number of files in the slice; the number of averaged spectra
        is taken as ``N = length * 2731``.
    bin_width : width of one frequency bin, in the same units as ``freq``.
        Used both for the window size and for the reference power.
    nbins : number of bins kept on each side of ``center``.

    Returns
    -------
    freq : frequencies inside the window (index reset to 0..n-1).
    fft : rescaled power in units of 1e-24 W (yoctowatt), scaled so that
        the minimum of the window equals ``3.5 * 1.38e-23 * bin_width`` W.
    weights : per-bin errors, computed as ``fft / sqrt(N)``.
    ref : multiplicative factor that was applied to the raw power.
    """
    N = length * 2731  # N = 1365500 if length = 500
    column = f'fft{subrun}'

    # Keep the bins strictly inside (center - nbins*bin_width,
    # center + nbins*bin_width); the default is 200 bins of 651 Hz.
    half_window = bin_width * nbins
    mask = (alldata['freq'] > center - half_window) & (alldata['freq'] < center + half_window)
    # drop=True: the old row labels are not needed as an extra column.
    cavdata = alldata[mask].reset_index(drop=True)

    # Scale data to yoctowatt, pinning the minimum power in the window to
    # 3.5 * k_B * bin_width, with k_B = 1.38e-23 J/K.
    # NOTE(review): 3.5 is presumably the system noise temperature in K.
    # The bin width must be the same one used for the window, so bin_width
    # is used here rather than a literal 651.
    minW = np.min(cavdata[column])  # minimum power in the window
    ref = minW**(-1) * 3.5*1.38e-23*bin_width/1e-24
    cavdata[column] = ref * cavdata[column]  # y'

    # Error on an average of N spectra: y' / sqrt(N).
    weights = cavdata[column] / np.sqrt(N)

    freq = cavdata['freq']
    fft = cavdata[column]

    return freq, fft, weights, ref