# One way to convert root files to dataframes


In [None]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ROOT

# Function to get number of entries

In [None]:
%%cpp -d
#include "../RootTreeStructs.h"
using namespace std;
int Entries(){
    int run = 3956;
//#    int probe = 0;
    TFile* filein = new TFile(Form("~/Desktop/FieldPlainRootOutput_0%d_tier1.root",run),"read");
    TDirectory * dTrolley = (TDirectory*)filein->Get("TreeGenTrolley");
    TTree* trolley = (TTree*)dTrolley->Get("trolley");
    int N = trolley->GetEntries();
    filein->Close();
    return N;
}

# Function to get basic trolley info

In [None]:
%%cpp -d
#include "../RootTreeStructs.h"
using namespace std;
void TrolleyInfo(double* f, double* p, double* fid_len, int run, int probe){
//  int run = 3956;
//  int probe = 0;
    TFile* filein = new TFile(Form("~/Desktop/FieldPlainRootOutput_0%d_tier1.root",run),"read");
    TDirectory * dTrolley = (TDirectory*)filein->Get("TreeGenTrolley");
    TTree* trolley = (TTree*)dTrolley->Get("trolley");
    gm2field::trolleyTimeStamp_t trolleyTimeStamp;
    gm2field::trolleyProbeFrequency_t trolleyProbeFrequency;
    gm2field::trolleyFieldMultipole_t trolleyFieldMultipole;
    gm2field::trolleyPosition_t trolleyPosition;
    trolley->SetBranchAddress("TimeStamp",&trolleyTimeStamp.GpsCycleStart);
    trolley->SetBranchAddress("ProbeFrequency",&trolleyProbeFrequency.Frequency);
    trolley->SetBranchAddress("FieldMultipole",&trolleyFieldMultipole.Multipole);
    trolley->SetBranchAddress("Position",&trolleyPosition.Phi);
    
    //#for FID length
    gm2field::trolleyProbeSignal_t trolleyProbeSignal;
    //#need to set the first one in the struct, which happens to be amplitude
    trolley->SetBranchAddress("ProbeSignal",&trolleyProbeSignal.Amplitude);
    
///////////////////////////////////////////////////////////////////////////////////
    int nAllReadings = trolley->GetEntries();
    for(int i=0;i<nAllReadings;i++){ //loop through trolley events
        trolley->GetEntry(i);
        f[i] = trolleyProbeFrequency.Frequency[probe][0]; 
        p[i] = trolleyPosition.Phi[probe][2];
        fid_len[i] = trolleyProbeSignal.FidLength[probe];
    }
    filein->Close();
    return;
}

# Function to get trolley FIDs

In [None]:
%%cpp -d
#include "../RootTreeStructs.h"
using namespace std;
void RawFid(double* raw_fid,  int run, int reading, int probe){
//  int run = 3956;
//  int probe = 0;
    TFile* filein = new TFile(Form("~/Desktop/FieldPlainRootOutput_0%d_tier1.root",run),"read");
    TDirectory * dMyTrolley = (TDirectory*)filein->Get("TrolleyWfExtraction");
    TTree* myTrolley = (TTree*)dMyTrolley->Get("trolleyWf");
    gm2field::trolleyFid_t trolleyFid;
    myTrolley->SetBranchAddress("trolleyFid", &trolleyFid.RawFid);
///////////////////////////////////////////////////////////////////////////////////
    int fidLength = 16000;
    int dataStartingPoint = probe*fidLength;    
    myTrolley->GetEntry(reading);                                
    for (int i=0; i<fidLength; i++) {
        raw_fid[i] = trolleyFid.RawFid[i];
    }
    filein->Close();
    return;
}

# Loop over all trolley probes for a given run list. 

* all info for each probe gets stored as a df.
* each probe df gets stored into a dict
* each dict is stored as file named by its run


In [14]:
# Set up for the loop.
# NOTE(review): ROOT.Entries() is called without a run argument, so it reports
# the entry count of the helper's built-in run (3956). The buffers below are
# only guaranteed to be large enough while run_list holds that same run --
# revisit before adding other runs.
n_entries = ROOT.Entries()
probe_list = list(range(17))
run_list = [3956]

for run in run_list:
    print("starting run",run)
    # Fresh dict per run so one run's pickle can never carry another run's frames.
    all_data_frames = {}
    for probe in probe_list:
        print("starting probe",probe)
        # NaN-filled (not np.empty) so that rows the C++ helper did not write
        # show up as missing values instead of uninitialised memory.
        freq = np.full(n_entries, np.nan)
        pos = np.full(n_entries, np.nan)
        fid_len = np.full(n_entries, np.nan)
        # The helper fills the three buffers in place.
        ROOT.TrolleyInfo(freq,pos,fid_len,run,probe)
        df = pd.DataFrame({'Frequency':freq,
                           'Position':pos,
                           'Fid_len': fid_len})
        all_data_frames[probe] = df

    # One pickle per run: a dict mapping probe number -> DataFrame.
    with open("./output/TrolleyInfo_" + str(run) + ".pkl", "wb") as pkl_file:
        pickle.dump(all_data_frames, pkl_file)

starting run 3956
starting probe 0
starting probe 1
starting probe 2


# (How to load the pkl files)

To load the pkl files, you would do something like the following, wherein a run's pkl file, containing a dict, is loaded into "trolley"


```python
with open("output/TrolleyInfo_3956.pkl", "rb") as f:
    trolley = pickle.load(f)
```
To get the DataFrame for probe 6, for example, simply do

```python
trolley[6]
```

In general, not all probes may be stored. To get a list of the stored probes, do

```python
keys_list = [key for key in trolley.keys()]
```





# Show how to get the raw FIDs

In [None]:
# Pick the waveform to fetch: run, probe and reading (tree entry) number.
run = 3956
probe = 0
reading = 4
# 16000-sample buffer, filled in place by the C++ helper.
raw_fid = np.empty(16000)
ROOT.RawFid(raw_fid, run, reading, probe)

In [None]:
# Draw the waveform as unconnected black circles (same as the 'o' format string).
plt.plot(raw_fid, marker='o', linestyle='None', color='black');

# Select specific raw FIDs based on their length

## First, medium-length FIDS

In [None]:
# The frame is built with the column name 'Fid_len'; 'FID_len' raised KeyError.
df.hist(column='Fid_len', bins=300);

In [None]:
# Bare expression -> rich table display instead of the plain-text print.
df.head(9)

In [None]:
# Select the readings whose FID length falls in the narrow "medium" window.
# The mask comes from the frame's own column (named 'Fid_len', not 'FID_len')
# rather than the leftover global `fid_len` array, and the selection is copied
# so that later column additions do not act on a view of `df`.
med_mask = (df['Fid_len'] > 0.00599) & (df['Fid_len'] < 0.006)
df_med_fids = df[med_mask].copy()
df_med_fids.hist(column='Fid_len', bins=300)
# The index labels are the reading numbers used to fetch the raw FIDs.
med_event_list = df_med_fids.index.tolist()
print(med_event_list)

In [None]:
# Add an empty object-dtype "RawFid" column that can later hold one array per
# row. The previous version read a column that had never been created
# (KeyError) and threw away the result of astype().
df_med_fids = df_med_fids.assign(RawFid=None)

In [None]:
# NOTE(review): `df_med_fids` was selected from `df`, which holds the last
# probe handled by the conversion loop, while `probe` is fixed to 0 here --
# confirm this is intended.
probe = 0
run = 3956
# Fetch one 16000-sample waveform per selected reading, then attach them all
# at once as an object column aligned on the reading number. This does not
# depend on a pre-existing "RawFid" column.
med_raw_fids = []
for reading in med_event_list:
    raw_fid = np.empty(16000)
    ROOT.RawFid(raw_fid,run,reading,probe)
    med_raw_fids.append(raw_fid)
df_med_fids = df_med_fids.assign(
    RawFid=pd.Series(med_raw_fids, index=med_event_list, dtype=object)
)

In [None]:
# Show only the first rows; every "RawFid" cell holds a 16000-sample array.
df_med_fids.head()

## Second, long FIDs

In [None]:
# Select the readings whose FID length falls in the narrow "long" window.
# The mask comes from the frame's own column (named 'Fid_len', not 'FID_len')
# rather than the leftover global `fid_len` array, and the selection is copied
# so that later column additions do not act on a view of `df`.
long_mask = (df['Fid_len'] > 0.00879) & (df['Fid_len'] < 0.0088)
df_long_fids = df[long_mask].copy()
df_long_fids.hist(column='Fid_len', bins=300)
# The index labels are the reading numbers used to fetch the raw FIDs.
long_event_list = df_long_fids.index.tolist()
print(long_event_list)

In [None]:
# Add an empty object-dtype "RawFid" column that can later hold one array per
# row. assign() returns a new frame, so nothing is written into a slice of
# `df`; the old astype() call was a no-op because its result was discarded.
df_long_fids = df_long_fids.assign(RawFid=None)
df_long_fids.head()

In [None]:
# NOTE(review): `df_long_fids` was selected from `df`, which holds the last
# probe handled by the conversion loop, while `probe` is fixed to 0 here --
# confirm this is intended.
probe = 0
run = 3956
# Fetch one 16000-sample waveform per selected reading, then attach them all
# at once as an object column aligned on the reading number.
long_raw_fids = []
for reading in long_event_list:
    raw_fid = np.empty(16000)
    ROOT.RawFid(raw_fid,run,reading,probe)
    long_raw_fids.append(raw_fid)
df_long_fids = df_long_fids.assign(
    RawFid=pd.Series(long_raw_fids, index=long_event_list, dtype=object)
)
df_long_fids.head()

## short FIDs, and very short FIDs

In [None]:
# Select the readings whose FID length falls in the "short" window. The mask
# uses the frame's own 'Fid_len' column (the name 'FID_len' does not exist)
# and the selection is copied so the new column is not written into a view.
short_mask = (df['Fid_len'] > 0.0039) & (df['Fid_len'] < 0.004)
df_short_fids = df[short_mask].copy()
df_short_fids.hist(column='Fid_len', bins=300)
short_event_list = df_short_fids.index.tolist()
print(short_event_list)

# NOTE(review): the selection comes from `df` (last probe of the conversion
# loop) while `probe` is fixed to 0 here -- confirm this is intended.
probe = 0
run = 3956
# Fetch one 16000-sample waveform per selected reading and store them as an
# object column aligned on the reading number.
short_raw_fids = []
for reading in short_event_list:
    raw_fid = np.empty(16000)
    ROOT.RawFid(raw_fid,run,reading,probe)
    short_raw_fids.append(raw_fid)
df_short_fids['RawFid'] = pd.Series(short_raw_fids, index=short_event_list, dtype=object)
df_short_fids.head()

In [None]:
# Select the readings whose FID length falls in the "very short" window. The
# mask uses the frame's own 'Fid_len' column (the name 'FID_len' does not
# exist) and the selection is copied so the new column is not written into a view.
vshort_mask = (df['Fid_len'] > 0.001) & (df['Fid_len'] < 0.002)
df_vshort_fids = df[vshort_mask].copy()
df_vshort_fids.hist(column='Fid_len', bins=300)
vshort_event_list = df_vshort_fids.index.tolist()

# NOTE(review): the selection comes from `df` (last probe of the conversion
# loop) while `probe` is fixed to 0 here -- confirm this is intended.
probe = 0
run = 3956
# Fetch one 16000-sample waveform per selected reading and store them as an
# object column aligned on the reading number.
vshort_raw_fids = []
for reading in vshort_event_list:
    raw_fid = np.empty(16000)
    ROOT.RawFid(raw_fid,run,reading,probe)
    vshort_raw_fids.append(raw_fid)
df_vshort_fids['RawFid'] = pd.Series(vshort_raw_fids, index=vshort_event_list, dtype=object)

## Compare random FIDs from the four groups

In [None]:
# Overlay one hand-picked waveform from each length group, drawn in the same
# order as before: long (green), medium (yellow), short (red), very short (black).
for group_frame, event, colour in (
    (df_long_fids, 1963, 'green'),
    (df_med_fids, 1004, 'yellow'),
    (df_short_fids, 6130, 'red'),
    (df_vshort_fids, 5528, 'black'),
):
    plt.plot(group_frame.loc[event, "RawFid"], 'o', color=colour, alpha=0.01)

# Look at FID length vs event number

In [None]:
# The frame is built with the column name 'Fid_len'; 'FID_len' raised KeyError.
df[['Fid_len']].plot();