In [37]:
import re
import numpy as np
import pandas as pd

In [2]:
def ProcessInputFile(file):
    with open(file) as f:
        lines = [line.rstrip() for line in f]
        
    beginning = "LambdaSel_T.Lam...WARNING -----------------BEGINNING EVENT-----------------"
    end = "LambdaSel_T.Lam...WARNING -------------------------------------------------"

    lindx = lines.index(beginning)
    rindx = len(lines) - lines[::-1].index(end)

    lines = lines[lindx:rindx]
    
    for index in range(len(lines)):
        warningPrologue = "LambdaSel_T.Lam...WARNING "
        if lines[index].startswith(warningPrologue):
            lines[index] = lines[index][len(warningPrologue):]
    
    return lines

In [3]:
def CheckPrefix(function, prefix, line):
    if not line.startswith(prefix):
        print("ERROR in ", function.__name__, ": prefix\n", prefix, "\ndoes not match line\n", line)

def ProcessTuple(line, prefix):
    CheckPrefix(ProcessTuple, prefix, line)
    return tuple(map(float, line[len(prefix):].split(', ')))

def ProcessTupleWithParentheses(line, prefix):
    CheckPrefix(ProcessTupleWithParentheses, prefix, line)
    return tuple(map(float, line[len(prefix)+1:-1].split(',')))

def ProcessInt(line, prefix):
    CheckPrefix(ProcessInt, prefix, line)
    return int(line[len(prefix):])

def ProcessFloat(line, prefix):
    CheckPrefix(ProcessFloat, prefix, line)
    return float(line[len(prefix):])

def ProcessMatrix(lines, prefix):
    CheckPrefix(ProcessMatrix, prefix, lines[0])
    matrix = []
    for line in lines[1:]:
        if line[0] == '[':
            line = line[1:]
        if line[-1] == ']':
            line = line[:-1]
        matrix.append([float(number) for number in line.split()])
    return matrix

def ProcessPID(line):
    prefix = "PID: LHCb.ParticleID"
    CheckPrefix(ProcessPID, prefix, line)
    PID = int(line[len(prefix)+1:-1])
    if abs(PID) == 2212:
        return 'proton'
    elif abs(PID) == 211:
        return 'pion'
    else:
        print("ERROR: unrecognized particle in line\n", line)
        return -1

def ProcessStatus(line):
    prefix = "Status: "
    CheckPrefix(ProcessStatus, prefix, line)
    return line[len(prefix):]

In [4]:
def ProcessParticleChunk(particleChunk):
    chunkBegin = "PID: LHCb.ParticleID"
    chunkEnd =   "------------------------"
    
    if not (particleChunk[0].startswith(chunkBegin) and particleChunk[-1] == chunkEnd):
        print("ERROR: the following particle chunk is not standard:\n", particleChunk)
        return -1
    
    particleName = ProcessPID(particleChunk[0])
    refPoint = ProcessTupleWithParentheses(particleChunk[1], "Reference point: ")
    fourMom = ProcessTupleWithParentheses(particleChunk[2], "4-momentum: ")
    posMomCovMatrix = ProcessMatrix(particleChunk[3:11], "PosMomCovMatrix:")
    
    return particleName, refPoint, fourMom, posMomCovMatrix

In [5]:
def ProcessIterationChunk(iterChunk):
    chunkBegin = "Iter: "
    chunkEnd =   "------------------------"
    
    if not (iterChunk[0].startswith(chunkBegin) and iterChunk[-1] == chunkEnd):
        print("ERROR: the following iteration chunk is not standard:\n", iterChunk)
        return -1
    
    iteration = ProcessInt(iterChunk[0],"Iter: ")
    currentVtx = ProcessTuple(iterChunk[1],"x: ")
    previousVtx = ProcessTuple(iterChunk[2],"x0: ")
    ci = ProcessMatrix(iterChunk[3:7], "ci:")
    chi2 = ProcessFloat(iterChunk[7], "chi2: ")
    deltaVtx = ProcessTuple(iterChunk[8], "dx: ")
    deltaDistance = ProcessFloat(iterChunk[9], "d1: ")
    deltaChi2 = ProcessFloat(iterChunk[10], "d2: ")
    
    return iteration, currentVtx, previousVtx, ci, chi2, deltaVtx, deltaDistance, deltaChi2

In [68]:
def ProcessChunk(chunk):
    chunkBegin = "-----------------BEGINNING EVENT-----------------"
    chunkEnd =   "-------------------------------------------------"
    
    if not (chunk[0] == chunkBegin and chunk[-1] == chunkEnd):
        print("ERROR: the following chunk is not standard:\n", chunk)
        return -1
    
    initVtx = ProcessTuple(chunk[2], "Initial vtx: ")
    initChi2 = ProcessFloat(chunk[3], "Initial chi2: ")
    initCi = ProcessMatrix(chunk[4:8], "Initial ci:")
    
    particle1 = ProcessParticleChunk(chunk[9:21])
    particle2 = ProcessParticleChunk(chunk[21:33])
    
    if particle1[0] == 'proton':
        protonInfo = particle1
        pionInfo = particle2
    else:
        pionInfo = particle1
        protonInfo = particle2
    
    lengthOfIterationChunk = 12
    numberOfIterations = len(chunk[33:-2]) / lengthOfIterationChunk
    if int(numberOfIterations) != numberOfIterations:
        print("ERROR: number of iterations", numberOfIterations, "is not an integer.")
        return -1
    
    iter_currentVertices = []
    iter_previousVertices = []
    iter_covMatrices = []
    iter_chi2s = []
    iter_deltaVertices = []
    iter_deltaDistances = []
    iter_deltaChi2s = []
    
    for iIter in range(int(numberOfIterations)):
        startIterIndex = 33+iIter*lengthOfIterationChunk
        endIterIndex = startIterIndex + lengthOfIterationChunk
        
        iterationInfo = ProcessIterationChunk(chunk[startIterIndex:endIterIndex])
        
        iter_currentVertices.append(iterationInfo[1])
        iter_previousVertices.append(iterationInfo[2])
        iter_covMatrices.append(iterationInfo[3])
        iter_chi2s.append(iterationInfo[4])
        iter_deltaVertices.append(iterationInfo[5])
        iter_deltaDistances.append(iterationInfo[6])
        iter_deltaChi2s.append(iterationInfo[7])    
  
    status = ProcessStatus(chunk[-2])
    
    dictionary = {
        "seed_vtx": initVtx,
        "seed_chi2": initChi2,
        "seed_ci": initCi,
        "p_refPoint": protonInfo[1],
        "p_momentum": protonInfo[2][:3],
        "p_energy": protonInfo[2][3],
        "p_posMomCovMatrix": pionInfo[3],
        "pim_refPoint": pionInfo[1],
        "pim_momentum": pionInfo[2][:3],
        "pim_energy": pionInfo[2][3],
        "pim_posMomCovMatrix": pionInfo[3],
        "numberOfIterations": int(numberOfIterations),
        "iter_currentVertices": iter_currentVertices,
        "iter_previousVertices": iter_previousVertices,
        "iter_covMatrices": iter_covMatrices,
        "iter_chi2s": iter_chi2s,
        "iter_deltaVertices": iter_deltaVertices,
        "iter_deltaDistances": iter_deltaDistances,
        "iter_deltaChi2s": iter_deltaChi2s,
        "status": status
    }
    
    return dictionary

In [69]:
file = 'davinci_output.dat'
lines = ProcessInputFile(file)

In [70]:
chunk = lines[0:155]

In [71]:
ProcessChunk(chunk)

{'seed_vtx': (147.985, 102.425, 5813.65),
 'seed_chi2': 0.0,
 'seed_ci': [[0.155613, -0.00059156, -0.0018908],
  [-0.00059156, 0.000832927, 3.08563e-05],
  [-0.0018908, 3.08563e-05, 0.000366583]],
 'p_refPoint': (137.247, 128.359, 7755.84),
 'p_momentum': (-138.28, 365.041, 25017.9),
 'p_energy': 25038.5,
 'p_posMomCovMatrix': [[0.00543169,
   0.0153676,
   0.0,
   -1.88666,
   -0.435677,
   -12.8758,
   -13.0164],
  [0.0153676, 0.888872, 0.0, -5.12742, -5.59216, -35.0747, -35.5752],
  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
  [-1.88666, -5.12742, 0.0, 3490.85, 705.445, 24585.1, 24835.1],
  [-0.435677, -5.59216, 0.0, 705.445, 180.24, 4966.02, 5017.57],
  [-12.8758, -35.0747, 0.0, 24585.1, 4966.02, 173291.0, 175050.0],
  [-13.0164, -35.5752, 0.0, 24835.1, 5017.57, 175050.0, 176826.0]],
 'pim_refPoint': (429.459, 173.613, 7862.59),
 'pim_momentum': (835.081, 166.117, 6084.36),
 'pim_energy': 6145.24,
 'pim_posMomCovMatrix': [[0.00543169,
   0.0153676,
   0.0,
   -1.88666,
   -0.435677,
   -

In [72]:
dic = ProcessChunk(chunk)

In [74]:
op = pd.DataFrame(columns=list(dic))
op = op.append(dic, ignore_index=True)
op

Unnamed: 0,seed_vtx,seed_chi2,seed_ci,p_refPoint,p_momentum,p_energy,p_posMomCovMatrix,pim_refPoint,pim_momentum,pim_energy,pim_posMomCovMatrix,numberOfIterations,iter_currentVertices,iter_previousVertices,iter_covMatrices,iter_chi2s,iter_deltaVertices,iter_deltaDistances,iter_deltaChi2s,status
0,"(147.985, 102.425, 5813.65)",0.0,"[[0.155613, -0.00059156, -0.0018908], [-0.0005...","(137.247, 128.359, 7755.84)","(-138.28, 365.041, 25017.9)",25038.5,"[[0.00543169, 0.0153676, 0.0, -1.88666, -0.435...","(429.459, 173.613, 7862.59)","(835.081, 166.117, 6084.36)",6145.24,"[[0.00543169, 0.0153676, 0.0, -1.88666, -0.435...",10,"[(128.156, 86.9709, 4871.29), (113.514, 77.478...","[(147.985, 102.425, 5813.65), (128.156, 86.970...","[[[0.659589, -0.034261, -0.00900721], [-0.0342...","[3.08255, 3.50117, 1.73936, 4.06577, 2.75733, ...","[(-19.8295, -15.4544, -942.357), (-14.6421, -9...","[942.692, 573.933, 438.001, 278.849, 253.93, 2...","[245.489, 4.36432, 2.09498, 0.739273, 0.616799...",NonConverged


In [96]:
## Use 1,2,3 instead of 0,1,2. Trust me, it's easier.
def Feature1DComponent(series, component):
    return series.map(lambda x: x[component-1])

def Feature2DComponent(series, row, column):
    return series.map(lambda x: x[row-1][column-1])

def FeatureCoordinate(series, coordinate):  
    coordToComponent = {'x': 1, 'y': 2, 'z': 3}
    return Feature1DComponent(series, coordToComponent[coordinate])

In [86]:
Feature1DComponent(op['seed_vtx'], 2)

0    102.425
1    102.425
Name: seed_vtx, dtype: float64

In [90]:
Feature2DComponent(op['seed_ci'], 1, 2)

0   -0.000592
1   -0.000592
Name: seed_ci, dtype: float64

In [99]:
FeatureCoordinate(op['seed_vtx'], 'z')

0    5813.65
1    5813.65
Name: seed_vtx, dtype: float64

Da fare rimane solo il processare i singoli chunk e ci sei.