Skip to content
This repository has been archived by the owner on May 2, 2022. It is now read-only.

Commit

Permalink
Merge branch 'v0.9.x-20170604-01-ToTupleList' into v0.9.x
Browse files Browse the repository at this point in the history
  • Loading branch information
TaiSakuma committed Jun 4, 2017
2 parents fb42ec0 + a2c1639 commit 91435f5
Show file tree
Hide file tree
Showing 17 changed files with 616 additions and 249 deletions.
24 changes: 0 additions & 24 deletions alphatwirl/collector/CombineIntoPandasDataFrame.py

This file was deleted.

32 changes: 32 additions & 0 deletions alphatwirl/collector/ToDataFrame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Tai Sakuma <tai.sakuma@cern.ch>

import pandas as pd

from .ToTupleList import ToTupleList

##__________________________________________________________________||
class ToDataFrame(object):
    """Combine reader results into a single pandas DataFrame.

    The actual combination is delegated to a ``ToTupleList`` built with
    the same ``summaryColumnNames``; this class only converts the
    resulting list of tuples into a DataFrame.
    """

    def __init__(self, summaryColumnNames):
        self.to_tuple_list = ToTupleList(summaryColumnNames = summaryColumnNames)
        self.summaryColumnNames = summaryColumnNames

    def __repr__(self):
        # Rendered as ClassName(name = value, ...).
        shown = [('summaryColumnNames', self.summaryColumnNames)]
        args = ', '.join('{} = {!r}'.format(name, value) for name, value in shown)
        return '{}({})'.format(self.__class__.__name__, args)

    def combine(self, dataset_readers_list):
        """Return the combined results as a DataFrame.

        ``dataset_readers_list`` is handed unchanged to the wrapped
        ``ToTupleList.combine``. If that returns ``None``, ``None`` is
        returned. Otherwise the first entry of the tuple list supplies
        the column names and the remaining entries supply the rows.
        """
        rows = self.to_tuple_list.combine(dataset_readers_list)
        if rows is None:
            return None
        columns, body = rows[0], rows[1:]
        return pd.DataFrame(body, columns = columns)

##__________________________________________________________________||
38 changes: 38 additions & 0 deletions alphatwirl/collector/ToDataFrameWithDatasetColumn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Tai Sakuma <tai.sakuma@cern.ch>

import pandas as pd

from .ToTupleListWithDatasetColumn import ToTupleListWithDatasetColumn

##__________________________________________________________________||
class ToDataFrameWithDatasetColumn(object):
    """Combine reader results into a pandas DataFrame with a dataset column.

    The actual combination is delegated to a
    ``ToTupleListWithDatasetColumn`` built with the same arguments; this
    class only converts the resulting list of tuples into a DataFrame.
    """

    def __init__(self, summaryColumnNames, datasetColumnName = 'component'):
        self.to_tuple_list = ToTupleListWithDatasetColumn(
            summaryColumnNames = summaryColumnNames,
            datasetColumnName = datasetColumnName,
        )
        self.summaryColumnNames = summaryColumnNames
        self.datasetColumnName = datasetColumnName

    def __repr__(self):
        # Rendered as ClassName(name = value, ...).
        shown = [
            ('summaryColumnNames', self.summaryColumnNames),
            ('datasetColumnName', self.datasetColumnName),
        ]
        args = ', '.join('{} = {!r}'.format(name, value) for name, value in shown)
        return '{}({})'.format(self.__class__.__name__, args)

    def combine(self, dataset_readers_list):
        """Return the combined results as a DataFrame.

        ``dataset_readers_list`` is handed unchanged to the wrapped
        ``ToTupleListWithDatasetColumn.combine``. If that returns
        ``None``, ``None`` is returned. Otherwise the first entry of the
        tuple list supplies the column names and the remaining entries
        supply the rows.
        """
        rows = self.to_tuple_list.combine(dataset_readers_list)
        if rows is None:
            return None
        columns, body = rows[0], rows[1:]
        return pd.DataFrame(body, columns = columns)

##__________________________________________________________________||
67 changes: 67 additions & 0 deletions alphatwirl/collector/ToTupleList.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Tai Sakuma <tai.sakuma@cern.ch>
import itertools

##__________________________________________________________________||
class ToTupleList(object):
    """Combine the results of all readers into one list of tuples.

    The summarizers returned by every reader of every dataset are added
    together, and the sum is converted to a list of tuples with
    ``summaryColumnNames`` placed in front as the header row.
    """

    def __init__(self, summaryColumnNames):
        self.summaryColumnNames = summaryColumnNames

    def __repr__(self):
        name_value_pairs = (
            ('summaryColumnNames', self.summaryColumnNames),
        )
        return '{}({})'.format(
            self.__class__.__name__,
            ', '.join('{} = {!r}'.format(n, v) for n, v in name_value_pairs),
        )

    def combine(self, dataset_readers_list):
        """Return the header row followed by the summed results.

        Parameters
        ----------
        dataset_readers_list : list of (dataset, readers) pairs, e.g.,
            [
                ('QCD',    (reader1, reader2)),
                ('TTJets', (reader3, )),
                ('WJets',  (reader4, )),
                ('ZJets',  ( )),
            ]
            The dataset part is ignored here; only the readers are used.

        Returns
        -------
        list or None
            ``None`` if ``dataset_readers_list`` is empty or if no
            dataset has any reader. Otherwise a new list such as
            [
                ('htbin', 'njetbin', 'n', 'nvar'),
                (    200,         2, 120,    240),
                (    300,         2, 490,    980),
                (    300,         3, 210,    420),
                (    300,         2,  20,     40),
                (    300,         3,  15,     30),
            ]
        """
        if not dataset_readers_list:
            return None

        # e.g., readers = (reader1, reader2, reader3, reader4)
        readers = itertools.chain.from_iterable(
            dataset_readers for _, dataset_readers in dataset_readers_list
        )

        # e.g., summarizers = [summarizer1, summarizer2, summarizer3, summarizer4]
        summarizers = [reader.results() for reader in readers]

        # With no readers at all, sum() would return the int 0, which
        # has no to_tuple_list(); treat this like the empty input.
        if not summarizers:
            return None

        # sum() starts from 0, so the summarizers need to support
        # ``0 + summarizer`` (i.e., __radd__).
        summarizer = sum(summarizers)

        rows = summarizer.to_tuple_list()

        # Build a new list rather than inserting into ``rows`` so that a
        # list owned by the summarizer is never modified.
        return [self.summaryColumnNames] + list(rows)

##__________________________________________________________________||
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Tai Sakuma <tai.sakuma@cern.ch>

##__________________________________________________________________||
class CombineIntoList(object):
class ToTupleListWithDatasetColumn(object):
def __init__(self, summaryColumnNames,
datasetColumnName = 'component'
):
Expand Down
6 changes: 4 additions & 2 deletions alphatwirl/collector/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from CombineIntoList import CombineIntoList
from ToTupleList import ToTupleList
from ToTupleListWithDatasetColumn import ToTupleListWithDatasetColumn
from WriteListToFile import WriteListToFile

##__________________________________________________________________||
Expand All @@ -10,7 +11,8 @@
pass

if hasPandas:
from CombineIntoPandasDataFrame import CombineIntoPandasDataFrame
from ToDataFrameWithDatasetColumn import ToDataFrameWithDatasetColumn
from ToDataFrame import ToDataFrame
from WritePandasDataFrameToFile import WritePandasDataFrameToFile

##__________________________________________________________________||
4 changes: 2 additions & 2 deletions alphatwirl/configure/build_counter_collector_pair.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Tai Sakuma <tai.sakuma@cern.ch>
from ..summary import Reader, Summarizer, NextKeyComposer, KeyValueComposer
from ..collector import CombineIntoList
from ..collector import ToTupleListWithDatasetColumn
from ..collector import WriteListToFile
from ..loop import Collector

Expand All @@ -24,7 +24,7 @@ def build_counter_collector_pair(tblcfg):
weightCalculator = tblcfg['weight'],
nevents = tblcfg['nevents']
)
resultsCombinationMethod = CombineIntoList(
resultsCombinationMethod = ToTupleListWithDatasetColumn(
summaryColumnNames = tblcfg['keyOutColumnNames'] + tblcfg['valOutColumnNames']
)
deliveryMethod = WriteListToFile(tblcfg['outFilePath']) if tblcfg['outFile'] else None
Expand Down
8 changes: 4 additions & 4 deletions docs/alphatwirl.collector.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ alphatwirl.collector package
Submodules
----------

alphatwirl.collector.CombineIntoList module
alphatwirl.collector.ToTupleListWithDatasetColumn module
--------------------------------------------------------

.. automodule:: alphatwirl.collector.CombineIntoList
.. automodule:: alphatwirl.collector.ToTupleListWithDatasetColumn
:members:
:undoc-members:
:show-inheritance:

alphatwirl.collector.CombineIntoPandasDataFrame module
alphatwirl.collector.ToDataFrameWithDatasetColumn module
--------------------------------------------------------

.. automodule:: alphatwirl.collector.CombineIntoPandasDataFrame
.. automodule:: alphatwirl.collector.ToDataFrameWithDatasetColumn
:members:
:undoc-members:
:show-inheritance:
Expand Down
41 changes: 41 additions & 0 deletions tests/unit/collector/mock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import copy

##__________________________________________________________________||
class MockReader(object):
    """Test stand-in for a reader: hands back the summarizer it was given."""

    def __init__(self, summarizer):
        self.summarizer = summarizer

    def __repr__(self):
        template = '{}(summarizer = {!r})'
        return template.format(self.__class__.__name__, self.summarizer)

    def results(self):
        """Return the summarizer passed to the constructor."""
        return self.summarizer

##__________________________________________________________________||
class MockSummarizer(object):
    """Test stand-in for a summarizer wrapping a plain results object.

    Instances can be added to each other and to 0, so that the built-in
    ``sum()`` works on a sequence of them.
    """

    def __init__(self, results):
        self._results = results

    def __repr__(self):
        return '{}({!r})'.format(self.__class__.__name__, self._results)

    def __add__(self, other):
        # Adding 0 gives a new instance holding a shallow copy of the
        # results; this is what sum() triggers with its initial value.
        if other == 0:
            return self.__class__(copy.copy(self._results))
        merged = copy.copy(self._results) + copy.copy(other._results)
        return self.__class__(merged)

    def __radd__(self, other):
        # Reflected addition behaves the same as the normal one.
        return self.__add__(other)

    def to_tuple_list(self):
        """Return the wrapped results object itself (not a copy)."""
        return self._results

##__________________________________________________________________||

0 comments on commit 91435f5

Please sign in to comment.