Join GitHub today
GitHub is home to over 31 million developers working together to host and review code, manage projects, and build software together.
Sign up
Towards 1529: replacing the predicates with an IEnumerable on IRowToRowMapper.GetDependencies #2504
Conversation
sfilipi
requested review from
TomFinley
,
Ivanidzo4ka
and
yaeldekel
Feb 11, 2019
sfilipi
reviewed
Feb 11, 2019
var predicateOut = GetActiveOutputColumns(active); | ||
|
||
// Now map those to active input columns. | ||
var predicateIn = _mapper.GetDependencies(predicateOut); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
reviewed
Feb 11, 2019
for (int i = InnerMappers.Length - 1; i >= 0; --i) | ||
toReturn = InnerMappers[i].GetDependencies(toReturn); | ||
return toReturn; | ||
columnsNeeded = columnsNeeded.Union(InnerMappers[i].GetDependencies(columnsNeeded)); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 11, 2019
•
Member
Union [](start = 46, length = 5)
Is this needed? The old code seems to "forget" each intermediate mapper's predicate and only return the dependencies of the first one. #Resolved
sfilipi
self-assigned this
Feb 12, 2019
sfilipi
added
the
api
label
Feb 12, 2019
sfilipi
added this to In progress
in Project 13
via automation
Feb 12, 2019
sfilipi
added this to the 0219 milestone
Feb 12, 2019
sfilipi
reviewed
Feb 12, 2019
Contracts.AssertValue(dependingColumns); | ||
|
||
var active = GetActiveInput(dependingColumns); | ||
Contracts.Assert(active.Count() == Input.Count); |
This comment has been minimized.
This comment has been minimized.
sfilipi
reviewed
Feb 12, 2019
var mapperColumns = Mappers[i].OutputSchema.Where(col => mapperPredicate(col.Index)); | ||
var inputColumns = Mappers[i].GetDependencies(mapperColumns); | ||
|
||
Func<int, bool> inputPredicate = c => BoundPipelines[i].OutputSchema.Count() < c; |
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
inputPredicate = c => BoundPipelines[i].OutputSchema.Count() < c; [](start = 40, length = 65)
fix #Resolved
This comment has been minimized.
This comment has been minimized.
sfilipi
reviewed
Feb 12, 2019
var predicateInputForMapper = bindings.RowMapper.GetDependencies(predicateMapper); | ||
// Get the active output columns | ||
var activeOutputCols = bindings.RowMapper.OutputSchema.Where(c => localMapper(c.Index)); | ||
var predicateInputForMapper = bindings.RowMapper.GetDependencies(activeOutputCols); |
This comment has been minimized.
This comment has been minimized.
sfilipi
reviewed
Feb 12, 2019
return col => false; | ||
} | ||
IEnumerable<Schema.Column> IRowToRowMapper.GetDependencies(IEnumerable<Schema.Column> dependingColumns) | ||
=> Enumerable.Repeat(FeatureColumn, 1); |
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
Enumerable.Repeat(FeatureColumn, 1); [](start = 14, length = 37)
fix #Resolved
sfilipi
reviewed
Feb 12, 2019
while (transform != null) | ||
{ | ||
var mapper = transform as IRowToRowMapper; | ||
_ectx.AssertValue(mapper); | ||
pred = mapper.GetDependencies(pred); | ||
dependingColumns = dependingColumns.Union(mapper.GetDependencies(cols)); |
This comment has been minimized.
This comment has been minimized.
sfilipi
reviewed
Feb 12, 2019
@@ -252,13 +256,15 @@ public Row GetRow(Row input, Func<int, bool> active) | |||
var actives = new List<Func<int, bool>>(); | |||
var transform = _chain as IDataTransform; | |||
var activeCur = active; | |||
var activeCurCol = InputSchema.Where(col => active(col.Index)); |
This comment has been minimized.
This comment has been minimized.
sfilipi
reviewed
Feb 12, 2019
@@ -252,13 +256,16 @@ public Row GetRow(Row input, Func<int, bool> active) | |||
var actives = new List<Func<int, bool>>(); | |||
var transform = _chain as IDataTransform; | |||
var activeCur = active; | |||
var activeCurCol = OutputSchema.Where(col => active(col.Index)); |
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
activeCurCol [](start = 20, length = 12)
remove, implement without it. #Resolved
sfilipi
reviewed
Feb 12, 2019
if (dependingColumns.Count() == 0 || !InputRoleMappedSchema.Feature.HasValue) | ||
return Enumerable.Empty<Schema.Column>(); | ||
|
||
return InputSchema.Where(col => col.Name.Equals(InputRoleMappedSchema.Feature?.Name)); |
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
col => col.Name.Equals(InputRoleMappedSchema.Feature?.Name [](start = 41, length = 58)
base it on the index #Resolved
sfilipi
reviewed
Feb 12, 2019
if (dependingColumns.Count() == 0 || !InputRoleMappedSchema.Feature.HasValue) | ||
return Enumerable.Empty<Schema.Column>(); | ||
|
||
return InputSchema.Where(col => col.Name.Equals(InputRoleMappedSchema.Feature?.Name)); |
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
col => col.Name.Equals(InputRoleMappedSchema.Feature?.Name [](start = 41, length = 58)
base it on the index #Resolved
sfilipi
changed the title
[WIP] towards 1529: replacing the predicates with an IEnumerable on IRowToRowMapper.GetDependencies
Towards 1529: replacing the predicates with an IEnumerable on IRowToRowMapper.GetDependencies
Feb 12, 2019
Ivanidzo4ka
reviewed
Feb 12, 2019
deps[i - 1] = InnerMappers[i].GetDependencies(deps[i]); | ||
{ | ||
var outputColumns = InnerMappers[i].OutputSchema.Where(c => deps[i](c.Index)); | ||
var cols = InnerMappers[i].GetDependencies(outputColumns); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
GetDependencies [](start = 43, length = 15)
I would put ToArray to cache it. Otherwise you constantly fetch results from IEnumerable
Well, technically only twice: once in Count, once in Any. #Closed
Ivanidzo4ka
reviewed
Feb 12, 2019
@@ -245,11 +238,14 @@ void ISaveAsPfa.SaveAsPfa(BoundPfaContext ctx) | |||
} | |||
} | |||
|
|||
public Func<int, bool> GetDependencies(Func<int, bool> predicate) | |||
/// <summary> | |||
/// Given a set of columns, return the input columns that are needed to generate those output columns. |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
set of columns [](start = 20, length = 14)
set of output columns? #Closed
Ivanidzo4ka
reviewed
Feb 12, 2019
} | ||
return col => false; | ||
var columnNames = dependingColumns.Select(col => col.Name); | ||
|
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
reviewed
Feb 12, 2019
return col => col == InputRoleMappedSchema.Feature.Value.Index; | ||
} | ||
return col => false; | ||
if (dependingColumns.Count() == 0 || !InputRoleMappedSchema.Feature.HasValue) |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
if (dependingColumns.Count() == 0 || !InputRoleMappedSchema.Feature.HasValue) [](start = 16, length = 77)
Micro-optimization, but I would switch the order. #Closed
Ivanidzo4ka
reviewed
Feb 12, 2019
/// Given a set of columns, return the input columns that are needed to generate those output columns. | ||
/// </summary> | ||
IEnumerable<Schema.Column> IRowToRowMapper.GetDependencies(IEnumerable<Schema.Column> dependingColumns) | ||
=> dependingColumns; |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
If you look few lines above we have example of different style for =>.
Can we put tab here? (not tab, 4 spaces, we are not barbarians) #Closed
Ivanidzo4ka
reviewed
Feb 12, 2019
{ | ||
var activeOutput = RowCursorUtils.FromColumnsToPredicate(columns, _mapper.OutputSchema); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
FromColumnsToPredicate [](start = 50, length = 22)
You will need to untangle it sooner or later :)
Right now I don't see any reason to use predicate. We get set of columns we return set of columns, and we don't call any function which required predicate.
But you can always postpone it to the moment when we delete that Utils method. #Closed
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
Author
Member
You know, that Utils method might stay for a while, because GetActive is not public...
untangling this now:)
In reply to: 256133342 [](ancestors = 256133342)
This comment has been minimized.
This comment has been minimized.
codecov
bot
commented
Feb 12, 2019
•
Codecov Report
@@ Coverage Diff @@
## master #2504 +/- ##
==========================================
- Coverage 71.26% 71.26% -0.01%
==========================================
Files 797 797
Lines 141292 141278 -14
Branches 16118 16097 -21
==========================================
- Hits 100692 100677 -15
- Misses 36138 36141 +3
+ Partials 4462 4460 -2
|
Ivanidzo4ka
reviewed
Feb 12, 2019
/// Given a set of columns, return the input columns that are needed to generate those output columns. | ||
/// </summary> | ||
IEnumerable<Schema.Column> IRowToRowMapper.GetDependencies(IEnumerable<Schema.Column> dependingColumns) | ||
=> _mapper.GetDependencies(dependingColumns); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
=> [](start = 15, length = 3)
somehow this triggers me. can you add tab? #Closed
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
reviewed
Feb 12, 2019
|
||
return InputSchema.Where(col => _inputColIndices.Contains(col.Index)); | ||
|
||
//return Enumerable.Repeat(InputSchema.First(col => _inputColIndices.Contains(col.Index)), 1); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
//return Enumerable.Repeat(InputSchema.First(col => _inputColIndices.Contains(col.Index)), 1); [](start = 16, length = 94)
clean it #Closed
Ivanidzo4ka
reviewed
Feb 12, 2019
} | ||
return col => false; | ||
var columnNames = dependingColumns.Select(col => col.Name); | ||
return InputSchema.Where(col => columnNames.Contains(col.Name)); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 12, 2019
•
Member
InputSchema [](start = 23, length = 11)
Was it a mistake in the previous code? We used to filter by OutputSchema, now it's InputSchema. #Closed
This comment has been minimized.
This comment has been minimized.
sfilipi
Feb 12, 2019
•
Author
Member
The description of the function was:
"Given a predicate specifying which columns are needed, return a predicate indicating which input columns are
needed. " So i took it as : input is columns from the outputschema, and return value is columns from the input schema.
i thought the iteration is over the OutputSchema, since the predicate was over the OutputSchema.
hmm, going back to the IRowToRowMapper, summary:
" The domain of the function is defined over the indices of the columns of for ."
but InputSchema => OutputSchema makes no sense, to me?
In reply to: 256136896 [](ancestors = 256136896)
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
Member
"The domain of the function is defined...", I think this refers to the predicate returned by GetDependencies().
The way I understand the old code is - if any columns are active, then activate all the input columns. If this is correct, then the new code should return all the columns of InputSchema if dependingColumns is not empty, and an empty enumerable if dependingColumns is empty.
In reply to: 256140629 [](ancestors = 256140629,256136896)
This comment has been minimized.
This comment has been minimized.
yaeldekel
reviewed
Feb 13, 2019
{ | ||
int n = _bindings.Schema.Count; | ||
var active = Utils.BuildArray(n, predicate); | ||
Contracts.Assert(active.Length == n); | ||
|
||
var activeInput = _bindings.GetActiveInput(predicate); | ||
Contracts.Assert(activeInput.Length == _bindings.InputSchema.Count); | ||
Contracts.Assert(activeInput.Count() == _bindings.InputSchema.Count); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
•
Member
Count [](start = 41, length = 5)
Is activeInput not an array? #Resolved
yaeldekel
reviewed
Feb 13, 2019
@@ -164,8 +163,8 @@ private bool[] GetActive(Func<int, bool> predicate, out Func<int, bool> predicat | |||
var predicateIn = _mapper.GetDependencies(predicateOut); | |||
|
|||
// Combine the two sets of input columns. | |||
predicateInput = | |||
col => 0 <= col && col < activeInput.Length && (activeInput[col] || predicateIn(col)); | |||
inputColumns = _bindings.InputSchema.Where(col => col.Index < activeInput.Length |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
•
Member
col => col.Index < activeInput.Length [](start = 55, length = 37)
I think this should always be true, given the assert in line 157 above. #Resolved
yaeldekel
reviewed
Feb 13, 2019
while (transform != null) | ||
{ | ||
var mapper = transform as IRowToRowMapper; | ||
_ectx.AssertValue(mapper); | ||
pred = mapper.GetDependencies(pred); | ||
dependingColumns = mapper.GetDependencies(cols); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
•
Member
dependingColumns [](start = 20, length = 16)
Shouldn't this also be cols? #Resolved
Ivanidzo4ka
reviewed
Feb 13, 2019
{ | ||
var activeOutput = RowCursorUtils.FromColumnsToPredicate(columns, _mapper.OutputSchema); |
This comment has been minimized.
This comment has been minimized.
Ivanidzo4ka
Feb 13, 2019
•
Member
activeOutput [](start = 20, length = 12)
You no longer use this one. #Resolved
TomFinley
reviewed
Feb 13, 2019
{ | ||
var outputColumns = InnerMappers[i].OutputSchema.Where(c => deps[i](c.Index)); | ||
var cols = InnerMappers[i].GetDependencies(outputColumns).ToArray(); | ||
deps[i - 1] = c => cols.Count() > 0 ? cols.Any(col => col.Index == c) : false; |
This comment has been minimized.
This comment has been minimized.
TomFinley
Feb 13, 2019
•
Contributor
Count() [](start = 40, length = 7)
Use `Length`. Note that `Count()`, the method, will actually iterate and literally count. #Resolved
TomFinley
reviewed
Feb 13, 2019
@@ -164,8 +163,7 @@ private bool[] GetActive(Func<int, bool> predicate, out Func<int, bool> predicat | |||
var predicateIn = _mapper.GetDependencies(predicateOut); | |||
|
|||
// Combine the two sets of input columns. | |||
predicateInput = | |||
col => 0 <= col && col < activeInput.Length && (activeInput[col] || predicateIn(col)); | |||
inputColumns = _bindings.InputSchema.Where(col => activeInput[col.Index]|| predicateIn(col.Index)); |
This comment has been minimized.
This comment has been minimized.
TomFinley
reviewed
Feb 13, 2019
GetActive(predicate, out predicateInput); | ||
return predicateInput; | ||
var predicate = RowCursorUtils.FromColumnsToPredicate(dependingColumns, OutputSchema); | ||
GetActive(predicate, out IEnumerable<Schema.Column> inputColumns); |
This comment has been minimized.
This comment has been minimized.
TomFinley
Feb 13, 2019
•
Contributor
IEnumerable<Schema.Column> [](start = 37, length = 26)
Don't be afraid of `out var`. This will ultimately make @eerhardt's job of renaming this sort of thing easier if you want a selfless reason to do so. #Resolved
TomFinley
reviewed
Feb 13, 2019
@@ -258,7 +262,8 @@ public Row GetRow(Row input, Func<int, bool> active) | |||
_ectx.AssertValue(mapper); | |||
mappers.Add(mapper); | |||
actives.Add(activeCur); | |||
activeCur = mapper.GetDependencies(activeCur); | |||
var activeCurCol = mapper.GetDependencies(mapper.OutputSchema.Where(col => activeCur(col.Index))); | |||
activeCur = c => activeCurCol.Any(col => col.Index == c); |
This comment has been minimized.
This comment has been minimized.
TomFinley
Feb 13, 2019
•
Contributor
activeCurCol.Any(col => col.Index == c); [](start = 37, length = 40)
I don't like this usage of `Any` that I've been seeing... using quadratic algorithms is probably best avoided. Did we not have a utility method to take care of this predicate mapping problem? I think we did. #Resolved
This comment has been minimized.
This comment has been minimized.
TomFinley
Feb 13, 2019
Contributor
`internal static Func<int, bool> FromColumnsToPredicate(IEnumerable<Schema.Column> columnsNeeded, Schema sourceSchema)`, you called it, in `RowCursorUtils`.
In reply to: 256593471 [](ancestors = 256593471)
sfilipi
added some commits
Feb 11, 2019
sfilipi
force-pushed the
sfilipi:getDependanciesRemovePredicates
branch
from
8da2253
to
3a7d3da
Feb 13, 2019
yaeldekel
reviewed
Feb 13, 2019
if (!InputRoleMappedSchema.Feature.HasValue || dependingColumns.Count() == 0) | ||
return Enumerable.Empty<Schema.Column>(); | ||
|
||
return InputSchema.Where(col => col.Index == InputRoleMappedSchema.Feature.Value.Index); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
Member
InputSchema.Where(col => col.Index == InputRoleMappedSchema.Feature.Value.Index); [](start = 23, length = 81)
Isn't this InputRoleMappedSchema.Feature?
yaeldekel
reviewed
Feb 13, 2019
if (dependingColumns.Count() == 0 || !InputRoleMappedSchema.Feature.HasValue) | ||
return Enumerable.Empty<Schema.Column>(); | ||
|
||
return InputSchema.Where(col => col.Index == InputRoleMappedSchema.Feature.Value.Index); |
This comment has been minimized.
This comment has been minimized.
yaeldekel
Feb 13, 2019
Member
InputSchema.Where(col => col.Index == InputRoleMappedSchema.Feature.Value.Index); [](start = 23, length = 81)
Here too.
yaeldekel
reviewed
Feb 13, 2019
@@ -589,6 +589,16 @@ public bool[] GetActive(Func<int, bool> predicate) | |||
return Utils.BuildArray(ColumnCount, predicate); | |||
} | |||
|
|||
/// <summary> | |||
/// The given predicate maps from output column index to whether the column is active. |
This comment has been minimized.
This comment has been minimized.
yaeldekel
reviewed
Feb 13, 2019
@@ -609,6 +619,19 @@ public bool[] GetActiveInput(Func<int, bool> predicate) | |||
return active; | |||
} | |||
|
|||
/// <summary> | |||
/// The given predicate maps from output column index to whether the column is active. |
This comment has been minimized.
This comment has been minimized.
yaeldekel
reviewed
Feb 13, 2019
@@ -763,6 +786,18 @@ public bool[] GetActiveInput(Func<int, bool> predicate) | |||
} | |||
return active; | |||
} | |||
|
|||
/// <summary> | |||
/// The given predicate maps from output column index to whether the column is active. |
sfilipi commented Feb 11, 2019
More work towards #1529.
Marked the PR as still in progress because one test is failing: TestAndPredictoOnIris; double-checking the changes to CompositeRowToRowMapper.