Skip to content

Commit

Permalink
Support resource_name on AttachmentProcessor (#5205) (#5264)
Browse files Browse the repository at this point in the history
Contributes to #5198
Relates to elastic/elasticsearch#64389

Co-authored-by: Steve Gordon <sgordon@hotmail.co.uk>
  • Loading branch information
github-actions[bot] and stevejgordon committed Jan 14, 2021
1 parent bd35d28 commit 9cd87cd
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 5 deletions.
26 changes: 21 additions & 5 deletions src/Nest/Ingest/Processors/Plugins/AttachmentProcessor.cs
Expand Up @@ -21,12 +21,12 @@ namespace Nest
[InterfaceDataContract]
public interface IAttachmentProcessor : IProcessor
{
/// <summary> The field to get the base64 encoded field from </summary>
/// <summary> The field to get the base64 encoded field from.</summary>
[DataMember(Name ="field")]
Field Field { get; set; }


/// <summary> If `true` and `field` does not exist, the processor quietly exits without modifying the document </summary>
/// <summary> If `true` and `field` does not exist, the processor quietly exits without modifying the document.</summary>
[DataMember(Name ="ignore_missing")]
bool? IgnoreMissing { get; set; }

Expand All @@ -37,20 +37,26 @@ public interface IAttachmentProcessor : IProcessor
[DataMember(Name ="indexed_chars")]
long? IndexedCharacters { get; set; }

/// <summary> Field name from which you can overwrite the number of chars being used for extraction. </summary>
/// <summary> Field name from which you can overwrite the number of chars being used for extraction.</summary>
[DataMember(Name ="indexed_chars_field")]
Field IndexedCharactersField { get; set; }

/// <summary>
/// Properties to select to be stored. Can be content, title, name, author,
/// keywords, date, content_type, content_length, language. Defaults to all
/// keywords, date, content_type, content_length, language. Defaults to all.
/// </summary>
[DataMember(Name ="properties")]
IEnumerable<string> Properties { get; set; }

/// <summary> The field that will hold the attachment information </summary>
/// <summary> The field that will hold the attachment information.</summary>
[DataMember(Name ="target_field")]
Field TargetField { get; set; }

/// <summary>
/// The field containing the name of the resource to decode.
/// If specified, the processor passes this resource name to the underlying
/// Tika library to enable 'Resource Name Based Detection'.
/// </summary>
/// <remarks> Resource name support was added in 7.11.</remarks>
[DataMember(Name = "resource_name")]
Field ResourceName { get; set; }
}

/// <inheritdoc cref="IAttachmentProcessor" />
Expand All @@ -75,6 +81,9 @@ public class AttachmentProcessor : ProcessorBase, IAttachmentProcessor
/// <inheritdoc cref="IAttachmentProcessor.TargetField" />
public Field TargetField { get; set; }

/// <inheritdoc cref="IAttachmentProcessor.ResourceName" />
public Field ResourceName { get; set; }

protected override string Name => "attachment";
}

Expand All @@ -91,6 +100,7 @@ public class AttachmentProcessorDescriptor<T>
Field IAttachmentProcessor.IndexedCharactersField { get; set; }
IEnumerable<string> IAttachmentProcessor.Properties { get; set; }
Field IAttachmentProcessor.TargetField { get; set; }
Field IAttachmentProcessor.ResourceName { get; set; }

/// <inheritdoc cref="IAttachmentProcessor.Field" />
public AttachmentProcessorDescriptor<T> Field(Field field) => Assign(field, (a, v) => a.Field = v);
Expand Down Expand Up @@ -122,5 +132,11 @@ public class AttachmentProcessorDescriptor<T>

/// <inheritdoc cref="IAttachmentProcessor.Properties" />
public AttachmentProcessorDescriptor<T> Properties(params string[] properties) => Assign(properties, (a, v) => a.Properties = v);

/// <inheritdoc cref="IAttachmentProcessor.ResourceName" />
/// <param name="field">The field that is assigned to <see cref="IAttachmentProcessor.ResourceName" />.</param>
public AttachmentProcessorDescriptor<T> ResourceName(Field field) => Assign(field, (a, v) => a.ResourceName = v);

/// <inheritdoc cref="IAttachmentProcessor.ResourceName" />
/// <typeparam name="TValue">The type of the member selected by <paramref name="objectPath" />.</typeparam>
/// <param name="objectPath">
/// An expression selecting a member of <typeparamref name="T" />; it is assigned to
/// <see cref="IAttachmentProcessor.ResourceName" /> as the resource name field.
/// </param>
public AttachmentProcessorDescriptor<T> ResourceName<TValue>(Expression<Func<T, TValue>> objectPath) => Assign(objectPath, (a, v) => a.ResourceName = v);
}
}
33 changes: 33 additions & 0 deletions tests/Tests/Ingest/ProcessorAssertions.cs
Expand Up @@ -463,6 +463,39 @@ public class Attachment : ProcessorAssertion
public override string Key => "attachment";
}

/// <summary>
/// Supplies the fluent, object-initializer and expected-JSON forms of an attachment
/// processor that also sets <c>resource_name</c>.
/// </summary>
[SkipVersion("<7.11.0", "Resource name support was added in 7.11")]
public class Attachment_WithResourceName : ProcessorAssertion
{
	/// <summary> The key identifying this processor. </summary>
	public override string Key
	{
		get { return "attachment"; }
	}

	/// <summary> The processor built through the fluent descriptor API. </summary>
	public override Func<ProcessorsDescriptor, IPromise<IList<IProcessor>>> Fluent
	{
		get
		{
			return processors => processors
				.Attachment<Project>(attachment => attachment
					.Field(project => project.Description)
					.IndexedCharacters(100_000)
					.Properties("title", "author")
					.IgnoreMissing()
					.ResourceName(project => project.Name)
				);
		}
	}

	/// <summary> The same processor built through the object initializer syntax. </summary>
	public override IProcessor Initializer
	{
		get
		{
			var processor = new AttachmentProcessor
			{
				Field = "description",
				Properties = new string[] { "title", "author" },
				IndexedCharacters = 100_000,
				IgnoreMissing = true,
				ResourceName = "name"
			};
			return processor;
		}
	}

	/// <summary> The JSON shape expected for the processor. </summary>
	public override object Json
	{
		get
		{
			// Property order is kept exactly as in the expected JSON document.
			var expected = new
			{
				field = "description",
				ignore_missing = true,
				properties = new string[] { "title", "author" },
				indexed_chars = 100_000,
				resource_name = "name"
			};
			return expected;
		}
	}
}

[SkipVersion("<7.4.0", "Circle processor added in 7.4.0")]
public class Circle : ProcessorAssertion
{
Expand Down

0 comments on commit 9cd87cd

Please sign in to comment.