Skip to content
Permalink
Browse files
FREEMARKER-199 Add "utah-parser-tool"
  • Loading branch information
sgoeschl committed Oct 23, 2021
1 parent 45c391f commit 20d7f13e58788155322b5c7fca4043c769dc2d6f
Showing 30 changed files with 967 additions and 60 deletions.
@@ -27,6 +27,8 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Function;
import java.util.stream.Collectors;

import static java.util.function.Function.identity;
@@ -180,7 +182,7 @@ public List<DataSource> findByName(String wildcard) {
}

/**
* Find data sources based on their metadata key and wildcard string.
* Find <code>DataSources</code> based on their metadata key and wildcard string.
*
* @param key metadata key to match
* @param wildcard the wildcard string to match against
@@ -206,6 +208,19 @@ public DataSources filter(String key, String wildcard) {
return new DataSources(find(key, wildcard));
}

/**
* Group the <code>DataSources</code> by a metadata value.
* @param key metadata key to group by
* @return groups of <code>DataSources</code>
*/
/**
 * Groups the <code>DataSources</code> by the value of a metadata key.
 *
 * @param key metadata key whose values define the groups
 * @return map from metadata value to the <code>DataSources</code> sharing that value
 */
public Map<String, DataSources> groupingBy(String key) {
    // Group by the metadata value and wrap each resulting list directly
    // in a DataSources instance via a downstream collector.
    return dataSources.stream().collect(Collectors.groupingBy(
            dataSource -> dataSource.getMetadata(key),
            Collectors.collectingAndThen(Collectors.toList(), DataSources::new)));
}

@Override
public void close() {
dataSources.forEach(ClosableUtils::closeQuietly);
@@ -141,7 +141,7 @@ public static Table fromRows(List<List<Object>> rows) {

/**
* Create a table from a list of rows representing tabular data
* where the first row may consists of column headers.
* where the first row may consist of column headers.
*
* @param rows row values
* @param withFirstRowAsColumnNames column names as first row?
@@ -25,6 +25,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Arrays.asList;
@@ -119,6 +120,36 @@ public void shouldGetGroups() {
assertEquals(singletonList(DEFAULT_GROUP), dataSources().getGroups());
}

@Test
public void shouldSupportGroupingByMetadata() {
    try (DataSources dataSources = dataSources()) {
        // Grouping by MIME type yields one plain-text group and one XML group.
        final Map<String, DataSources> groups = dataSources.groupingBy("mimeType");

        assertEquals(2, groups.size());
        assertEquals(2, groups.get("text/plain").size());
        assertEquals(1, groups.get("application/xml").size());
    }
}

@Test
public void shouldSupportFilteringByMetadata() {
    // Positive match: keep only the plain-text data sources.
    try (DataSources textSources = dataSources().filter("mimeType", "text/plain")) {
        assertEquals(2, textSources.size());
        assertEquals("text/plain", textSources.get(0).getMimeType());
        assertEquals("text/plain", textSources.get(1).getMimeType());
    }

    // Positive match: keep only the XML data source.
    try (DataSources xmlSources = dataSources().filter("mimeType", "application/xml")) {
        assertEquals(1, xmlSources.size());
        assertEquals("application/xml", xmlSources.get(0).getMimeType());
    }

    // Negated wildcard ("!"): everything that is NOT plain text.
    try (DataSources nonTextSources = dataSources().filter("mimeType", "!text/plain")) {
        assertEquals(1, nonTextSources.size());
        assertEquals("application/xml", nonTextSources.get(0).getMimeType());
    }
}

@Test(expected = IllegalArgumentException.class)
public void shouldThrowExceptionWhenGetDoesNotFindDataSource() {
dataSources().get("file-does-not-exist");
@@ -142,7 +173,7 @@ private static DataSource fileDataSource() {
}

private static DataSource urlDataSource() {
return DataSourceFactory.fromUrl("server.invalid?foo=bar", "default", toUrl(ANY_URL), "plain/text", UTF_8, new HashMap<>());
return DataSourceFactory.fromUrl("server.invalid?foo=bar", "default", toUrl(ANY_URL), "text/plain", UTF_8, new HashMap<>());
}

private static URL toUrl(String value) {
@@ -4,6 +4,9 @@ All notable changes to this project will be documented in this file. We try to a

## 0.2.0-SNAPSHOT

### Added
* [FREEMARKER-199] Add [utah-parser-tool](https://github.com/sonalake/utah-parser) to parse semi-structured text.

### Changed
* [FREEMARKER-195] Improve exposure of DataSources using TemplateHashModelEx2

@@ -90,4 +93,5 @@ All notable changes to this project will be documented in this file. We try to a
[FREEMARKER-182]: https://issues.apache.org/jira/browse/FREEMARKER-182
[FREEMARKER-188]: https://issues.apache.org/jira/browse/FREEMARKER-188
[FREEMARKER-195]: https://issues.apache.org/jira/browse/FREEMARKER-195
[FREEMARKER-199]: https://issues.apache.org/jira/browse/FREEMARKER-199

@@ -34,6 +34,7 @@ freemarker.tools.jsonpath=org.apache.freemarker.generator.tools.jsonpath.JsonPat
freemarker.tools.jsoup=org.apache.freemarker.generator.tools.jsoup.JsoupTool
freemarker.tools.properties=org.apache.freemarker.generator.tools.properties.PropertiesTool
freemarker.tools.system=org.apache.freemarker.generator.tools.system.SystemTool
freemarker.tools.utahparser=org.apache.freemarker.generator.tools.utahparser.UtahParserTool
freemarker.tools.uuid=org.apache.freemarker.generator.tools.uuid.UUIDTool
freemarker.tools.xml=org.apache.freemarker.generator.tools.xml.XmlTool
freemarker.tools.yaml=org.apache.freemarker.generator.tools.snakeyaml.SnakeYamlTool
@@ -0,0 +1,30 @@
server {
listen 443 ssl;
server_name test1-api.company.org;

access_log /var/log/nginx/test1-api.access.log;
error_log /var/log/nginx/test1-api.error.log;

ssl_certificate /etc/nginx/ssl/wildcard.company.org-public.crt;
ssl_certificate_key /etc/nginx/ssl/wildcard.company.org-private.rsa;

include /etc/nginx/includes/FLKPMM.nginx;

proxy_buffers 16 64k;
proxy_buffer_size 128k;
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
proxy_redirect off;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
proxy_set_header X-TPP-QWAC $ssl_client_cert;

ssl_verify_client optional_no_ca;


location / {
return 403;
}

}
@@ -0,0 +1,27 @@
server {
listen 443 ssl;
server_name test1-application.company.org;

access_log /var/log/nginx/test1-application.access.log;
error_log /var/log/nginx/test1-application.error.log;

ssl_certificate /etc/nginx/ssl/wildcard.company.org-public.crt;
ssl_certificate_key /etc/nginx/ssl/wildcard.company.org-private.rsa;

proxy_buffers 16 64k;
proxy_buffer_size 128k;

location / {
proxy_pass https://osroutercpssl/;
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504;
proxy_redirect off;

proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
}

}


@@ -0,0 +1,12 @@
Groups: 3 Peers: 3 Down peers: 0
Table Tot Paths Act Paths Suppressed History Damp State Pending
inet.0 947 310 0 0 0 0
inet6.0 849 807 0 0 0 0
Peer AS InPkt OutPkt OutQ Flaps Last Up/Dwn State|#Active/Received/Damped...
10.247.68.182 65550 131725 28179233 0 11 6w3d17h Establ
inet.0: 4/5/1
inet6.0: 0/0/0
10.254.166.246 65550 136159 29104942 0 0 6w5d6h Establ
inet.0: 0/0/0
inet6.0: 7/8/1
192.0.2.100 65551 1269381 1363320 0 1 9w5d6h 1/2/3 4/5/6
@@ -0,0 +1,83 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<searches>

<!-- in this case we have a space-delimited (CSV-like) file, so we define the line pattern
once and then reuse it over and over for each value -->
<search id="QUERY-LINE"><![CDATA[\s*{ipAddress}\s+{numbers}\s+{numbers}\s+{numbers}\s+{numbers}\s+{numbers}\s+{numbersThenText}]]></search>
<search id="inetInline"><![CDATA[{inet} {inet}]]></search>
<search id="inet4"><![CDATA[inet.0:\s*{inet}]]></search>
<search id="inet6"><![CDATA[inet6.0:\s*{inet}]]></search>
<search id="inet"><![CDATA[{numbers}/{numbers}/{numbers}]]></search>

<!-- Some rules for finding text, to make the values a little easier below -->
<search id="numbers"><![CDATA[(\d+)]]></search>
<search id="numbersThenText"><![CDATA[(\d+\S+)]]></search>
<search id="string"><![CDATA[(\S+?)]]></search>
<search id="ipAddress"><![CDATA[(\d+(\.\d+){3})]]></search>
<search id="EOL"><![CDATA[[\n\r]]]></search>
</searches>

<!-- the record starts with a line with an ip address and ends with either an inet6 line, or where the ids are at
the end of the line-->
<delim retain="true">{ipAddress}.*(\/\d+)\s*{EOL}</delim>
<delim>\s*({inet6})</delim>

<!--
This is the last line of the header
-->
<header-delim><![CDATA[Peer\s+AS\s+InPkt]]></header-delim>

<!--
Files look like this:
10.247.68.182 65550 131725 28179233 0 11 6w3d17h Establ
inet.0: 4/5/1
inet6.0: 0/0/0
or
192.0.2.100 65551 1269381 1363320 0 1 9w5d6h 2/3/0 0/0/0
-->
<values>
<!-- here we reuse the line pattern, only we pull out different group values -->
<value id="remoteIp" group="1"><![CDATA[{QUERY-LINE}]]></value>
<value id="uptime" group="8"><![CDATA[{QUERY-LINE}]]></value>

<!-- here we check for values in the inet* lines and use these -->
<value id="activeV4" group="1"><![CDATA[{inet4}]]></value>
<value id="receivedV4" group="2"><![CDATA[{inet4}]]></value>
<value id="accepted_V4" group="3"><![CDATA[{inet4}]]></value>

<value id="activeV6" group="1"><![CDATA[{inet6}]]></value>
<value id="receivedV6" group="2"><![CDATA[{inet6}]]></value>
<value id="accepted_V6" group="3"><![CDATA[{inet6}]]></value>

<!--
here we check for values at the end of the query line, and use these
NOTE: since we only set non-null values, these will not overwrite any values set above
-->
<value id="activeV4" group="9"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>
<value id="receivedV4" group="10"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>
<value id="accepted_V4" group="11"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>
<value id="activeV6" group="12"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>
<value id="receivedV6" group="13"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>
<value id="accepted_V6" group="14"><![CDATA[{QUERY-LINE}\s*{inetInline}]]></value>

</values>
</config>
@@ -0,0 +1,34 @@
<#compress>
|| FILE || SERVER || ACCESSLOG || SPLUNK ||
<#list dataSources as dataSource>
<#assign fileName = dataSource.fileName>
<#-- Transform to a single line to avoid matching OS-specific line endings -->
<#assign text = dataSource.getText()?replace("\r", "")?replace("\n", " ")>
<#-- Both helpers fall back to "N.A." when no match is found, so no default assignment is needed -->
<#assign accessLog = getAccessLog(text)>
<#assign serverName = getServerName(text)>
| ${fileName} | ${serverName} | ${accessLog} | [${splunkSearchUrl(accessLog)}] |
</#list>
</#compress>
<#--------------------------------------------------------------------------->
<#-- Builds a Splunk search URL for the given access-log path; the path is URL-encoded via ?url -->
<#function splunkSearchUrl accessLog>
<#return "https://splunk.p.santanderconsumer.at/en-US/app/scbdevteam/search?q=search%20source%3D%22${accessLog?url}%22">
</#function>
<#--------------------------------------------------------------------------->
<#-- Extracts the access_log path from the single-line nginx config text, or "N.A." when absent -->
<#function getAccessLog text>
    <#-- Use #local instead of #assign so the match result does not leak into the template namespace -->
    <#local matches = text?matches(r".*access_log\s*([\w\.\-\/\\]*);.*")>
    <#if matches>
        <#return matches?groups[1]?trim>
    <#else>
        <#return "N.A.">
    </#if>
</#function>
<#--------------------------------------------------------------------------->
<#-- Extracts the server_name value from the single-line nginx config text, or "N.A." when absent -->
<#function getServerName text>
    <#-- Use #local instead of #assign so the match result does not leak into the template namespace -->
    <#local matches = text?matches(r".*server_name\s*([\w\.\-\\]*);.*")>
    <#if matches>
        <#return matches?groups[1]?trim>
    <#else>
        <#return "N.A.">
    </#if>
</#function>
@@ -0,0 +1,32 @@
<#--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<#-- Parse the semi-structured input with the Utah parser; headers are derived from all records -->
<#assign conf = tools.utahparser.getConfig(dataSources[0])>
<#assign parser = tools.utahparser.getParser(conf, dataSources[1])>
<#assign records = parser.toList()>
<#assign headers = tools.utahparser.getHeaders(records)>
<#-- Configure the CSV output format, overridable via CSV_TARGET_FORMAT / CSV_TARGET_DELIMITER -->
<#assign defaultCsvFormat = tools.csv.formats[CSV_TARGET_FORMAT!"DEFAULT"]>
<#assign csvDelimiter = tools.csv.toDelimiter(CSV_TARGET_DELIMITER!defaultCsvFormat.getDelimiter())>
<#assign csvFormat = defaultCsvFormat.withHeader(headers).withDelimiter(csvDelimiter)>
<#assign csvPrinter = tools.csv.printer(csvFormat)>
<#-- Emit one CSV line per parsed record -->
<#compress>
<#list records as record>
${csvPrinter.printRecord(record, headers)}
</#list>
</#compress>
@@ -0,0 +1,19 @@
<#--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<#-- Parse the semi-structured input with the Utah parser: dataSources[0] is the parser config, dataSources[1] the input -->
<#assign conf = tools.utahparser.getConfig(dataSources[0])>
<#assign parser = tools.utahparser.getParser(conf, dataSources[1])>
<#-- Render all parsed records as a JSON array -->
${tools.gson.toJson(parser.toList())}

0 comments on commit 20d7f13

Please sign in to comment.