-
Notifications
You must be signed in to change notification settings - Fork 0
/
bikeshareFormat4.conf
110 lines (96 loc) · 2.63 KB
/
bikeshareFormat4.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Input stage: events are read from standard input using the stdin plugin
# with its default settings.
input { stdin { } }
# Filter stage: turns one CSV record of bike-share ride data into a structured
# event. The steps depend on one another (fields created early are consumed and
# removed by later steps), so their order matters.
filter {
## ----- CSV PROCESSING --------
# Parse the record into the seven named columns. No source is configured, so
# the csv filter's default source field is used ("message" per the plugin docs).
csv {
columns => ["rawDuration","startDate","rawStartStation","endDate","rawEndStation","bikeNum","memberType"]
}
# Drop the raw "message" field, seed the three duration parts with 0 so the
# duration calculation further down always has a value for each part, and tag
# every event with a constant "city".
mutate {
remove_field => "message"
add_field => {
"durHours" => 0
"durMin" => 0
"durSec" => 0
"city" => "Washington, DC"
}
}
# Cast the seeded duration parts to integers.
# NOTE(review): presumably needed because add_field stores its values as
# strings, and kept in its own mutate so it runs after the fields exist - confirm.
mutate {
convert => {
"durHours" => "integer"
"durMin" => "integer"
"durSec" => "integer"
}
}
## ----- CLEAN STATION NAMES --------
# remove optional station number from end of the station name
# e.g. a trailing " (12345)" is matched but not captured; the cleaned name is
# stored in startStation and the raw column is removed.
grok {
match => [ "rawStartStation", "^%{DATA:startStation}(\ \(%{INT}\))?$"]
remove_field => "rawStartStation"
}
# remove optional station number from end of the station name
# (same treatment as above, for the end station -> endStation)
grok{
match => [ "rawEndStation", "^%{DATA:endStation}(\ \(%{INT}\))?$"]
remove_field => "rawEndStation"
}
## ----- INTERPRET DURATION ------
# Pull the optional "<n>h", "<n>m" and "<n>s" parts out of rawDuration as
# integers. "overwrite" lets the captures replace the 0 values seeded above.
# NOTE(review): parts that are absent presumably keep their seeded 0 - confirm.
grok {
match => [ "rawDuration", "(%{INT:durHours:int}h\ ?)?(%{INT:durMin:int}m\ ?)?(%{INT:durSec:int}s)?"]
overwrite => ["durHours", "durMin", "durSec"]
}
# Combine the parts into total seconds and divide by 60.0, so "duration" is a
# floating-point number of minutes; the intermediate fields are then removed.
# NOTE(review): event['field'] is the legacy Event API; newer Logstash releases
# use event.get / event.set instead - confirm the targeted Logstash version.
ruby {
code => " event['duration'] = (event['durHours'] * 3600 + event['durMin'] * 60 + event['durSec']) / 60.0 "
remove_field => ["rawDuration","durHours","durMin","durSec"]
}
## ----- INTERPRET DATES --------
# event date is the start Date
# No target is set here, so the parsed value goes to the date filter's default
# target (the event timestamp, per the plugin docs).
date {
# e.g. 2014-10-01 00:01
match => ["startDate", 'YYYY-MM-dd HH:mm']
timezone => "America/New_York"
locale => en
}
# adjust startDate to ISO 8601 (parsed value is written back into startDate)
date {
match => ["startDate", 'YYYY-MM-dd HH:mm']
timezone => "America/New_York"
locale => en
target => "startDate"
}
# adjust endDate to ISO 8601 (parsed value is written back into endDate)
date {
match => ["endDate", 'YYYY-MM-dd HH:mm']
timezone => "America/New_York"
locale => en
target => "endDate"
}
## ----- Process Geospatial --------
# NOTE(review): geoEnrich does not appear to be a stock Logstash filter -
# presumably a custom plugin that looks up the "source" station name in the CSV
# "database" and writes the result to "target"; verify against the plugin code.
# The database path is an absolute path on a developer machine and has to be
# adjusted for any other environment.
geoEnrich {
database => "/Users/dave/dev/examples/cabi/logstash-attempt/fullDCStations.csv"
source => "startStation"
target => "startLocation"
}
geoEnrich {
database => "/Users/dave/dev/examples/cabi/logstash-attempt/fullDCStations.csv"
source => "endStation"
target => "endLocation"
}
## ------ Behavior ------
# Classify the ride: identical start and end station names mean 'Round Trip',
# anything else is 'Point to Point'.
ruby {
code => " event['behavior'] = (event['startStation'] == event['endStation']) ? 'Round Trip' : 'Point to Point'"
}
#make all data sources conform to 'Subscriber' and 'Casual' for memberType
# Only the value 'Registered' is rewritten (to 'Subscriber'); every other
# value is left as it is.
ruby {
code => " if (event['memberType'] == 'Registered') then event['memberType'] = 'Subscriber' end "
}
}
# Output stage: a progress indicator on stdout plus indexing into Elasticsearch.
output {
# Alternative stdout codecs, kept commented out for debugging:
# stdout { codec => rubydebug }
stdout { codec => dots }
#stdout { codec => json }
# Events go to a per-year index ("%{+YYYY}" is filled in from the event
# timestamp) on localhost over HTTP; template management is switched off.
# NOTE(review): host / protocol / index_type look like options of an older
# elasticsearch output plugin - confirm they match the installed version.
elasticsearch {
index => "bikelog-dc-%{+YYYY}"
index_type => rides
manage_template => false
host => localhost
protocol => http
}
}