-
Notifications
You must be signed in to change notification settings - Fork 40
/
date_funcs.rb
120 lines (94 loc) · 3.1 KB
/
date_funcs.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
require 'index_manager.rb'
# Helpers for detecting, blanking and parsing date fields on item hashes.
#
# Several methods take a field-spec argument `f` that they do not use; it is
# kept so existing callers that pass it keep working.
module DateFuncs
  # Substrings that mark a date value as unknown.
  UNKNOWN_DATE_TERMS = ["Date unknown", "Unknown", "nodate", "0000-00-00 00:00:00"].freeze

  # Substrings that mark a date value as "present"/ongoing.
  PRESENT_DATE_TERMS = ["Present", "Current", "Gegenwart"].freeze

  # A four-digit year, optionally wrapped in one non-digit character on
  # either side (e.g. trailing whitespace or "?"). A leading "-" is
  # deliberately not accepted so a negative year is not read as positive.
  YEAR_ONLY_PATTERN = /\A[^\d-]?(\d{4})\D?\z/

  # Checks whether +field+ is declared as a date in the dataspec.
  #
  # @param field [String] field name to look up
  # @param dataspec [#field_info] object whose field_info yields hashes with
  #   "Type" and "Field Name" keys
  # @return [Boolean] true when an entry of type "Date" has that field name
  def isDate?(field, dataspec)
    dataspec.field_info.any? do |info|
      info["Type"] == "Date" && info["Field Name"] == field
    end
  end

  # Blanks the date field if its string form contains any of the terms.
  #
  # @param f unused
  # @param item [Hash] record, modified in place
  # @param date_field key of the date value inside item
  # @param terms [Array<String>] substrings to look for (case-sensitive)
  # @return [Hash] the same item
  def blank_if_match(f, item, date_field, terms)
    item[date_field] = "" if terms.any? { |term| item[date_field].to_s.include?(term) }
    item
  end

  # Normalizes unknown dates: values containing an unknown-date term, and
  # nil/false/empty values, all become the empty string.
  #
  # @return [Hash] the same item
  def handle_unknown_dates(f, item, date_field)
    blank_if_match(f, item, date_field, UNKNOWN_DATE_TERMS)
    item[date_field] = "" if !item[date_field] || item[date_field].to_s.empty?
    item
  end

  # Blanks dates that contain a word meaning "present".
  #
  # @return [Hash] the same item
  def handle_present_dates(f, item, date_field)
    blank_if_match(f, item, date_field, PRESENT_DATE_TERMS)
  end

  # Converts a year-only value (4 or 5 characters containing exactly a
  # four-digit year) into January 1st of that year.
  #
  # Any other value is left untouched. Values that merely have the right
  # length (e.g. "abcd", "Mai 5") are no longer forced through Date.parse,
  # which could either invent a date in the current year or raise.
  #
  # @return [Hash] the same item
  def handle_year_only(f, item, date_field)
    text = item[date_field].to_s
    return item unless text.length.between?(4, 5)

    year = YEAR_ONLY_PATTERN.match(text)
    item[date_field] = Date.new(year[1].to_i, 1, 1) if year
    item
  end

  # Parses a date written with non-English month names by first passing it
  # through normalize_date.
  #
  # Empty values and values that are already Date objects are left alone, so
  # a date parsed by an earlier step cannot be lost if the month-name lookup
  # fails. Any failure while normalizing or parsing sets the field to nil.
  #
  # @return [Hash] the same item
  def handle_foreign_dates(f, item, date_field)
    value = item[date_field]
    return item if value.is_a?(Date) || value.to_s.empty?

    begin
      item[date_field] = Date.parse(normalize_date(value).to_s)
    rescue StandardError
      # Best effort: an unparseable date is recorded as nil.
      item[date_field] = nil
    end
    item
  end

  # Processes a date field: blanks unknown/present markers, then parses the
  # remaining value.
  #
  # Does nothing unless f["Type"] is "Date". A value DateTime.parse accepts
  # becomes a DateTime; otherwise the year-only and foreign-month fallbacks
  # run, yielding a Date or nil. Blank values stay "".
  #
  # @param f [Hash] field spec with "Type" and "Field Name" keys
  # @param item [Hash] record, modified in place
  # @return [Hash] the item
  def process_date(f, item)
    return item unless f["Type"] == "Date"

    # set_name is defined outside this module; per the original comment it
    # picks the symbol or string form of the key as needed.
    date_field = set_name(f["Field Name"], item)

    handle_unknown_dates(f, item, date_field)
    handle_present_dates(f, item, date_field)

    # Blank after the steps above: nothing to parse.
    return item if item[date_field].to_s.empty?

    begin
      item[date_field] = DateTime.parse(item[date_field])
    rescue StandardError
      # DateTime.parse rejected the value; try the fallbacks in order.
      item = handle_year_only(f, item, date_field)
      item = handle_foreign_dates(f, item, date_field)
    end
    item
  end

  # Replaces the first month name found in +date+ with the value from the
  # first column of the same row in the month-names data package.
  #
  # The package is read from data_packages/month-names/ (a relative path) on
  # every call. The first CSV row is skipped and the first column of each
  # row is the replacement (presumably the English name - confirm against
  # the data package).
  #
  # @param date value to normalize; matching is done on its string form
  # @return [String] the substituted text when a name matched, otherwise the
  #   original +date+ object unchanged
  def normalize_date(date)
    # Load in file
    package_path = "data_packages/month-names/"
    package = DataPackage::Package.new(package_path + "datapackage.json")
    rows = CSV.parse(File.read(package_path + package.resources[0]["path"]))

    text = date.to_s
    rows.drop(1).each do |row|
      canonical = row[0]
      next if canonical.nil?

      row.drop(1).each do |name|
        # Empty CSV cells parse as nil and "" is contained in every string,
        # so neither may be used as a search term.
        next if name.nil? || name.empty?
        return text.sub(name, canonical) if text.include?(name)
      end
    end

    # Return input if it gets this far
    date
  end
end