From 015690161243956fcf253b4b4d4e92a840aa85b7 Mon Sep 17 00:00:00 2001 From: Ashwin Nair <68902907+reversingentropy@users.noreply.github.com> Date: Thu, 9 Jun 2022 18:16:32 +0800 Subject: [PATCH] Update helpFuncs.py I have faced issues with PDs such as these: ['PD AUG.','PD SEPT','PD AUG-SEPT', 'PD AUG15'] 1) The delimiter can be a full stop instead of a '-'. 2) AUG15 and SEPT are not in the dictionary. This code takes the word after 'PD' and keeps only its first 3 characters. --- metaknowledge/WOS/tagProcessing/helpFuncs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/metaknowledge/WOS/tagProcessing/helpFuncs.py b/metaknowledge/WOS/tagProcessing/helpFuncs.py index 5446afe..13e2ad3 100644 --- a/metaknowledge/WOS/tagProcessing/helpFuncs.py +++ b/metaknowledge/WOS/tagProcessing/helpFuncs.py @@ -1,3 +1,5 @@ +import re + #Written by Reid McIlroy-Young for Dr. John McLevey, University of Waterloo 2015 monthDict = {'SPR': 3, 'SUM': 6, 'FAL': 9, 'WIN': 12, 'JAN' : 1, 'FEB' : 2, 'MAR' : 3, 'APR' : 4, 'MAY' : 5, 'JUN' : 6 , 'JUL' : 7, 'AUG' : 8, 'SEP' : 9, 'OCT' : 10, 'NOV' : 11, 'DEC' : 12} @@ -12,7 +14,7 @@ def getMonth(s): Month Year ("%b %Y") Year Month Day ("%Y %m %d") """ - monthOrSeason = s.split('-')[0].upper() + monthOrSeason = [x for x in re.split(r'[ |.|-]',s)][1][:3].upper() if monthOrSeason in monthDict: return monthDict[monthOrSeason] else: