Skip to content

Commit

Permalink
Change enum classes to be capitalized (#107)
Browse files Browse the repository at this point in the history
  • Loading branch information
dan-blanchard committed Apr 10, 2017
1 parent 7ec7c4b commit 93b7c80
Show file tree
Hide file tree
Showing 15 changed files with 230 additions and 200 deletions.
10 changes: 5 additions & 5 deletions chardet/charsetgroupprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,22 @@ def feed(self, byte_str):
state = prober.feed(byte_str)
if not state:
continue
if state == ProbingState.found_it:
if state == ProbingState.FOUND_IT:
self._best_guess_prober = prober
return self.state
elif state == ProbingState.not_me:
elif state == ProbingState.NOT_ME:
prober.active = False
self._active_num -= 1
if self._active_num <= 0:
self._state = ProbingState.not_me
self._state = ProbingState.NOT_ME
return self.state
return self.state

def get_confidence(self):
state = self.state
if state == ProbingState.found_it:
if state == ProbingState.FOUND_IT:
return 0.99
elif state == ProbingState.not_me:
elif state == ProbingState.NOT_ME:
return 0.01
best_conf = 0.0
self._best_guess_prober = None
Expand Down
2 changes: 1 addition & 1 deletion chardet/charsetprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def __init__(self, lang_filter=None):
self.logger = logging.getLogger(__name__)

def reset(self):
self._state = ProbingState.detecting
self._state = ProbingState.DETECTING

@property
def charset_name(self):
Expand Down
4 changes: 2 additions & 2 deletions chardet/codingstatemachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,13 @@ def __init__(self, sm):
self.reset()

def reset(self):
self._curr_state = MachineState.start
self._curr_state = MachineState.START

def next_state(self, c):
# for each byte we get its class
# if it is first byte, we also get byte length
byte_class = self._model['class_table'][c]
if self._curr_state == MachineState.start:
if self._curr_state == MachineState.START:
self._curr_byte_pos = 0
self._curr_char_len = self._model['char_len_table'][byte_class]
# from byte's class and state_table, we get its next state
Expand Down
63 changes: 46 additions & 17 deletions chardet/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,39 +9,68 @@ class InputState(object):
"""
This enum represents the different states a universal detector can be in.
"""
pure_ascii = 0
esc_ascii = 1
high_byte = 2
PURE_ASCII = 0
ESC_ASCII = 1
HIGH_BYTE = 2


class LanguageFilter(object):
"""
This enum represents the different language filters we can apply to a
``UniversalDetector``.
"""
chinese_simplified = 0x01
chinese_traditional = 0x02
japanese = 0x04
korean = 0x08
non_cjk = 0x10
all = 0x1F
chinese = chinese_simplified | chinese_traditional
cjk = chinese | japanese | korean
CHINESE_SIMPLIFIED = 0x01
CHINESE_TRADITIONAL = 0x02
JAPANESE = 0x04
KOREAN = 0x08
NON_CJK = 0x10
ALL = 0x1F
CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
CJK = CHINESE | JAPANESE | KOREAN


class ProbingState(object):
"""
This enum represents the different states a prober can be in.
"""
detecting = 0
found_it = 1
not_me = 2
DETECTING = 0
FOUND_IT = 1
NOT_ME = 2


class MachineState(object):
"""
This enum represents the different states a state machine can be in.
"""
start = 0
error = 1
its_me = 2
START = 0
ERROR = 1
ITS_ME = 2


class SequenceLikelihood(object):
    """
    This enum represents the likelihood of a character following the previous one.

    The members are plain integers, numbered consecutively from
    ``NEGATIVE`` (0) up to ``POSITIVE`` (3).
    """
    NEGATIVE = 0
    UNLIKELY = 1
    LIKELY = 2
    POSITIVE = 3

    @classmethod
    def get_num_categories(cls):
        """:returns: The number of likelihood categories in the enum."""
        # Hard-coded count of the four constants defined above; keep it in
        # sync if a category is ever added or removed.
        return 4


class CharacterCategory(object):
    """
    This enum represents the different categories language models for
    ``SingleByteCharsetProber`` put characters into.

    Anything less than CONTROL is considered a letter.
    """
    # The categories are plain integers counting down from 255; CONTROL is
    # the smallest of them, which is what the "less than CONTROL" rule in
    # the docstring relies on.
    UNDEFINED = 255
    LINE_BREAK = 254
    SYMBOL = 253
    DIGIT = 252
    CONTROL = 251
14 changes: 7 additions & 7 deletions chardet/escprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ class EscCharSetProber(CharSetProber):
def __init__(self, lang_filter=None):
super(EscCharSetProber, self).__init__(lang_filter=lang_filter)
self.coding_sm = []
if self.lang_filter & LanguageFilter.chinese_simplified:
if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED:
self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL))
self.coding_sm.append(CodingStateMachine(ISO2022CN_SM_MODEL))
if self.lang_filter & LanguageFilter.japanese:
if self.lang_filter & LanguageFilter.JAPANESE:
self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL))
if self.lang_filter & LanguageFilter.korean:
if self.lang_filter & LanguageFilter.KOREAN:
self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL))
self.active_sm_count = None
self._detected_charset = None
Expand Down Expand Up @@ -80,14 +80,14 @@ def feed(self, byte_str):
if not coding_sm or not coding_sm.active:
continue
coding_state = coding_sm.next_state(c)
if coding_state == MachineState.error:
if coding_state == MachineState.ERROR:
coding_sm.active = False
self.active_sm_count -= 1
if self.active_sm_count <= 0:
self._state = ProbingState.not_me
self._state = ProbingState.NOT_ME
return self.state
elif coding_state == MachineState.its_me:
self._state = ProbingState.found_it
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
self._detected_charset = coding_sm.get_coding_state_machine()
return self.state

Expand Down
56 changes: 28 additions & 28 deletions chardet/escsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
)

HZ_ST = (
MachineState.start,MachineState.error, 3,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,# 00-07
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 08-0f
MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.start,MachineState.start, 4,MachineState.error,# 10-17
5,MachineState.error, 6,MachineState.error, 5, 5, 4,MachineState.error,# 18-1f
4,MachineState.error, 4, 4, 4,MachineState.error, 4,MachineState.error,# 20-27
4,MachineState.its_me,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 28-2f
MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17
5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f
4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27
4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f
)

HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
Expand Down Expand Up @@ -115,14 +115,14 @@
)

ISO2022CN_ST = (
MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 00-07
MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 08-0f
MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 10-17
MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,# 18-1f
MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 20-27
5, 6,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 28-2f
MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 30-37
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.start,# 38-3f
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27
5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f
)

ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
Expand Down Expand Up @@ -169,15 +169,15 @@
)

ISO2022JP_ST = (
MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 00-07
MachineState.start,MachineState.start,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 08-0f
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 10-17
MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,# 18-1f
MachineState.error, 5,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,MachineState.error,# 20-27
MachineState.error,MachineState.error,MachineState.error, 6,MachineState.its_me,MachineState.error,MachineState.its_me,MachineState.error,# 28-2f
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,# 30-37
MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error,MachineState.error,# 38-3f
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.error,MachineState.start,MachineState.start,# 40-47
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07
MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47
)

ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
Expand Down Expand Up @@ -224,11 +224,11 @@
)

ISO2022KR_ST = (
MachineState.start, 3,MachineState.error,MachineState.start,MachineState.start,MachineState.start,MachineState.error,MachineState.error,# 00-07
MachineState.error,MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.its_me,MachineState.its_me,MachineState.its_me,# 08-0f
MachineState.its_me,MachineState.its_me,MachineState.error,MachineState.error,MachineState.error, 4,MachineState.error,MachineState.error,# 10-17
MachineState.error,MachineState.error,MachineState.error,MachineState.error, 5,MachineState.error,MachineState.error,MachineState.error,# 18-1f
MachineState.error,MachineState.error,MachineState.error,MachineState.its_me,MachineState.start,MachineState.start,MachineState.start,MachineState.start,# 20-27
MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f
MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f
MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27
)

ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
Expand Down
14 changes: 7 additions & 7 deletions chardet/eucjpprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ def feed(self, byte_str):
for i in range(len(byte_str)):
# PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.error:
if coding_state == MachineState.ERROR:
self.logger.debug('%s prober hit error at byte %s',
self.charset_name, i)
self._state = ProbingState.not_me
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.its_me:
self._state = ProbingState.found_it
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.start:
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
Expand All @@ -75,10 +75,10 @@ def feed(self, byte_str):

self._last_char[0] = byte_str[-1]

if self.state == ProbingState.detecting:
if self.state == ProbingState.DETECTING:
if (self.context_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.found_it
self._state = ProbingState.FOUND_IT

return self.state

Expand Down
16 changes: 8 additions & 8 deletions chardet/hebrewprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,9 @@ def feed(self, byte_str):
# We automatically filter out all 7-bit characters (replace them with
# spaces) so the word boundary detection works properly. [MAP]

if self.state == ProbingState.not_me:
if self.state == ProbingState.NOT_ME:
# Both model probers say it's not them. No reason to continue.
return ProbingState.not_me
return ProbingState.NOT_ME

byte_str = self.filter_high_byte_only(byte_str)

Expand All @@ -249,8 +249,8 @@ def feed(self, byte_str):
self._prev = cur

# Forever detecting, till the end or until both model probers return
# ProbingState.not_me (handled above)
return ProbingState.detecting
# ProbingState.NOT_ME (handled above)
return ProbingState.DETECTING

@property
def charset_name(self):
Expand Down Expand Up @@ -282,7 +282,7 @@ def charset_name(self):
@property
def state(self):
# Remain active as long as any of the model probers are active.
if (self._logical_prober.state == ProbingState.not_me) and \
(self._visual_prober.state == ProbingState.not_me):
return ProbingState.not_me
return ProbingState.detecting
if (self._logical_prober.state == ProbingState.NOT_ME) and \
(self._visual_prober.state == ProbingState.NOT_ME):
return ProbingState.NOT_ME
return ProbingState.DETECTING
4 changes: 2 additions & 2 deletions chardet/latin1prober.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,15 +116,15 @@ def feed(self, byte_str):
freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM)
+ char_class]
if freq == 0:
self._state = ProbingState.not_me
self._state = ProbingState.NOT_ME
break
self._freq_counter[freq] += 1
self._last_char_class = char_class

return self.state

def get_confidence(self):
if self.state == ProbingState.not_me:
if self.state == ProbingState.NOT_ME:
return 0.01

total = sum(self._freq_counter)
Expand Down
14 changes: 7 additions & 7 deletions chardet/mbcharsetprober.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ def charset_name(self):
def feed(self, byte_str):
for i in range(len(byte_str)):
coding_state = self.coding_sm.next_state(byte_str[i])
if coding_state == MachineState.error:
if coding_state == MachineState.ERROR:
self.logger.debug('%s prober hit error at byte %s',
self.charset_name, i)
self._state = ProbingState.not_me
self._state = ProbingState.NOT_ME
break
elif coding_state == MachineState.its_me:
self._state = ProbingState.found_it
elif coding_state == MachineState.ITS_ME:
self._state = ProbingState.FOUND_IT
break
elif coding_state == MachineState.start:
elif coding_state == MachineState.START:
char_len = self.coding_sm.get_current_charlen()
if i == 0:
self._last_char[1] = byte_str[0]
Expand All @@ -76,10 +76,10 @@ def feed(self, byte_str):

self._last_char[0] = byte_str[-1]

if self.state == ProbingState.detecting:
if self.state == ProbingState.DETECTING:
if (self.distribution_analyzer.got_enough_data() and
(self.get_confidence() > self.SHORTCUT_THRESHOLD)):
self._state = ProbingState.found_it
self._state = ProbingState.FOUND_IT

return self.state

Expand Down

0 comments on commit 93b7c80

Please sign in to comment.