# test_formats.py -- forked from sloria/TextBlob
# -*- coding: utf-8 -*-
import os
import unittest
import logging
from nose.tools import * # PEP8 asserts
from textblob import formats
from textblob.compat import unicode
# Configure the root logger at DEBUG level for the test run.
logging.basicConfig(level=logging.DEBUG)

# Fixture paths are resolved relative to the directory holding this module.
HERE = os.path.abspath(os.path.dirname(__file__))
CSV_FILE, JSON_FILE, TSV_FILE = (
    os.path.join(HERE, fixture_name)
    for fixture_name in ("data.csv", "data.json", "data.tsv")
)
class TestFormats(unittest.TestCase):
    """Tests for ``formats.detect`` and the ``formats.AVAILABLE`` registry."""

    def test_detect_csv(self):
        """``formats.detect`` on the CSV fixture path yields ``formats.CSV``."""
        # Named ``detected`` rather than ``format`` to avoid shadowing the
        # ``format`` builtin.
        detected = formats.detect(CSV_FILE)
        assert_equal(detected, formats.CSV)

    def test_detect_json(self):
        """``formats.detect`` on the JSON fixture path yields ``formats.JSON``."""
        detected = formats.detect(JSON_FILE)
        assert_equal(detected, formats.JSON)

    def test_available(self):
        """``formats.AVAILABLE`` contains the 'csv', 'json' and 'tsv' keys."""
        # Membership is tested on the container itself; going through
        # ``.keys()`` adds nothing.
        for name in ('csv', 'json', 'tsv'):
            assert_true(
                name in formats.AVAILABLE,
                '%r missing from formats.AVAILABLE' % name,
            )
class TestDelimitedFormat(unittest.TestCase):
    """Tests for ``formats.DelimitedFormat``."""

    def test_delimiter_defaults_to_comma(self):
        """The class-level ``delimiter`` attribute equals a comma."""
        assert_equal(formats.DelimitedFormat.delimiter, ",")

    def test_detect(self):
        """``detect`` is truthy for CSV file contents and falsy for JSON."""
        with open(CSV_FILE, 'r') as csv_handle:
            csv_text = csv_handle.read()
            assert_true(formats.DelimitedFormat.detect(csv_text))

        with open(JSON_FILE, 'r') as json_handle:
            json_text = json_handle.read()
            assert_false(formats.DelimitedFormat.detect(json_text))
class TestCSV(unittest.TestCase):
    """Tests for ``formats.CSV``."""

    def test_read_from_filename(self):
        """Constructing ``formats.CSV`` from the fixture path must not raise."""
        # Smoke test only: the instance is intentionally not bound to a name
        # because nothing is asserted about it.
        formats.CSV(CSV_FILE)

    def test_detect(self):
        """``detect`` is truthy for CSV file contents and falsy for JSON."""
        with open(CSV_FILE, 'r') as fp:
            stream = fp.read()
            assert_true(formats.CSV.detect(stream))
        with open(JSON_FILE, 'r') as fp:
            stream = fp.read()
            assert_false(formats.CSV.detect(stream))
class TestTSV(unittest.TestCase):
    """Tests for ``formats.TSV``."""

    def test_read_from_filename(self):
        """Constructing ``formats.TSV`` from the fixture path must not raise."""
        # Smoke test only: the instance is intentionally not bound to a name
        # because nothing is asserted about it.
        formats.TSV(TSV_FILE)

    def test_detect(self):
        """``detect`` is truthy for TSV file contents and falsy for CSV."""
        with open(TSV_FILE, 'r') as fp:
            stream = fp.read()
            assert_true(formats.TSV.detect(stream))
        with open(CSV_FILE, 'r') as fp:
            stream = fp.read()
            assert_false(formats.TSV.detect(stream))
class TestJSON(unittest.TestCase):
    """Tests for ``formats.JSON``."""

    def test_read_from_filename(self):
        """Constructing ``formats.JSON`` from the fixture path must not raise."""
        formats.JSON(JSON_FILE)

    def test_detect(self):
        """``detect`` is truthy for JSON file contents and falsy for CSV."""
        with open(JSON_FILE, 'r') as json_handle:
            json_text = json_handle.read()
            assert_true(formats.JSON.detect(json_text))

        with open(CSV_FILE, 'r') as csv_handle:
            csv_text = csv_handle.read()
            assert_false(formats.JSON.detect(csv_text))

    def test_to_iterable(self):
        """The first item of the first ``to_iterable()`` entry is unicode text."""
        json_format = formats.JSON(JSON_FILE)
        logging.debug(json_format.dict)
        rows = json_format.to_iterable()
        head = rows[0]
        # The second field is still read (so a too-short entry fails here),
        # but only the first field is checked.
        text, _ = head[0], head[1]
        assert_true(isinstance(text, unicode))
# Run the unittest test runner when this module is executed as a script.
if __name__ == "__main__":
    unittest.main()