This repository has been archived by the owner on Jul 11, 2023. It is now read-only.
/
stream.py
131 lines (100 loc) · 3.24 KB
/
stream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import
from __future__ import unicode_literals
import io
import sys
from tabulator import Stream
# Demo: open a CSV file by path, then print its headers and every row.
print('Parse csv format:')
csv_path = 'data/table.csv'
with Stream(csv_path, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: same flow as the CSV case, pointed at a .tsv file.
print('\nParse linear tsv format:')
tsv_path = 'data/table.tsv'
with Stream(tsv_path, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: JSON file addressed with a file:// source; no `headers` option is
# passed for this one.
print('\nParse json with dicts:')
with Stream('file://data/table-dicts.json') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: JSON file addressed with a file:// source, opened with headers='row1'.
print('\nParse json with lists:')
with Stream('file://data/table-lists.json', headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: print the headers and rows of an .xls workbook.
print('\nParse xls format:')
xls_path = 'data/table.xls'
with Stream(xls_path, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: print the headers and rows of an .xlsx workbook.
print('\nParse xlsx format:')
xlsx_path = 'data/table.xlsx'
with Stream(xlsx_path, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: hand Stream an already-open binary file object instead of a path.
# The section header is printed like every other section so this output does
# not run on from the previous one.
print('\nLoad from stream scheme:')
# Open the file in a `with` block so the handle is always released by this
# script, whatever Stream does with it on exit (closing twice is harmless).
with io.open('data/table.csv', mode='rb') as source:
    # `format` is passed explicitly here; presumably it cannot be inferred
    # from a file object - confirm against the tabulator docs.
    with Stream(source, headers='row1', format='csv') as stream:
        print(stream.headers)
        for row in stream:
            print(row)
# Demo: the table content is embedded in the source string itself (text://),
# with format='csv' given explicitly.
print('\nLoad from text scheme:')
inline_text = 'text://id,name\n1,english\n2,中国人\n'
with Stream(inline_text, headers='row1', format='csv') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: the source is a remote CSV addressed by an https URL.
print('\nLoad from http scheme:')
# The URL is assembled from two adjacent literals to keep the lines short.
remote_url = (
    'https://raw.githubusercontent.com'
    '/okfn/tabulator-py/master/data/table.csv'
)
with Stream(remote_url, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: the source is an in-memory sequence of rows (two lists and a tuple).
print('\nUsage of inline lists:')
inline_rows = [
    ['id', 'name'],
    ['1', 'english'],
    ('2', '中国人'),
]
with Stream(inline_rows, headers='row1') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: the source is an in-memory list of dicts; no `headers` option is
# passed for this one.
print('\nUsage of inline lists (keyed):')
keyed_rows = [
    {'id': '1', 'name': 'english'},
    {'id': '2', 'name': '中国人'},
]
with Stream(keyed_rows) as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: iterate through Stream.iter(keyed=True) instead of iterating the
# stream object directly.
print('\nIter with keyed rows representation:')
keyed_rows = [
    {'id': '1', 'name': 'english'},
    {'id': '2', 'name': '中国人'},
]
with Stream(keyed_rows, headers=1) as table:
    print(table.headers)
    for record in table.iter(keyed=True):
        print(record)
# Demo: read with limit=1, reset the stream, then read with limit=1 again.
print('\nTable reset and read limit:')
with Stream('data/table.csv', headers='row1') as table:
    print(table.headers)
    first_pass = table.read(limit=1)
    print(first_pass)
    # Reset before the second limited read.
    table.reset()
    second_pass = table.read(limit=1)
    print(second_pass)
# Demo: a file opened with headers='row2' rather than 'row1'.
print('\nLate headers (on a second row):')
late_headers_path = 'data/special/late_headers.csv'
with Stream(late_headers_path, headers='row2') as table:
    print(table.headers)
    for record in table:
        print(record)
# Demo: remote CSV; only the rows returned by read(limit=5) are printed.
print('\nSpaces in headers:')
gdp_url = 'https://raw.githubusercontent.com/datasets/gdp/master/data/gdp.csv'
with Stream(gdp_url, headers='row1') as table:
    print(table.headers)
    leading_rows = table.read(limit=5)
    for record in leading_rows:
        print(record)