-
-
Notifications
You must be signed in to change notification settings - Fork 50
/
CsvFileReader.kt
110 lines (100 loc) · 3.63 KB
/
CsvFileReader.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package com.github.doyaaaaaken.kotlincsv.client
import com.github.doyaaaaaken.kotlincsv.dsl.context.CsvReaderContext
import com.github.doyaaaaaken.kotlincsv.parser.CsvParser
import com.github.doyaaaaaken.kotlincsv.util.CSVFieldNumDifferentException
import com.github.doyaaaaaken.kotlincsv.util.MalformedCSVException
import mu.KotlinLogging
/**
 * CSV Reader class, which controls file I/O flow.
 *
 * Lines are pulled from the wrapped reader one at a time and handed to a [CsvParser]
 * built from the quote, delimiter and escape characters of the supplied context.
 *
 * @author doyaaaaaken
 */
class CsvFileReader internal constructor(
    private val ctx: CsvReaderContext,
    reader: Reader
) {
    private val logger = KotlinLogging.logger { }

    private val reader = BufferedLineReader(reader)

    // Count of line reads attempted so far (the read that hits end of input is counted too);
    // forwarded to the parser together with the text to parse.
    private var rowNum = 0L

    private val parser = CsvParser(ctx.quoteChar, ctx.delimiter, ctx.escapeChar)

    /**
     * Reads the next csv row.
     * A single row may span several lines, because csv fields may contain line feeds.
     *
     * @return the fields of the row as List<String>,
     * or null when every line has already been read.
     * @throws MalformedCSVException if the input ends while unparsed text is still pending.
     */
    fun readNext(): List<String>? = readUntilNextCsvRow("")

    /**
     * Reads all remaining csv rows as a lazy Sequence.
     *
     * Every row is expected to have the same number of fields. A row with a different number of
     * fields is dropped (with an info log) when `ctx.skipMissMatchedRow` is set; otherwise a
     * [CSVFieldNumDifferentException] is thrown. The row number reported in both cases is the
     * 1-based position of the row within this sequence.
     *
     * @param fieldsNum expected number of fields per row; when null, the size of the first row
     * produced by this sequence is used instead.
     * @return the rows whose field count matches the expected one.
     */
    fun readAllAsSequence(fieldsNum: Int? = null): Sequence<List<String>> {
        var expectedSize: Int? = fieldsNum
        return generateSequence {
            readNext()
        }.mapIndexedNotNull { index, fields ->
            // Lock in the expected size from the first row when the caller did not supply one.
            val expected = expectedSize ?: fields.size.also { expectedSize = it }
            when {
                expected == fields.size -> fields
                ctx.skipMissMatchedRow -> {
                    logger.info{"skip miss matched row. [csv row num = ${index + 1}, fields num = ${fields.size}, fields num of first row = $expected]"}
                    null
                }
                else -> throw CSVFieldNumDifferentException(expected, fields.size, index + 1)
            }
        }
    }

    /**
     * Reads all remaining csv rows as a lazy Sequence of maps keyed by header name.
     *
     * The first row read is taken as the header row. When there is no row at all,
     * an empty sequence is returned.
     *
     * @return one map per data row, pairing each header with the field at the same position.
     * @throws MalformedCSVException if the same header name appears more than once.
     */
    fun readAllWithHeaderAsSequence(): Sequence<Map<String, String>> {
        val headerNames = readNext() ?: return emptySequence()
        findDuplicate(headerNames)?.let { duplicated ->
            throw MalformedCSVException("header '$duplicated' is duplicated")
        }
        val width = headerNames.size
        return readAllAsSequence(width).map { row -> headerNames.zip(row).toMap() }
    }

    /**
     * Closes the underlying line reader.
     */
    fun close() {
        reader.close()
    }

    /**
     * Reads lines until they form one complete csv row (which may contain multiple lines).
     *
     * @param leftOver text of the lines read so far that the parser has not yet turned into a row.
     * @return the fields of the row as List<String>,
     * or null when every line has already been read.
     * @throws MalformedCSVException if the input ends while [leftOver] is not empty.
     */
    private tailrec fun readUntilNextCsvRow(leftOver: String = ""): List<String>? {
        val line = reader.readLineWithTerminator()
        rowNum++

        // End of input: fine when nothing is pending, otherwise the last row was never finished.
        if (line == null) {
            if (leftOver.isEmpty()) return null
            throw MalformedCSVException("\"$leftOver\" on the tail of file is left on the way of parsing row")
        }

        // Blank lines are only skipped when no partial row is pending.
        val skippable = ctx.skipEmptyLine && line.isBlank() && leftOver.isBlank()
        if (skippable) return readUntilNextCsvRow(leftOver)

        // A null parse result means no row could be produced yet, so keep accumulating lines.
        val candidate = "$leftOver$line"
        return parser.parseRow(candidate, rowNum) ?: readUntilNextCsvRow(candidate)
    }

    /**
     * Finds the first header name that has already appeared earlier in the list.
     *
     * @return the first repeated header, or null when all headers are distinct.
     */
    private fun findDuplicate(headers: List<String>): String? {
        val seen = mutableSetOf<String>()
        // add() returns false when the element is already present.
        return headers.firstOrNull { header -> !seen.add(header) }
    }
}