Skip to content

Commit

Permalink
feat: Notations for Java packages and constants
Browse files Browse the repository at this point in the history
fix: also handle Java keywords
  • Loading branch information
jGleitz committed Feb 6, 2020
1 parent 017e3a8 commit 10b4fe1
Show file tree
Hide file tree
Showing 6 changed files with 253 additions and 130 deletions.
12 changes: 5 additions & 7 deletions src/main/kotlin/BaseStringNotation.kt
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
package de.joshuagleitze.stringnotation

import java.util.*

/**
* Base class for implementing string notations.
*
* @constructor Creates a string notation that will use the provided regular expression to [split][String.split] parts when parsing.
*/
abstract class BaseStringNotation(private val splitAt: Regex): StringNotation {
/**
* Transforms a parsed part after it has been read. The default implementation is to convert the part to lowercase to discard possibly
* wrong case information.
* Transforms a parsed part after it has been read. The default implementation does not change the part.
*/
protected open fun transformPartAfterParse(index: Int, part: String) = part.toLowerCase(Locale.ROOT)
protected open fun transformPartAfterParse(index: Int, part: String) = part

override fun parse(sourceString: String): Word = Word(sourceString.split(splitAt).asSequence().mapIndexed(::transformPartAfterParse))
override fun parse(sourceString: String): Word =
Word(sourceString.split(splitAt).asSequence().filter(String::isNotBlank).mapIndexed(::transformPartAfterParse))

/**
* Allows to transform a part before it is being printed. The default implementation does not modify the part in any way.
Expand All @@ -34,7 +32,7 @@ abstract class BaseStringNotation(private val splitAt: Regex): StringNotation {

override fun print(word: Word) = word.parts
.mapIndexed(::transformPartToPrint)
.foldIndexed(StringBuffer()) { index, left, right -> left.append(printBeforePart(index, right)).append(right) }
.foldIndexed(StringBuffer()) { index, existing, part -> existing.append(printBeforePart(index, part)).append(part) }
.toString()

override fun toString() = this::class.java.simpleName!!
Expand Down
101 changes: 101 additions & 0 deletions src/main/kotlin/JavaNotations.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package de.joshuagleitze.stringnotation

import java.util.*
import java.util.stream.IntStream
import javax.lang.model.SourceVersion

/**
 * Notation for the names of Java types. Everything that is not overridden here is delegated to [UpperCamelCase].
 * [Printing][StringNotation.print] first prints the word the way [UpperCamelCase] does and then removes every character that may not
 * appear in a Java identifier. Should the outcome be a Java keyword, a `_` is appended to it.
 *
 * [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart] decide which characters are kept; [SourceVersion.isKeyword]
 * decides what counts as a keyword.
 */
object JavaTypeName: StringNotation by UpperCamelCase {
    override fun print(word: Word): String {
        val upperCamelCased = UpperCamelCase.print(word)
        return upperCamelCased.makeValidJavaIdentifier()
    }

    // Report the name of this object instead of the default identity-based representation.
    override fun toString(): String = javaClass.simpleName
}

/**
 * A notation for Java member names. This notation is like [LowerCamelCase], but when [printing][StringNotation.print], it will drop any
 * character that is not allowed in a Java identifier. If the result is a Java keyword, `_` will be appended to it.
 *
 * Camel casing only starts once an ASCII letter has been printed: every part that is printed before that point is lowercased as a
 * whole, every part after it gets an uppercase first character. This keeps the first letter of the resulting identifier lowercase
 * even if leading parts (like digits) are dropped afterwards.
 *
 * Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart]. Keywords are detected
 * using [SourceVersion.isKeyword].
 */
object JavaMemberName: BaseStringNotation(camelCaseSplitRegex) {
    /** Matches one ASCII letter. Compiled once here instead of once per printed part. */
    private val asciiLetter = Regex("[a-zA-Z]")

    override fun transformPartAfterParse(index: Int, part: String) = part.toLowerCase(Locale.ROOT)

    override fun print(word: Word) = word.parts
        .foldIndexed(StringBuilder()) { index, printed, part ->
            val casedPart =
                if (printed.contains(asciiLetter)) part.toFirstUpperOtherLowerCase()
                // Locale.ROOT keeps the output independent of the JVM's default locale (e.g. Turkish dotless i).
                else part.toLowerCase(Locale.ROOT)
            printed.append(printBeforePart(index, casedPart)).append(casedPart)
        }.toString().makeValidJavaIdentifier()
}

/**
 * Notation for a single part of a Java package name. [Parsing][StringNotation.parse] splits the input at every `_` and at every
 * position that [camelCaseSplitRegex] matches, so parts written in [LowerCamelCase] as well as in [SnakeCase] are recognised, and
 * lowercases each part. Neither notation is conventional for package parts, so real-world inputs will usually yield a single part.
 *
 * [Printing][StringNotation.print] lowercases all parts, prints them through the base implementation and then removes every
 * character that may not appear in a Java identifier. Should the outcome be a Java keyword, a `_` is appended to it.
 *
 * [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart] decide which characters are kept; [SourceVersion.isKeyword]
 * decides what counts as a keyword.
 */
object JavaPackagePart: BaseStringNotation(Regex("_|${camelCaseSplitRegex.pattern}")) {
    override fun transformPartAfterParse(index: Int, part: String): String {
        return part.toLowerCase(Locale.ROOT)
    }

    override fun transformPartToPrint(index: Int, part: String): String {
        return part.toLowerCase(Locale.ROOT)
    }

    override fun print(word: Word): String {
        val printed = super.print(word)
        return printed.makeValidJavaIdentifier()
    }
}

/**
 * Notation for complete Java package names. [Parsing][StringNotation.parse] splits the input at every `.`.
 * [Printing][StringNotation.print] treats each part on its own: the part is lowercased, every character that may not appear in a
 * Java identifier is removed and, should the outcome be a Java keyword, a `_` is appended to it. The printed parts are separated by
 * `.`.
 *
 * [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart] decide which characters are kept; [SourceVersion.isKeyword]
 * decides what counts as a keyword.
 */
object JavaPackageName: BaseStringNotation(Regex("\\.")) {
    override fun printBeforeInnerPart(index: Int, part: String) = "."

    override fun transformPartToPrint(index: Int, part: String): String {
        val lowercased = part.toLowerCase(Locale.ROOT)
        return lowercased.makeValidJavaIdentifier()
    }
}

/**
 * Notation for the names of `static final` fields in Java. Everything that is not overridden here is delegated to
 * [ScreamingSnakeCase]. [Printing][StringNotation.print] first prints the word the way [ScreamingSnakeCase] does and then removes
 * every character that may not appear in a Java identifier. Should the outcome be a Java keyword, a `_` is appended to it.
 *
 * [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart] decide which characters are kept; [SourceVersion.isKeyword]
 * decides what counts as a keyword.
 */
object JavaConstantName: StringNotation by ScreamingSnakeCase {
    override fun print(word: Word): String {
        val screamingSnakeCased = ScreamingSnakeCase.print(word)
        return screamingSnakeCased.makeValidJavaIdentifier()
    }

    // Report the name of this object instead of the default identity-based representation.
    override fun toString(): String = javaClass.simpleName
}

/**
 * Derives a Java identifier from this string in three steps: characters that are not allowed in a Java identifier are removed, a `_`
 * is appended if what remains is a Java keyword, and `__` is used if nothing remains at all.
 */
private fun String.makeValidJavaIdentifier(): String {
    val identifierCharsOnly = keepOnlyJavaIdentifierChars()
    val withoutKeywordClash = identifierCharsOnly.neutralizeJavaReservedKeywords()
    return withoutKeywordClash.ifEmpty { "__" }
}

/**
 * Returns this string reduced to the characters that may form a Java identifier: everything before the first character accepted by
 * [Character.isJavaIdentifierStart] is dropped, and of the remainder only characters accepted by [Character.isJavaIdentifierPart]
 * are kept.
 *
 * The string is processed by Unicode code point rather than by UTF-16 code unit, so that characters outside the Basic Multilingual
 * Plane are checked as a whole instead of as two separate surrogates (which would never be accepted).
 */
private fun String.keepOnlyJavaIdentifierChars() = this.codePoints()
    .skipWhile { !Character.isJavaIdentifierStart(it) }
    .filter { Character.isJavaIdentifierPart(it) }
    .collect({ StringBuilder() }, { builder, codePoint -> builder.appendCodePoint(codePoint) }, { left, right -> left.append(right) })
    .toString()

/**
 * Returns this string with `_` appended if [SourceVersion.isKeyword] reports it as a keyword, and this string unchanged otherwise.
 */
private fun String.neutralizeJavaReservedKeywords(): String = when {
    SourceVersion.isKeyword(this) -> "${this}_"
    else -> this
}

/**
 * Returns a stream that omits the leading elements of this stream for which [condition] holds. From the first element that does not
 * satisfy [condition] on, all elements are passed through and [condition] is not evaluated anymore.
 *
 * The returned stream relies on a stateful filter, so it is only meaningful for sequential streams.
 */
private inline fun IntStream.skipWhile(crossinline condition: (Int) -> Boolean): IntStream {
    var stillSkipping = true
    return filter { element ->
        // Short-circuits once skipping has ended, so the condition is never consulted again.
        stillSkipping = stillSkipping && condition(element)
        !stillSkipping
    }
}

56 changes: 8 additions & 48 deletions src/main/kotlin/Notations.kt
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
package de.joshuagleitze.stringnotation

import java.util.stream.IntStream
import java.util.*

private val camelCaseSplitRegex = Regex("(?<=.)(?=\\p{Lu})")
internal val camelCaseSplitRegex = Regex("(?<=.)(?=\\p{Lu})")

/**
 * The `UpperCamelCase` notation. Parsed parts are lowercased; printed parts get an uppercase first character and are lowercase
 * otherwise.
 *
 * @see JavaTypeName
 */
object UpperCamelCase: BaseStringNotation(camelCaseSplitRegex) {
    override fun transformPartAfterParse(index: Int, part: String): String {
        return part.toLowerCase(Locale.ROOT)
    }

    public override fun transformPartToPrint(index: Int, part: String): String {
        return part.toFirstUpperOtherLowerCase()
    }
}

Expand All @@ -19,13 +21,17 @@ object UpperCamelCase: BaseStringNotation(camelCaseSplitRegex) {
* @see JavaMemberName
*/
object LowerCamelCase: BaseStringNotation(camelCaseSplitRegex) {
    override fun transformPartAfterParse(index: Int, part: String) = part.toLowerCase(Locale.ROOT)

    // The first part is printed entirely in lowercase, every following part with an uppercase first character.
    // Locale.ROOT keeps the result independent of the JVM's default locale (e.g. Turkish dotless i).
    override fun transformPartToPrint(index: Int, part: String) =
        if (index == 0) part.toLowerCase(Locale.ROOT) else part.toFirstUpperOtherLowerCase()
}

/**
* The `SCREAMING_SNAKE_CASE` notation.
*/
object ScreamingSnakeCase: BaseStringNotation(Regex("_")) {
override fun transformPartAfterParse(index: Int, part: String) = part.toLowerCase(Locale.ROOT)

override fun printBeforeInnerPart(index: Int, part: String) = "_"

override fun transformPartToPrint(index: Int, part: String) = part.toUpperCase()
Expand All @@ -35,63 +41,17 @@ object ScreamingSnakeCase: BaseStringNotation(Regex("_")) {
* The `snake_case` notation.
*/
object SnakeCase: BaseStringNotation(Regex("_")) {
override fun transformPartAfterParse(index: Int, part: String) = part
override fun printBeforeInnerPart(index: Int, part: String) = "_"
}

/**
* A notation for java type names. This notation is like [UpperCamelCase], but will drop any character that is not allowed in a Java
* identifier when [printing][StringNotation.print].
*
* Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart].
*/
object JavaTypeName: BaseStringNotation(camelCaseSplitRegex) {
override fun transformPartToPrint(index: Int, part: String) = part.toFirstUpperOtherLowerCase()
override fun print(word: Word) = super.print(word).keepOnlyJavaIdentifierChars()
}

/**
* A notation for java member names. This notation is like [LowerCamelCase], but will drop any character that is not allowed in a Java
* identifier when [printing][StringNotation.print].
*
* Allowed characters are determined using [Character.isJavaIdentifierStart] and [Character.isJavaIdentifierPart].
*/
object JavaMemberName: BaseStringNotation(camelCaseSplitRegex) {
override fun print(word: Word) = word.parts
.foldIndexed(StringBuffer()) { index, left, right ->
val rightPart =
if (left.contains(Regex("[a-zA-Z]"))) right.toFirstUpperOtherLowerCase()
else right.toLowerCase()
left.append(printBeforePart(index, rightPart)).append(rightPart)
}.toString().keepOnlyJavaIdentifierChars()

}

/**
* Notation for words written like in normal language. [Parsing][StringNotation.parse] will recognise all substrings that are separated by
* one or more characters of whitespace as a [part][Word.parts]. [Printing][StringNotation.print] will print the parts separated by one
* space.
*/
object NormalWords: BaseStringNotation(Regex("[\\s]+")) {
override fun transformPartAfterParse(index: Int, part: String) = part
override fun printBeforeInnerPart(index: Int, part: String) = " "
}

/**
 * Returns this string with its first character converted to uppercase and all remaining characters converted to lowercase. An empty
 * string is returned unchanged.
 *
 * The lowercase conversion uses [Locale.ROOT], so the result does not depend on the JVM's default locale.
 */
internal fun String.toFirstUpperOtherLowerCase() =
    if (isNotEmpty()) this[0].toUpperCase() + substring(1).toLowerCase(Locale.ROOT) else this

fun String.keepOnlyJavaIdentifierChars() = this.chars()
.skipWhile { !Character.isJavaIdentifierStart(it) }
.filter { Character.isJavaIdentifierPart(it) }
.collect({ StringBuilder() }, { left, right -> left.appendCodePoint(right) }, { left, right -> left.append(right) })
.toString()

internal inline fun IntStream.skipWhile(crossinline condition: (Int) -> Boolean): IntStream {
var found = false
return this.filter {
if (!found) {
found = !condition(it)
}
found
}
}

56 changes: 56 additions & 0 deletions src/test/kotlin/BaseNotationTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package de.joshuagleitze.stringnotation

import ch.tutteli.atrium.api.fluent.en_GB.feature
import ch.tutteli.atrium.api.fluent.en_GB.toBe
import ch.tutteli.atrium.api.verbs.expect
import org.junit.jupiter.api.TestInstance
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.Arguments
import org.junit.jupiter.params.provider.MethodSource

/**
 * Base class for parameterized tests of a [StringNotation].
 *
 * @param notation the notation under test.
 * @param unchangedWords pairs of a string and the word it represents that must hold in both directions: the string must parse to the
 * word, the word must print to the string, and parsing followed by printing must reproduce the string.
 * @param parseOnlyWords pairs of an input string and the word it is expected to be parsed to.
 * @param printOnlyWords pairs of a word and the string it is expected to be printed to.
 */
@TestInstance(TestInstance.Lifecycle.PER_CLASS)
abstract class BaseNotationTest(
    private val notation: StringNotation,
    private val unchangedWords: List<Pair<String, Word>>,
    private val parseOnlyWords: List<Pair<String, Word>> = emptyList(),
    private val printOnlyWords: List<Pair<Word, String>> = emptyList()
) {
    @ParameterizedTest(name = "\"{0}\" -> {1}")
    @MethodSource("parseWords")
    fun `parses words correctly`(input: String, expectedWord: Word) {
        expect(input.fromNotation(notation)) {
            feature(Word::partsList).toBe(expectedWord.partsList)
        }
    }

    // Argument 0 is the word and argument 1 the expected string, so the display name reads `word -> "string"`.
    @ParameterizedTest(name = "{0} -> \"{1}\"")
    @MethodSource("printWords")
    fun `prints words correctly`(sourceWord: Word, expectedResult: String) {
        expect(sourceWord) {
            feature(Word::toNotation, notation).toBe(expectedResult)
        }
    }

    @ParameterizedTest(name = "\"{0}\"")
    @MethodSource("unchangedWords")
    fun `parsing and printing a word written in this notation does not change the word`(word: String) {
        expect(word) {
            feature(String::fromNotation, notation) {
                feature(Word::toNotation, notation).toBe(word)
            }
        }
    }

    // Argument providers referenced by name from the @MethodSource annotations above.
    private fun parseWords() = asArguments(unchangedWords + parseOnlyWords)
    private fun printWords() = asArguments(unchangedWords.map { it.swap() } + printOnlyWords)
    private fun unchangedWords() = asArguments(unchangedWords)

    /** Converts every pair into JUnit [Arguments] holding the pair's first and second component, in this order. */
    private fun asArguments(pairs: List<Pair<*, *>>) = pairs.map {
        Arguments.arguments(
            it.first,
            it.second
        )
    }

    /** Returns a pair with the components of this pair in reversed order. */
    private fun <First, Second> Pair<First, Second>.swap() = Pair(this.second, this.first)
}
81 changes: 81 additions & 0 deletions src/test/kotlin/JavaNotationsTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package de.joshuagleitze.stringnotation

/**
 * Tests [JavaTypeName] with the cases of [BaseNotationTest]: one word that must survive parsing and printing unchanged, and words
 * that are only printed to check that invalid characters are dropped, the case of parts is normalised, keywords get a `_` appended
 * and an otherwise empty result becomes `__`.
 */
class JavaTypeNameTest: BaseNotationTest(
notation = JavaTypeName,
unchangedWords = listOf("ImInJavaTypeNotation" to Word("im", "in", "java", "type", "notation")),
printOnlyWords = listOf(
// characters that are not allowed in an identifier (apostrophe, space, exclamation mark) are dropped
Word("I’m using", "Bad", "chaRacters!") to "ImusingBadCharacters",
// a leading digit cannot start an identifier and is dropped; digits inside the identifier are kept
Word("1", "type", "name", "4", "you") to "TypeName4You",
Word("removes", "upperCase") to "RemovesUppercase",
Word("") to "__",
Word("1") to "__",
// after dropping the leading digit, the keyword `if` remains and gets `_` appended
Word("8if") to "if_",
Word("_") to "__"
)
)

/**
 * Tests [JavaMemberName] with the cases of [BaseNotationTest]: one word that must survive parsing and printing unchanged, and words
 * that are only printed to check that invalid characters are dropped, the identifier starts in lowercase regardless of leading
 * non-letter parts, keywords get a `_` appended and an otherwise empty result becomes `__`.
 */
class JavaMemberNameTest: BaseNotationTest(
notation = JavaMemberName,
unchangedWords = listOf("imInJavaMemberNotation" to Word("im", "in", "java", "member", "notation")),
printOnlyWords = listOf(
// characters that are not allowed in an identifier (apostrophe, space, exclamation mark) are dropped
Word("I’m using", "Bad", "chaRacters!") to "imusingBadCharacters",
// the leading digit is dropped and the first part containing letters is still printed in lowercase
Word("1", "Member", "name", "4", "you") to "memberName4You",
// `_` and `$` may start an identifier and are kept; the part after them is still printed in lowercase
Word("_", "underscore", "start") to "_underscoreStart",
Word("$", "dollar", "start") to "\$dollarStart",
Word("a", "letter", "start") to "aLetterStart",
Word("removes", "upperCase") to "removesUppercase",
Word("") to "__",
Word("1") to "__",
// after dropping the leading digit, the keyword `if` remains and gets `_` appended
Word("8if") to "if_",
Word("_") to "__"
)
)

/**
 * Tests [JavaPackagePart] with the cases of [BaseNotationTest]: one word that must survive parsing and printing unchanged, inputs
 * that are only parsed to check that camel case and snake case boundaries are both recognised, and words that are only printed to
 * check the handling of invalid characters, keywords and empty results.
 */
class JavaPackagePartTest: BaseNotationTest(
notation = JavaPackagePart,
unchangedWords = listOf(
"imapackagename" to Word("imapackagename")
),
parseOnlyWords = listOf(
"withCamelCase" to Word("with", "camel", "case"),
"with_snake_case" to Word("with", "snake", "case"),
"withCamelAnd_snake_case" to Word("with", "camel", "and", "snake", "case"),
// keywords are only neutralised when printing, parsing leaves them as they are
"if" to Word("if")
),
printOnlyWords = listOf(
Word("") to "__",
Word("1") to "__",
// after dropping the leading digit, the keyword `if` remains and gets `_` appended
Word("8if") to "if_",
Word("_") to "__"
)
)

/**
 * Tests [JavaPackageName] with the cases of [BaseNotationTest]: one package name that must survive parsing and printing unchanged,
 * inputs that are only parsed to check that parts are split at `.` without changing their case, and words that are only printed to
 * check that every part is lowercased and made a valid identifier on its own.
 */
class JavaPackageNameTest: BaseNotationTest(
notation = JavaPackageName,
unchangedWords = listOf("i.am.a.packagename" to Word("i", "am", "a", "packagename")),
parseOnlyWords = listOf(
"wIth.CAPITALS" to Word("wIth", "CAPITALS"),
"if.true" to Word("if", "true")
),
printOnlyWords = listOf(
// each part is treated separately: the keyword gets `_` appended, the empty part becomes `__`
Word("if", "", "cApitAls") to "if_.__.capitals",
Word("_") to "__"
)

)

/**
 * Tests [JavaConstantName] with the cases of [BaseNotationTest]: one constant name that must survive parsing and printing unchanged,
 * an input that is only parsed, and words that are only printed to check the handling of invalid characters and empty results.
 */
class JavaConstantNameTest: BaseNotationTest(
notation = JavaConstantName,
unchangedWords = listOf(
"I_AM_A_CONSTANT" to Word("i", "am", "a", "constant")
),
parseOnlyWords = listOf(
"if" to Word("if")
),
printOnlyWords = listOf(
Word("") to "__",
Word("1") to "__",
// the leading digit is dropped; no `_` is expected here, presumably because the uppercase `IF` is not a Java keyword
Word("8if") to "IF",
Word("_") to "__"
)
)
Loading

0 comments on commit 10b4fe1

Please sign in to comment.