exercism · SleeplessByte · Mar 22, 2024 · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/config.json b/config.json
@@ -2577,6 +2577,16 @@
           "loops",
           "strings"
         ]
+      },
+      {
+        "slug": "micro-blog",
+        "name": "Micro Blog",
+        "uuid": "ee771d09-33fb-4450-b9c3-d591a4a90a99",
+        "practices": [],
+        "prerequisites": [
+          "strings"
+        ],
+        "difficulty": 2
       }
     ]
   },

diff --git a/exercises/practice/micro-blog/.approaches/config.json b/exercises/practice/micro-blog/.approaches/config.json
@@ -0,0 +1,36 @@
+{
+  "introduction": {
+    "authors": [
+      "Cool-Katt"
+    ]
+  },
+  "approaches": [
+    {
+      "uuid": "ebd5893b-1f62-4634-a086-414338da1842",
+      "slug": "regex",
+      "title": "Regex",
+      "blurb": "Split a Unicode string using a RegEx.",
+      "authors": [
+        "Cool-Katt"
+      ]
+    },
+    {
+      "uuid": "a12fa836-201c-43bb-b7e2-28f441c270db",
+      "slug": "iterators",
+      "title": "Iterators",
+      "blurb": "Split a Unicode string using a string iterator.",
+      "authors": [
+        "Cool-Katt"
+      ]
+    },
+    {
+      "uuid": "c8b58d62-a129-41ad-afa6-cc6afb5b284c",
+      "slug": "intl-segmenter",
+      "title": "Intl.Segmenter",
+      "blurb": "Split a Unicode string using Intl.Segmenter.",
+      "authors": [
+        "Cool-Katt"
+      ]
+    }
+  ]
+}
diff --git a/exercises/practice/micro-blog/.approaches/intl-segmenter/content.md b/exercises/practice/micro-blog/.approaches/intl-segmenter/content.md
@@ -0,0 +1,27 @@
+# Intl.Segmenter
+
+```javascript
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+
+const splitWithSegmenter = (s) =>
+  Array.from(new Intl.Segmenter().segment(String(s)), (x) => x.segment)
+    .slice(0, 5)
+    .join('');
+
+console.log(splitWithSegmenter(string)); // will be "👨‍👨‍👧‍👧💜🤧🤒🏥" - correct, yay!
+```
+
+This solution:
+
+- Uses the [Intl.Segmenter object][segmenter] to split the string by graphemes and form an array from the result.
+- Then it separates the first 5 graphemes.
+- Finally, it joins them back into a string.
+
+<!-- prettier-ignore-start -->
+~~~~exercism/note
+At the time of writing (February 2024) this method is not fully supported by the stable release of the Mozilla Firefox browser.
+However, support for the Intl.Segmenter object is being worked on in the Nightly release of the browser.
+~~~~
+<!-- prettier-ignore-end -->
+
+[segmenter]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/Segmenter
diff --git a/exercises/practice/micro-blog/.approaches/intl-segmenter/snippet.txt b/exercises/practice/micro-blog/.approaches/intl-segmenter/snippet.txt
@@ -0,0 +1,7 @@
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+
+const splitWithSegmenter = (s) =>
+  Array.from(new Intl.Segmenter().segment(String(s)), (x) => x.segment)
+    .slice(0, 5)
+    .join('');
+console.log(splitWithSegmenter(string)); // will be "👨‍👨‍👧‍👧💜🤧🤒🏥" - correct, yay!
diff --git a/exercises/practice/micro-blog/.approaches/introduction.md b/exercises/practice/micro-blog/.approaches/introduction.md
@@ -0,0 +1,64 @@
+# Introduction
+
+As noted in this exercise's introduction, most built-in JavaScript methods for working with strings are Unicode-aware, but work with UTF-16 code units.
+This might not be a problem if all of the input consists of characters represented by a single code unit; in that case you might not even notice it.
+Unfortunately, this isn't the case with our micro-blog.
+
+Different approaches we'll compare include:
+
+- Using a `String iterator`
+- Using a `Regular Expression`
+- Using `Intl.Segmenter`
+
+## General guidance
+
+The main part of this exercise is figuring out how to split a Unicode encoded string and count up to 5 characters of it.
+
+## Approach: `String iterator`
+
+```javascript
+function splitWithIterator(string) {
+  return [...string].slice(0, 5).join('');
+}
+```
+
+For more information, and a detailed explanation, check the [`String iterator` approach][iterator].
+
+## Approach: `Regular Expression`
+
+```javascript
+function splitWithRegex(string) {
+  return string.match(/.{0,5}/gu)[0];
+}
+```
+
+For more information, and a detailed explanation, check the [`Regular Expression` approach][regex].
+
+## Other approaches
+
+The aforementioned approaches both work with Unicode code points, so characters made of multiple code units aren't a problem.
+But what about characters made of multiple code _points_, like some emoji?
+
+### Other approach: `Intl.Segmenter`
+
+The `Intl.Segmenter` object enables locale-sensitive string splitting and by default splits by graphemes,
+so it should work well with symbols like emoji made of multiple code points.
+For more information, and a detailed explanation, check the [`Intl.Segmenter` approach][separator].
+
+## Which approach is the best in terms of performance?
+
+Testing with the following two strings on [JSBench.me][jsbench-me] yielded:
+
+```javascript
+let string1 = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+let string2 = 'The quick brown fox jumps over the lazy dog. It barked.';
+```
+
+- The `String iterator` approach benched fastest.
+- The `RegEx` approach was about 12% slower than the first.
+- The `Intl.Segmenter` approach was the slowest of the three, by a considerable margin.
+
+[iterator]: https://exercism.org/tracks/javascript/exercises/micro-blog/approaches/iterators
+[regex]: https://exercism.org/tracks/javascript/exercises/micro-blog/approaches/regex
+[separator]: https://exercism.org/tracks/javascript/exercises/micro-blog/approaches/intl-segmenter
+[jsbench-me]: https://jsbench.me/
diff --git a/exercises/practice/micro-blog/.approaches/iterators/content.md b/exercises/practice/micro-blog/.approaches/iterators/content.md
@@ -0,0 +1,20 @@
+# Iterators
+
+```javascript
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+let string2 = 'The quick brown fox jumps over the lazy dog. It barked.';
+
+const splitWithIterator = (s) => [...s].slice(0, 5).join('');
+
+console.log(splitWithIterator(string)); // will be "👨‍👨‍👧" - incorrect
+console.log(splitWithIterator(string2)); // will be "The q"
+```
+
+This solution:
+
+- Uses [spread syntax][spread] to unpack the string into an array of its characters.
+  - Internally, the spread operator works with iterators to separate the string by its code points.
+- Then it separates the first 5 characters (code points).
+- Finally, it joins them back into a string.
+
+[spread]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Spread_syntax
diff --git a/exercises/practice/micro-blog/.approaches/iterators/snippet.txt b/exercises/practice/micro-blog/.approaches/iterators/snippet.txt
@@ -0,0 +1,4 @@
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+
+const splitWithIterator = (s) => [...s].slice(0, 5).join('');
+console.log(splitWithIterator(string)) // will be "👨‍👨‍👧" - incorrect
diff --git a/exercises/practice/micro-blog/.approaches/regex/content.md b/exercises/practice/micro-blog/.approaches/regex/content.md
@@ -0,0 +1,20 @@
+# Regex
+
+```javascript
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+let string2 = 'The quick brown fox jumps over the lazy dog. It barked.';
+
+const splitWithRegEx = (s) => s.match(/.{0,5}/gu)[0];
+
+console.log(splitWithRegEx(string)); // will be "👨‍👨‍👧" - incorrect
+console.log(splitWithRegEx(string2)); // will be "The q"
+```
+
+This solution:
+
+- Uses the [String.match() method][match] with a supplied RegEx
+  - The RegEx supplied matches any character `.`, between 0 and 5 times `{0,5}`. The `u` flag enables Unicode support.
+  - This matches characters by code points as well.
+- Then it returns the first match as the output string.
+
+[match]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/match
diff --git a/exercises/practice/micro-blog/.approaches/regex/snippet.txt b/exercises/practice/micro-blog/.approaches/regex/snippet.txt
@@ -0,0 +1,4 @@
+let string = '👨‍👨‍👧‍👧💜🤧🤒🏥😀';
+
+const splitWithRegEx = (s) => s.match(/.{0,5}/gu)[0];
+console.log(splitWithRegEx(string)); // will be "👨‍👨‍👧" - incorrect
diff --git a/exercises/practice/micro-blog/.docs/instruction.append.md b/exercises/practice/micro-blog/.docs/instruction.append.md
@@ -0,0 +1,33 @@
+# Instruction append
+
+## Unicode code points vs code units
+
+A "normal" UTF-16 encoded string can be represented as a series of characters, where each character can be up to 16 bits long (hence, the name UTF-16).
+This means there are a maximum of 2¹⁶ (two to the power of sixteen), or 65536 possible characters representable with 16 bits, or 1 code **unit**.
+These 65536 characters form what's known as the [Basic Multilingual Plane][basic-multilingual-set], which is large enough for the most common characters of most languages.
+
+However, some symbols can't fit in just 1 code unit. The solution is to represent them with two code units.
+These two UTF-16 code units, often also referred to as a _surrogate pair_, form a code **point**.
+
+So, in summary, when referring to UTF-16 encoding:
+
+- A `code unit` is a 16-bit value that represents either a whole character or one half of a surrogate pair.
+- A `code point` is one or two code units representing a single character.
+
+To add more confusion to the mix, there are also _grapheme clusters_,
+which are sequences of Unicode characters (code points) that should be treated as a single visual unit.
+For example, some emojis, like this one 👨‍👦.
+
+## UTF-16 in JavaScript
+
+Most built-in JavaScript methods will work with UTF-16 encoded strings, however they work based on UTF-16 code units.
+For example, calling [`String.prototype.split("")`][split] will separate a string by code units.
+
+On the other hand, [`String iterators`][iterator] iterate by code points.
+
+You can read a lot more, and find examples about Unicode strings, on [MDN][MDN].
+
+[basic-multilingual-set]: https://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane
+[split]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/split
+[iterator]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/@@iterator
+[MDN]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String#utf-16_characters_unicode_code_points_and_grapheme_clusters
diff --git a/exercises/practice/micro-blog/.docs/instructions.md b/exercises/practice/micro-blog/.docs/instructions.md
@@ -0,0 +1,37 @@
+# Instructions
+
+You have identified a gap in the social media market for very very short posts.
+Now that Twitter allows 280 character posts, people wanting quick social media updates aren't being served.
+You decide to create your own social media network.
+
+To make your product noteworthy, you make it extreme and only allow posts of 5 or less characters.
+Any posts of more than 5 characters should be truncated to 5.
+
+To allow your users to express themselves fully, you allow Emoji and other Unicode.
+
+The task is to truncate input strings to 5 characters.
+
+## Text Encodings
+
+Text stored digitally has to be converted to a series of bytes.
+There are 3 ways to map characters to bytes in common use.
+
+- **ASCII** can encode English language characters.
+  All characters are precisely 1 byte long.
+- **UTF-8** is a Unicode text encoding.
+  Characters take between 1 and 4 bytes.
+- **UTF-16** is a Unicode text encoding.
+  Characters are either 2 or 4 bytes long.
+
+UTF-8 and UTF-16 are both Unicode encodings which means they're capable of representing a massive range of characters including:
+
+- Text in most of the world's languages and scripts
+- Historic text
+- Emoji
+
+UTF-8 and UTF-16 are both variable length encodings, which means that different characters take up different amounts of space.
+
+Consider the letter 'a' and the emoji '😛'.
+In UTF-16 the letter takes 2 bytes but the emoji takes 4 bytes.
+
+The trick to this exercise is to use APIs designed around Unicode characters (codepoints) instead of Unicode codeunits.
diff --git a/exercises/practice/micro-blog/.eslintrc b/exercises/practice/micro-blog/.eslintrc
@@ -0,0 +1,14 @@
+{
+  "root": true,
+  "extends": "@exercism/eslint-config-javascript",
+  "env": {
+    "jest": true
+  },
+  "overrides": [
+    {
+      "files": [".meta/proof.ci.js", ".meta/exemplar.js", "*.spec.js"],
+      "excludedFiles": ["custom.spec.js"],
+      "extends": "@exercism/eslint-config-javascript/maintainers"
+    }
+  ]
+}
diff --git a/exercises/practice/micro-blog/.gitignore b/exercises/practice/micro-blog/.gitignore
@@ -0,0 +1,5 @@
+/node_modules
+/bin/configlet
+/bin/configlet.exe
+/pnpm-lock.yaml
+/yarn.lock
diff --git a/exercises/practice/micro-blog/.meta/config.json b/exercises/practice/micro-blog/.meta/config.json
@@ -0,0 +1,17 @@
+{
+  "authors": [
+    "Cool-Katt"
+  ],
+  "files": {
+    "solution": [
+      "micro-blog.js"
+    ],
+    "test": [
+      "micro-blog.spec.js"
+    ],
+    "example": [
+      ".meta/proof.ci.js"
+    ]
+  },
+  "blurb": "Given an input string, truncate it to 5 characters."
+}
diff --git a/exercises/practice/micro-blog/.meta/proof.ci.js b/exercises/practice/micro-blog/.meta/proof.ci.js
@@ -0,0 +1,4 @@
+export const truncate = (input) => // Truncates `input` (coerced to a string) to at most its first 5 segments.
+  Array.from(new Intl.Segmenter().segment(String(input)), (x) => x.segment) // one array entry per segment; per this change's approach notes, Intl.Segmenter splits by graphemes by default
+    .slice(0, 5) // keep at most the first five segments
+    .join(''); // glue the kept segments back into a single string
diff --git a/exercises/practice/micro-blog/.meta/tests.toml b/exercises/practice/micro-blog/.meta/tests.toml
@@ -0,0 +1,46 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[b927b57f-7c98-42fd-8f33-fae091dc1efc]
+description = "English language short"
+
+[a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0]
+description = "English language long"
+
+[01910864-8e15-4007-9c7c-ac956c686e60]
+description = "German language short (broth)"
+
+[f263e488-aefb-478f-a671-b6ba99722543]
+description = "German language long (bear carpet → beards)"
+
+[0916e8f1-41d7-4402-a110-b08aa000342c]
+description = "Bulgarian language short (good)"
+
+[bed6b89c-03df-4154-98e6-a61a74f61b7d]
+description = "Greek language short (health)"
+
+[485a6a70-2edb-424d-b999-5529dbc8e002]
+description = "Maths short"
+
+[8b4b7b51-8f48-4fbe-964e-6e4e6438be28]
+description = "Maths long"
+
+[71f4a192-0566-4402-a512-fe12878be523]
+description = "English and emoji short"
+
+[6f0f71f3-9806-4759-a844-fa182f7bc203]
+description = "Emoji short"
+
+[ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e]
+description = "Emoji long"
+
+[5dee98d2-d56e-468a-a1f2-121c3f7c5a0b]
+description = "Royal Flush?"
diff --git a/exercises/practice/micro-blog/.npmrc b/exercises/practice/micro-blog/.npmrc
@@ -0,0 +1 @@
+audit=false
diff --git a/exercises/practice/micro-blog/LICENSE b/exercises/practice/micro-blog/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Exercism
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.