diff --git a/config.json b/config.json index 25db9e3..ac78c7f 100644 --- a/config.json +++ b/config.json @@ -354,6 +354,14 @@ "prerequisites": [], "difficulty": 8 }, + { + "slug": "word-count", + "name": "Word Count", + "uuid": "67ef66f1-4a18-42ba-86ca-59c296487c0d", + "practices": [], + "prerequisites": [], + "difficulty": 8 + }, { "slug": "yacht", "name": "Yacht", diff --git a/exercises/practice/word-count/.docs/instructions.append.md b/exercises/practice/word-count/.docs/instructions.append.md new file mode 100644 index 0000000..8b2019d --- /dev/null +++ b/exercises/practice/word-count/.docs/instructions.append.md @@ -0,0 +1,25 @@ +# SQLite-specific instructions + +The **result** column should contain a JSON-encoded object that maps each word (the key) to its count (an integer value). + +For example, consider the sentence above: `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` +The **result** for this input should be: + +```json +{"123":1,"agent":1,"cried":1,"fled":1,"i":1,"password":2,"so":1,"special":1,"that's":1,"the":2} +``` + +## Table Schema + +```sql +CREATE TABLE "word-count" ( + sentence TEXT NOT NULL, + result TEXT -- json object +); +``` + +## JSON documentation + +[JSON Functions And Operators][json-docs] + +[json-docs]: https://www.sqlite.org/json1.html diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md new file mode 100644 index 0000000..064393c --- /dev/null +++ b/exercises/practice/word-count/.docs/instructions.md @@ -0,0 +1,47 @@ +# Instructions + +Your task is to count how many times each word occurs in a subtitle of a drama. + +The subtitles from these dramas use only ASCII characters. + +The characters often speak in casual English, using contractions like _they're_ or _it's_. +Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word. + +Words can be separated by any form of punctuation (e.g. 
":", "!", or "?") or whitespace (e.g. "\t", "\n", or " "). +The only punctuation that does not separate words is the apostrophe in contractions. + +Numbers are considered words. +If the subtitles say _It costs 100 dollars._ then _100_ will be its own word. + +Words are case insensitive. +For example, the word _you_ occurs three times in the following sentence: + +> You come back, you hear me? DO YOU HEAR ME? + +The ordering of the word counts in the results doesn't matter. + +Here's an example that incorporates several of the elements discussed above: + +- simple words +- contractions +- numbers +- case insensitive words +- punctuation (including apostrophes) to separate words +- different forms of whitespace to separate words + +`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` + +The mapping for this subtitle would be: + +```text +123: 1 +agent: 1 +cried: 1 +fled: 1 +i: 1 +password: 2 +so: 1 +special: 1 +that's: 1 +the: 2 +``` diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md new file mode 100644 index 0000000..1654508 --- /dev/null +++ b/exercises/practice/word-count/.docs/introduction.md @@ -0,0 +1,8 @@ +# Introduction + +You teach English as a foreign language to high school students. + +You've decided to base your entire curriculum on TV shows. +You need to analyze which words are used, and how often they're repeated. + +This will let you choose the simplest shows to start with, and to gradually increase the difficulty as time passes. 
diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json new file mode 100644 index 0000000..2edde19 --- /dev/null +++ b/exercises/practice/word-count/.meta/config.json @@ -0,0 +1,18 @@ +{ + "authors": [ + "jimmytty" + ], + "files": { + "solution": [ + "word-count.sql" + ], + "test": [ + "word-count_test.sql" + ], + "example": [ + ".meta/example.sql" + ] + }, + "blurb": "Given a phrase, count the occurrences of each word in that phrase.", + "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour." +} diff --git a/exercises/practice/word-count/.meta/example.sql b/exercises/practice/word-count/.meta/example.sql new file mode 100644 index 0000000..aca1c2c --- /dev/null +++ b/exercises/practice/word-count/.meta/example.sql @@ -0,0 +1,32 @@ +UPDATE "word-count" -- no WHERE clause: result is recomputed for every row + SET result = ( + WITH RECURSIVE to_words (string, word) AS ( -- each step splits the text before the first newline off as word + VALUES(( + WITH RECURSIVE mark_sep (string, letter) AS ( -- each step consumes one character of the lowercased sentence + VALUES(LOWER(sentence), '') + UNION ALL + SELECT SUBSTRING(string, 2), CASE + WHEN GLOB('[0-9a-z'']', SUBSTRING(string, 1, 1)) + THEN SUBSTRING(string, 1, 1) -- digits, lowercase letters and apostrophes are kept + ELSE CHAR(10) -- any other character is replaced by a newline separator + END + FROM mark_sep + WHERE string <> '' + ) SELECT GROUP_CONCAT(letter, '') || CHAR(10) AS string FROM mark_sep -- NOTE(review): assumes rows are concatenated in recursion order (to be confirmed) + ), NULL) + UNION ALL + SELECT SUBSTRING( + string, INSTR(string, CHAR(10)) + 1), + TRIM(SUBSTRING(string, 1, INSTR(string, CHAR(10)) - 1), '''' -- strip apostrophes from both ends of the token + ) + FROM to_words + WHERE string <> '' + ) + SELECT JSON_GROUP_OBJECT(word, count) + FROM ( + SELECT word, COUNT(*) count + FROM to_words + WHERE word NOT IN ('', CHAR(10)) -- drops empty tokens, and the seed row's NULL word fails this test too + GROUP BY word + ) + ); diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml new file mode 100644 index 0000000..1be425b --- /dev/null +++ b/exercises/practice/word-count/.meta/tests.toml @@ -0,0 +1,57 @@ +# This is an auto-generated file. 
+# +# Regenerating this file via `configlet sync` will: +# - Recreate every `description` key/value pair +# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications +# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) +# - Preserve any other key/value pair +# +# As user-added comments (using the # character) will be removed when this file +# is regenerated, comments can be added via a `comment` key. + +[61559d5f-2cad-48fb-af53-d3973a9ee9ef] +description = "count one word" + +[5abd53a3-1aed-43a4-a15a-29f88c09cbbd] +description = "count one of each word" + +[2a3091e5-952e-4099-9fac-8f85d9655c0e] +description = "multiple occurrences of a word" + +[e81877ae-d4da-4af4-931c-d923cd621ca6] +description = "handles cramped lists" + +[7349f682-9707-47c0-a9af-be56e1e7ff30] +description = "handles expanded lists" + +[a514a0f2-8589-4279-8892-887f76a14c82] +description = "ignore punctuation" + +[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e] +description = "include numbers" + +[dac6bc6a-21ae-4954-945d-d7f716392dbf] +description = "normalize case" + +[4185a902-bdb0-4074-864c-f416e42a0f19] +description = "with apostrophes" +include = false + +[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3] +description = "with apostrophes" +reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19" + +[be72af2b-8afe-4337-b151-b297202e4a7b] +description = "with quotations" + +[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6] +description = "substrings from the beginning" + +[c5f4ef26-f3f7-4725-b314-855c04fb4c13] +description = "multiple spaces not detected as a word" + +[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360] +description = "alternating word separators not detected as a word" + +[6d00f1db-901c-4bec-9829-d20eb3044557] +description = "quotation for word with apostrophe" diff --git a/exercises/practice/word-count/create_fixture.sql b/exercises/practice/word-count/create_fixture.sql new file mode 100644 index 0000000..dbbca73 --- /dev/null +++ 
b/exercises/practice/word-count/create_fixture.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS "word-count"; +CREATE TABLE "word-count" ( + sentence TEXT NOT NULL, + result TEXT -- json object (reset to NULL at the end of this script) +); + +.mode csv +.import ./data.csv "word-count" + +UPDATE "word-count" SET sentence = REPLACE(sentence, '\n', CHAR(10)); -- replace each literal backslash-n pair in the imported text with a real newline +UPDATE "word-count" SET result = NULL; diff --git a/exercises/practice/word-count/create_test_table.sql b/exercises/practice/word-count/create_test_table.sql new file mode 100644 index 0000000..68caa09 --- /dev/null +++ b/exercises/practice/word-count/create_test_table.sql @@ -0,0 +1,34 @@ +DROP TABLE IF EXISTS tests; +CREATE TABLE IF NOT EXISTS tests ( + -- uuid and description are taken from the tests.toml file + uuid TEXT PRIMARY KEY, + description TEXT NOT NULL, + -- The following section is needed by the online test-runner + status TEXT DEFAULT 'fail', + message TEXT, + output TEXT, + test_code TEXT, + task_id INTEGER DEFAULT NULL, + -- Here are columns for the actual tests + sentence TEXT NOT NULL, + expected TEXT NOT NULL -- json object mapping each word to its count +); + +INSERT INTO tests (uuid, description, sentence, expected) + VALUES + ('61559d5f-2cad-48fb-af53-d3973a9ee9ef', 'count one word', 'word', '{"word":1}'), + ('5abd53a3-1aed-43a4-a15a-29f88c09cbbd', 'count one of each word', 'one of each', '{"one":1,"of":1,"each":1}'), + ('2a3091e5-952e-4099-9fac-8f85d9655c0e', 'multiple occurrences of a word', 'one fish two fish red fish blue fish', '{"one":1,"fish":4,"two":1,"red":1,"blue":1}'), + ('e81877ae-d4da-4af4-931c-d923cd621ca6', 'handles cramped lists', 'one,two,three', '{"one":1,"two":1,"three":1}'), + ('7349f682-9707-47c0-a9af-be56e1e7ff30', 'handles expanded lists', 'one,\ntwo,\nthree', '{"one":1,"two":1,"three":1}'), + ('a514a0f2-8589-4279-8892-887f76a14c82', 'ignore punctuation', 'car: carpet as java: javascript!!&@$%^&', '{"car":1,"carpet":1,"as":1,"java":1,"javascript":1}'), + ('d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e', 'include numbers', 'testing, 1, 2 testing', 
'{"testing":2,"1":1,"2":1}'), + ('dac6bc6a-21ae-4954-945d-d7f716392dbf', 'normalize case', 'go Go GO Stop stop', '{"go":3,"stop":2}'), + ('4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3', 'with apostrophes', '''First: don''t laugh. Then: don''t cry. You''re getting it.''', '{"first":1,"don''t":2,"laugh":1,"then":1,"cry":1,"you''re":1,"getting":1,"it":1}'), + ('be72af2b-8afe-4337-b151-b297202e4a7b', 'with quotations', 'Joe can''t tell between ''large'' and large.', '{"joe":1,"can''t":1,"tell":1,"between":1,"large":2,"and":1}'), + ('8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6', 'substrings from the beginning', 'Joe can''t tell between app, apple and a.', '{"joe":1,"can''t":1,"tell":1,"between":1,"app":1,"apple":1,"and":1,"a":1}'), + ('c5f4ef26-f3f7-4725-b314-855c04fb4c13', 'multiple spaces not detected as a word', ' multiple whitespaces', '{"multiple":1,"whitespaces":1}'), + ('50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360', 'alternating word separators not detected as a word', ',\n,one,\n ,two \n ''three''', '{"one":1,"two":1,"three":1}'), + ('6d00f1db-901c-4bec-9829-d20eb3044557', 'quotation for word with apostrophe', 'can, can''t, ''can''t''', '{"can":1,"can''t":2}'); + +UPDATE tests SET sentence = REPLACE(sentence, '\n', CHAR(10)); diff --git a/exercises/practice/word-count/data.csv b/exercises/practice/word-count/data.csv new file mode 100644 index 0000000..ee1b5e4 --- /dev/null +++ b/exercises/practice/word-count/data.csv @@ -0,0 +1,15 @@ +"word","" +"one of each","" +"one fish two fish red fish blue fish","" +"one,two,three","" +"one,\ntwo,\nthree","" +"car: carpet as java: javascript!!&@$%^&","" +"testing, 1, 2 testing","" +"go Go GO Stop stop","" +"First: don't laugh. Then: don't cry.","" +"'First: don't laugh. Then: don't cry. 
You're getting it.'","" +"Joe can't tell between 'large' and large.","" +"Joe can't tell between app, apple and a.","" +" multiple whitespaces","" +",\n,one,\n ,two \n 'three'","" +"can, can't, 'can't'","" diff --git a/exercises/practice/word-count/word-count.sql b/exercises/practice/word-count/word-count.sql new file mode 100644 index 0000000..493953b --- /dev/null +++ b/exercises/practice/word-count/word-count.sql @@ -0,0 +1,9 @@ +-- Schema: +-- CREATE TABLE "word-count" ( +-- sentence TEXT NOT NULL, +-- result TEXT -- json object +-- ); +-- +-- Task: update the word-count table and set the result based on sentence. +-- * the result column should contain a JSON-encoded object, +-- with each word as key and its count as integer value. diff --git a/exercises/practice/word-count/word-count_test.sql b/exercises/practice/word-count/word-count_test.sql new file mode 100644 index 0000000..40fb9d2 --- /dev/null +++ b/exercises/practice/word-count/word-count_test.sql @@ -0,0 +1,40 @@ +-- Create database: +.read ./create_fixture.sql + +-- Read the student's solution and save any output as markdown in user_output.md: +.mode markdown +.output user_output.md +.read ./word-count.sql +.output + +-- Create a clean testing environment: +.read ./create_test_table.sql + +-- A test passes when the student's result and the expected JSON hold the same key/value pairs, i.e. merge-patching either one into the other changes nothing. Key order is irrelevant, and JSON() minifies the compared side so insignificant whitespace in the student's output is irrelevant too: +UPDATE tests +SET status = 'pass' +FROM (SELECT sentence, result FROM "word-count") AS actual +WHERE actual.sentence = tests.sentence AND JSON_PATCH(actual.result, tests.expected) = JSON(actual.result) AND JSON_PATCH(tests.expected, actual.result) = JSON(tests.expected); + +-- Update message for failed tests to give helpful information: +UPDATE tests +SET message = ( + 'Result for "' + || tests.sentence + || '"' + || ' is <' || COALESCE(actual.result, 'NULL') + || '> but should be <' || tests.expected || '>' +) +FROM (SELECT sentence, result FROM "word-count") AS actual +WHERE actual.sentence = tests.sentence AND tests.status = 'fail'; + +-- Save results to 
./output.json (needed by the online test-runner) +.mode json +.once './output.json' +SELECT description, status, message, output, test_code, task_id +FROM tests; + +-- Display test results in readable form for the student: +.mode table +SELECT description, status, message +FROM tests;