diff --git a/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md index b84685cf0c..d393e41185 100644 --- a/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md @@ -36,14 +36,14 @@ It's because some products have variants with different prices. Later in the cou Ideally we'd go and discuss the problem with those who are about to use the resulting data. For their purposes, is the fact that some prices are just minimum prices important? What would be the most useful representation of the range for them? Maybe they'd tell us that it's okay if we just remove the `From` prefix? ```js -const priceText = price.text().replace("From ", ""); +const priceText = $price.text().replace("From ", ""); ``` In other cases, they'd tell us the data must include the range. And in cases when we just don't know, the safest option is to include all the information we have and leave the decision on what's important to later stages. One approach could be having the exact and minimum prices as separate values. 
If we don't know the exact price, we leave it empty: ```js const priceRange = { minPrice: null, price: null }; -const priceText = price.text() +const priceText = $price.text() if (priceText.startsWith("From ")) { priceRange.minPrice = priceText.replace("From ", ""); } else { @@ -71,14 +71,14 @@ if (response.ok) { const $ = cheerio.load(html); $(".product-item").each((i, element) => { - const productItem = $(element); + const $productItem = $(element); - const title = productItem.find(".product-item__title"); - const titleText = title.text(); + const $title = $productItem.find(".product-item__title"); + const title = $title.text(); - const price = productItem.find(".price").contents().last(); + const $price = $productItem.find(".price").contents().last(); const priceRange = { minPrice: null, price: null }; - const priceText = price.text(); + const priceText = $price.text(); if (priceText.startsWith("From ")) { priceRange.minPrice = priceText.replace("From ", ""); } else { @@ -86,7 +86,7 @@ if (response.ok) { priceRange.price = priceRange.minPrice; } - console.log(`${titleText} | ${priceRange.minPrice} | ${priceRange.price}`); + console.log(`${title} | ${priceRange.minPrice} | ${priceRange.price}`); }); } else { throw new Error(`HTTP ${response.status}`); @@ -100,9 +100,9 @@ Often, the strings we extract from a web page start or end with some amount of w We call the operation of removing whitespace _trimming_ or _stripping_, and it's so useful in many applications that programming languages and libraries include ready-made tools for it. 
Let's add JavaScript's built-in [.trim()](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/trim): ```js -const titleText = title.text().trim(); +const title = $title.text().trim(); -const priceText = price.text().trim(); +const priceText = $price.text().trim(); ``` ## Removing dollar sign and commas @@ -124,7 +124,7 @@ The demonstration above is inside the Node.js' [interactive REPL](https://nodejs We need to remove the dollar sign and the decimal commas. For this type of cleaning, [regular expressions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions) are often the best tool for the job, but in this case [`.replace()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/String/replace) is also sufficient: ```js -const priceText = price +const priceText = $price .text() .trim() .replace("$", "") @@ -137,7 +137,7 @@ Now we should be able to add `parseFloat()`, so that we have the prices not as a ```js const priceRange = { minPrice: null, price: null }; -const priceText = price.text() +const priceText = $price.text() if (priceText.startsWith("From ")) { priceRange.minPrice = parseFloat(priceText.replace("From ", "")); } else { @@ -156,7 +156,7 @@ Great! Only if we didn't overlook an important pitfall called [floating-point er These errors are small and usually don't matter, but sometimes they can add up and cause unpleasant discrepancies. That's why it's typically best to avoid floating point numbers when working with money. 
We won't store dollars, but cents: ```js -const priceText = price +const priceText = $price .text() .trim() .replace("$", "") @@ -178,14 +178,14 @@ if (response.ok) { const $ = cheerio.load(html); $(".product-item").each((i, element) => { - const productItem = $(element); + const $productItem = $(element); - const title = productItem.find(".product-item__title"); - const titleText = title.text().trim(); + const $title = $productItem.find(".product-item__title"); + const title = $title.text().trim(); - const price = productItem.find(".price").contents().last(); + const $price = $productItem.find(".price").contents().last(); const priceRange = { minPrice: null, price: null }; - const priceText = price + const priceText = $price .text() .trim() .replace("$", "") @@ -199,7 +199,7 @@ if (response.ok) { priceRange.price = priceRange.minPrice; } - console.log(`${titleText} | ${priceRange.minPrice} | ${priceRange.price}`); + console.log(`${title} | ${priceRange.minPrice} | ${priceRange.price}`); }); } else { throw new Error(`HTTP ${response.status}`); @@ -259,15 +259,15 @@ Denon AH-C720 In-Ear Headphones | 236 const $ = cheerio.load(html); $(".product-item").each((i, element) => { - const productItem = $(element); + const $productItem = $(element); - const title = productItem.find(".product-item__title"); - const titleText = title.text().trim(); + const $title = $productItem.find(".product-item__title"); + const title = $title.text().trim(); - const unitsText = productItem.find(".product-item__inventory").text(); + const unitsText = $productItem.find(".product-item__inventory").text(); const unitsCount = parseUnitsText(unitsText); - console.log(`${titleText} | ${unitsCount}`); + console.log(`${title} | ${unitsCount}`); }); } else { throw new Error(`HTTP ${response.status}`); @@ -308,15 +308,15 @@ Simplify the code from previous exercise. 
Use [regular expressions](https://deve const $ = cheerio.load(html); $(".product-item").each((i, element) => { - const productItem = $(element); + const $productItem = $(element); - const title = productItem.find(".product-item__title"); - const titleText = title.text().trim(); + const $title = $productItem.find(".product-item__title"); + const title = $title.text().trim(); - const unitsText = productItem.find(".product-item__inventory").text(); + const unitsText = $productItem.find(".product-item__inventory").text(); const unitsCount = parseUnitsText(unitsText); - console.log(`${titleText} | ${unitsCount}`); + console.log(`${title} | ${unitsCount}`); }); } else { throw new Error(`HTTP ${response.status}`); @@ -370,19 +370,19 @@ Hints: const $ = cheerio.load(html); $("#maincontent ul li").each((i, element) => { - const article = $(element); + const $article = $(element); - const titleText = article + const title = $article .find("h3") .text() .trim(); - const dateText = article + const dateText = $article .find("time") .attr("datetime") .trim(); const date = new Date(dateText); - console.log(`${titleText} | ${date.toDateString()}`); + console.log(`${title} | ${date.toDateString()}`); }); } else { throw new Error(`HTTP ${response.status}`);