Skip to content

Commit

Permalink
feat(helper): ✨ remove links, emails, and handles from descriptions
Browse files Browse the repository at this point in the history
  • Loading branch information
djdembeck committed Aug 17, 2023
1 parent 59a5d5c commit e80819d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
3 changes: 2 additions & 1 deletion src/helpers/authors/audible/ScrapeHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import * as cheerio from 'cheerio'
import { htmlToText } from 'html-to-text'

import { ApiAuthorOnBook, ApiAuthorProfile, ApiAuthorProfileSchema } from '#config/types'
import cleanupDescription from '#helpers/utils/cleanupDescription'
import fetch from '#helpers/utils/fetchPlus'
import SharedHelper from '#helpers/utils/shared'
import {
Expand Down Expand Up @@ -130,7 +131,7 @@ class ScrapeHelper {
}

// Description
const description = this.getDescription(dom)
const description = cleanupDescription(this.getDescription(dom))
// Genres
const genres = this.helper.collectGenres(
this.asin,
Expand Down
9 changes: 6 additions & 3 deletions src/helpers/books/audible/ApiHelper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import {
AudibleSeries,
AudibleSeriesSchema
} from '#config/types'
import cleanupDescription from '#helpers/utils/cleanupDescription'
import fetch from '#helpers/utils/fetchPlus'
import SharedHelper from '#helpers/utils/shared'
import {
Expand Down Expand Up @@ -269,9 +270,11 @@ class ApiHelper {
}
return authorJson
}),
description: htmlToText(this.audibleResponse['merchandising_summary'], {
wordwrap: false
}).trim(),
description: cleanupDescription(
htmlToText(this.audibleResponse['merchandising_summary'], {
wordwrap: false
})
).trim(),
formatType: this.audibleResponse.format_type,
...(this.categories && {
genres: [...this.getGenres(), ...this.getTags()]
Expand Down
14 changes: 14 additions & 0 deletions src/helpers/utils/cleanupDescription.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Matches email addresses such as `jane.doe@example.com`.
const emailRegex = /([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)/g

// Matches Twitter-style handles (`@` followed by 1-15 word characters).
// Group 1 captures whatever sits directly in front of the `@` (start of input,
// or one character that is neither `@` nor a word character) so the replacement
// can decide whether that character must be kept.
const twitterRegex = /(^|[^@\w])@(\w{1,15})\b/g

// Matches URLs, with or without http(s).
//  - With a scheme: any dotted host (`https://example.com`, `http://10.0.0.1`).
//  - Without a scheme: the last host label must be at least two letters
//    (`example.com`, `www.example.co.uk`). This keeps initials (`J.K.`),
//    abbreviations (`e.g.`), decimals (`7.5`) and version numbers (`1.2.3`)
//    from being mistaken for links.
// An optional trailing dot, port and path are consumed in both cases.
const urlRegex =
	/(?:https?:\/\/[\w-]+(?:\.[\w-]+)+|[\w-]+(?:\.[\w-]+)*\.[a-zA-Z]{2,}(?![\w-]))\.?(?::\d+)?(?:\/\S*)?/g

/**
 * Strip contact details from a free-text description.
 *
 * Removes, in order: email addresses, Twitter-style `@handles`, and URLs, then
 * trims leading/trailing whitespace. Emails are removed first so their `@domain`
 * part is never treated as a handle or a bare URL.
 *
 * @param description plain-text description to sanitize
 * @returns the description without emails, handles, or URLs
 */
const cleanupDescription = (description: string): string => {
	return (
		description
			.replace(emailRegex, '')
			// Whitespace in front of a handle is dropped together with it so no double
			// space is left behind; any other delimiter (e.g. an opening parenthesis)
			// belongs to the surrounding text and is preserved.
			.replace(twitterRegex, (_handle: string, lead: string) => (/\s/.test(lead) ? '' : lead))
			.replace(urlRegex, '')
			.trim()
	)
}

export default cleanupDescription

0 comments on commit e80819d

Please sign in to comment.