Skip to content

Commit

Permalink
feat(book-audible-api): ♻️ use audible API for series data instead of…
Browse files Browse the repository at this point in the history
… scraping
  • Loading branch information
djdembeck committed Nov 22, 2021
1 parent c281ac3 commit 0c7c7db
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 103 deletions.
32 changes: 26 additions & 6 deletions src/helpers/books/audibleApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import fetch from 'isomorphic-fetch'
// For merchandising_summary
import { htmlToText } from 'html-to-text'
import { AudibleInterface } from '../../interfaces/audible/index'
import { ApiBookInterface } from '../../interfaces/books/index'
import { ApiBookInterface, SeriesInterface } from '../../interfaces/books/index'
import { AuthorInterface, NarratorInterface } from '../../interfaces/people/index'
import SharedHelper from '.././shared'

Expand All @@ -14,7 +14,7 @@ class ApiHelper {
const helper = new SharedHelper()
const baseDomain: string = 'https://api.audible.com'
const baseUrl: string = '1.0/catalog/products'
const params = '?response_groups=contributors,product_desc,product_extended_attrs,product_attrs,media,rating&image_sizes=500,1024'
const params = '?response_groups=contributors,product_desc,product_extended_attrs,product_attrs,media,rating,series&image_sizes=500,1024'
this.reqUrl = helper.buildUrl(asin, baseDomain, baseUrl, params)
}

Expand Down Expand Up @@ -164,10 +164,30 @@ class ApiHelper {
newKey = 'runtimeLengthMin'
optionalKeyHandling(key, newKey)

// SeriesPrimary
key = 'publication_name'
newKey = 'publicationName'
optionalKeyHandling(key, newKey)
// Series
// Each entry of the API's `series` array is converted into a SeriesInterface.
// An entry whose title equals `publication_name` becomes the primary series;
// any other entry becomes the secondary one, but only when more than one
// series is listed.
key = 'series'
if (key in inputJson) {
  inputJson[key].forEach((series: { asin: string | undefined; title: string; sequence: string | undefined }) => {
    const seriesJson = <SeriesInterface>{}
    if ('asin' in series) {
      seriesJson.asin = series.asin
    }
    // The display name is carried by `title` (see the parameter type and the
    // assignment below); testing for a `name` key does not match that shape.
    if ('title' in series) {
      seriesJson.name = series.title
    } else {
      console.log(`Series name not available on: ${inputJson.asin}`)
      // Skip this entry; forEach ignores the callback's return value
      return
    }
    if ('sequence' in series) {
      seriesJson.position = series.sequence
    }
    if (series.title === inputJson.publication_name) {
      finalJson.seriesPrimary = seriesJson
    } else if (inputJson.series.length > 1) {
      finalJson.seriesSecondary = seriesJson
    }
  })
}

// Subtitle
key = 'subtitle'
Expand Down
65 changes: 2 additions & 63 deletions src/helpers/books/audibleScrape.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// Import interfaces
import { HtmlBookInterface, SeriesInterface } from '../../interfaces/books/index'
import { HtmlBookInterface } from '../../interfaces/books/index'
import { GenreInterface } from '../../interfaces/audible'
import fetch from 'isomorphic-fetch'
// For HTML scraping
Expand Down Expand Up @@ -53,45 +53,6 @@ class ScrapeHelper {
return genreArr
}

/**
 * Builds series entries from the series links found on the html page.
 * Links without an href or without link text are logged and left out of
 * the result, so the returned array only holds complete entries.
 * @param {cheerio.Cheerio<cheerio.Element>[]} series anchor nodes selected from seriesLabel
 * @param {string} seriesRaw innerHTML of the series node, searched for the book position
 * @returns {SeriesInterface[]} one entry per usable series link (empty when none)
 */
collectSeries (series: cheerio.Cheerio<cheerio.Element>[], seriesRaw: string): SeriesInterface[] | undefined {
  const bookPos = this.getBookFromHTML(seriesRaw)
  const seriesArr: SeriesInterface[] = []

  series.forEach((serie, index) => {
    const href = serie.attr('href')
    if (!href) {
      console.log(`Series ${index} asin not available on: ${this.asin}`)
      return
    }

    const asin = this.getAsinFromUrl(href)
    const name = serie.text()
    if (!name) {
      console.log(`Series ${index} name not available on: ${this.asin}`)
      return
    }

    const thisSeries = {} as SeriesInterface
    thisSeries.asin = asin
    thisSeries.name = name

    // The first position match found in the markup is applied to every series
    if (bookPos && bookPos[0]) {
      thisSeries.position = bookPos[0]
    }

    seriesArr.push(thisSeries)
  })

  return seriesArr
}

/**
* Fetches the html page and checks its response
* @returns {Promise<cheerio.CheerioAPI | undefined>} return text from the html page
Expand Down Expand Up @@ -126,17 +87,12 @@ class ScrapeHelper {
.toArray()
.map(element => dom(element))

const series = dom('li.seriesLabel a')
.toArray()
.map(element => dom(element))

const tags = dom('div.bc-chip-group a')
.toArray()
.map(element => dom(element))

const returnJson = {
genres: Array<GenreInterface>(genres.length + tags.length),
series: Array<SeriesInterface>(series.length)
genres: Array<GenreInterface>(genres.length + tags.length)
} as HtmlBookInterface

// Combine genres and tags
Expand All @@ -153,12 +109,6 @@ class ScrapeHelper {
returnJson.genres = genreArr as GenreInterface[]
}

// Series
if (series.length) {
const seriesRaw = dom('li.seriesLabel').html()!
returnJson.series = this.collectSeries(series, seriesRaw)
}

return returnJson
}

Expand All @@ -173,17 +123,6 @@ class ScrapeHelper {
const ASIN = url.match(asinRegex)![0]
return ASIN
}

/**
 * Regex to find the book position markers (like "Book 3") in HTML input
 * @param {string} html markup to search for book numbers
 * @returns {RegExpMatchArray | null} every match found, or null when there is none
 */
getBookFromHTML (html: string): RegExpMatchArray | null {
  const bookRegex = /(Book ?(\d*\.)?\d+[+-]?[\d]?)/gm
  // String.prototype.match with a global regex yields all matches or null
  return html.match(bookRegex)
}
}

export default ScrapeHelper
33 changes: 2 additions & 31 deletions src/helpers/books/audibleStitch.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { ApiBookInterface, BookInterface, HtmlBookInterface, SeriesInterface } from '../../interfaces/books/index'
import { ApiBookInterface, BookInterface, HtmlBookInterface } from '../../interfaces/books/index'

class StitchHelper {
apiRes: ApiBookInterface;
Expand All @@ -19,41 +19,12 @@ class StitchHelper {
}
}

/**
 * Picks the primary (and, when several exist, secondary) series for the book
 * and removes the temporary publicationName key afterwards
 */
async setSeriesOrder () {
  // Nothing to do unless the API response carries a publication name
  if (!this.apiRes.publicationName) return

  if (!this.htmlRes) {
    // No scraped page available: fall back to the publication name alone
    this.tempJson.seriesPrimary = { name: this.tempJson.publicationName } as SeriesInterface
  } else {
    const scrapedSeries = this.htmlRes.series

    if (scrapedSeries && scrapedSeries.length > 1) {
      // Several series: the one named like the publication is the primary
      scrapedSeries.forEach((entry) => {
        const isPrimary = entry.name === this.apiRes.publicationName
        if (isPrimary) {
          this.tempJson.seriesPrimary = entry
        } else {
          this.tempJson.seriesSecondary = entry
        }
      })
    } else if (scrapedSeries) {
      this.tempJson.seriesPrimary = scrapedSeries[0]
    }
  }

  delete this.tempJson.publicationName
}

/**
 * Call functions in the class to parse final JSON.
 * Invokes the helper methods, then copies tempJson into bookJson and returns it.
 * @returns {Promise<BookInterface>}
 */
async process (): Promise<BookInterface> {
// NOTE(review): the Promise.all result is neither awaited nor returned, so
// execution continues without waiting for it to settle — confirm this is intended.
Promise.all([this.includeGenres(), this.setSeriesOrder()])
Promise.all([this.includeGenres()])
this.bookJson = this.tempJson
return this.bookJson
}
Expand Down
2 changes: 2 additions & 0 deletions src/interfaces/audible/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/* eslint-disable camelcase */
import { SeriesInterface } from '../books'
import { AuthorInterface, NarratorInterface } from '../people/index'

interface Codecs {
Expand Down Expand Up @@ -56,6 +57,7 @@ export interface AudibleInterface {
rating: Ratings;
release_date: string;
runtime_length_min: number;
series: SeriesInterface[];
social_media_images: any;
subtitle?: string;
thesaurus_subject_keywords: [string]
Expand Down
3 changes: 0 additions & 3 deletions src/interfaces/books/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,4 @@ export interface ApiBookInterface extends CoreBook {
// What we expect to keep from Audible's HTML pages
export interface HtmlBookInterface {
// Genres and tags collected from the page
genres?: GenreInterface[];
// Every series parsed from the page's series links
series?: SeriesInterface[];
// NOTE(review): presumably the series matching the API's publication name — confirm
seriesPrimary?: SeriesInterface;
// NOTE(review): presumably an additional, non-primary series — confirm
seriesSecondary?: SeriesInterface;
}

0 comments on commit 0c7c7db

Please sign in to comment.