Skip to content

Commit

Permalink
Merge pull request #14 from carmolim/scan-log
Browse files Browse the repository at this point in the history
[feat] logging url scans
  • Loading branch information
carmolim committed Sep 9, 2023
2 parents 1af2f03 + c786a73 commit 6aaa31c
Show file tree
Hide file tree
Showing 6 changed files with 256 additions and 166 deletions.
102 changes: 50 additions & 52 deletions components/Ad.js
Original file line number Diff line number Diff line change
@@ -1,108 +1,106 @@
'use strict';

const path = require( 'path' )
const config = require( '../config' )
const notifier = require( './Notifier' )
const log = require('simple-node-logger').createSimpleLogger( path.join( __dirname, '../', config.logFile ) )
const adRepository = require('../repositories/adRepositorie.js')

class Ad{

constructor( ad ){
this.id = ad.id
this.url = ad.url
this.title = ad.title
this.searchTerm = ad.searchTerm
this.searchId = ad.searchId
this.price = ad.price
this.valid = false
this.saved = null,
this.notify = ad.notify
const path = require('path')
const config = require('../config')
const notifier = require('./Notifier')
const log = require('simple-node-logger').createSimpleLogger(path.join(__dirname, '../', config.logFile))
const adRepository = require('../repositories/adRepository.js')

class Ad {

constructor(ad) {
this.id = ad.id
this.url = ad.url
this.title = ad.title
this.searchTerm = ad.searchTerm
this.price = ad.price
this.valid = false
this.saved = null,
this.notify = ad.notify
}

process = async () => {

if ( !this.isValidAd() ) {
log.debug( 'Ad not valid' );
if (!this.isValidAd()) {
log.debug('Ad not valid');
return false
}

try{
try {

// check if this entry was already added to DB
if( await this.alreadySaved() ){
if (await this.alreadySaved()) {
return this.checkPriceChange()
}
else{

else {
// create a new entry in the database
return this.addToDataBase()
}
} catch( error ){
log.error( error );

} catch (error) {
log.error(error);
}
}

alreadySaved = async () => {
try {
this.saved = await adRepository.getAd(this.id)
return true
} catch ( error ) {
} catch (error) {
return false
}
}

addToDataBase = async () => {

try {
await adRepository.createAd(this)
log.info('Ad ' + this.id + ' added to the database')

// because in the first run all the ads are new
if( this.notify ){

// because in the first run all the ads are new
if (this.notify) {
try {
const msg = 'New ad found!\n' + this.title + ' - R$' + this.price + '\n\n' + this.url
log.info('Ad ' + this.id + ' added to the database')
await notifier.sendNotification( msg )
await notifier.sendNotification(msg)
} catch (error) {
log.error('Could not send a notification')
}
}
}
catch ( error ) {
log.error( error )

catch (error) {
log.error(error)
}
}

updatePrice = async () => {
log.info( 'updatePrice' )
log.info('updatePrice')

try {
await adRepository.updateAd(this)
} catch ( error ) {
log.error( error )
} catch (error) {
log.error(error)
}
}

checkPriceChange = async () => {

if( this.price !== this.saved.price ){
if (this.price !== this.saved.price) {

await this.updatePrice(this)

// just send a notification if the price dropped
if( this.price < this.saved.price ){
if (this.price < this.saved.price) {

log.info('This ad had a price reduction: ' + this.url )
log.info('This ad had a price reduction: ' + this.url)

const decreasePercentage = Math.abs( Math.round( ( ( this.price - this.saved.price ) / this.saved.price ) * 100 ) )
const decreasePercentage = Math.abs(Math.round(((this.price - this.saved.price) / this.saved.price) * 100))

const msg = 'Price drop found! '+ decreasePercentage +'% OFF!\n' +
'From R$' + this.saved.price + ' to R$' + this.price + '\n\n' + this.url
const msg = 'Price drop found! ' + decreasePercentage + '% OFF!\n' +
'From R$' + this.saved.price + ' to R$' + this.price + '\n\n' + this.url

await notifier.sendNotification( msg )
await notifier.sendNotification(msg)
}
}
}
Expand All @@ -112,15 +110,15 @@ class Ad{
// let's clean those empty ads
isValidAd = () => {

if ( !isNaN(this.price) && this.url && this.id ){
if (!isNaN(this.price) && this.url && this.id) {
this.valid = true
return true
}
else{
else {
this.valid = false
return false
}
}
}

module.exports = Ad
module.exports = Ad
98 changes: 51 additions & 47 deletions components/Scraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ const config = require('../config')
const path = require('path')
const axios = require('axios')
const cheerio = require('cheerio')
const log = require('simple-node-logger').createSimpleLogger( path.join( __dirname, '../', config.logFile ) );
const log = require('simple-node-logger').createSimpleLogger(path.join(__dirname, '../', config.logFile));

const scraperRepository = require('../repositories/scrapperRepository.js')

const adRepository = require('../repositories/adRepositorie.js')
const Ad = require('./Ad.js')

let page = 1
Expand All @@ -27,27 +28,46 @@ const scraper = async (url) => {

const parsedUrl = new URL(url)
const searchTerm = parsedUrl.searchParams.get('q') || ''
const searchId = hashCode(url);
const notify = await termAlreadySearched(searchId)
const notify = await urlAlreadySearched(url)

do {
url = setUrlParam(url, 'o', page)

currentUrl = setUrlParam(url, 'o', page)
try {
const response = await axios( url )
const response = await axios(currentUrl)
const html = response.data;
const $ = cheerio.load(html)
nextPage = await scrapePage($, searchTerm, searchId, notify)
nextPage = await scrapePage($, searchTerm, notify, url)
} catch (error) {
log.error( 'Could not fetch the url ' + url)
log.error('Could not fetch the url ' + currentUrl)
}

page++

} while (nextPage);


log.info('Valid ads: ' + validAds)

if (validAds) {
const averagePrice = sumPrices / validAds;

log.info('Maximum price: ' + maxPrice)
log.info('Minimum price: ' + minPrice)
log.info('Average price: ' + sumPrices / validAds)

const scrapperLog = {
url,
adsFound: validAds,
averagePrice,
minPrice,
maxPrice,
}

await scraperRepository.saveLog(scrapperLog)
}
}

const scrapePage = async ($, searchTerm, searchId, notify) => {
const scrapePage = async ($, searchTerm, notify) => {
try {
const script = $('script[id="__NEXT_DATA__"]').text()
const adList = JSON.parse(script).props.pageProps.ads
Expand All @@ -58,64 +78,56 @@ const scrapePage = async ($, searchTerm, searchId, notify) => {

adsFound += adList.length

log.info( `Checking new ads for: ${searchTerm}` )
log.info( 'Ads found: ' + adsFound )
log.info(`Checking new ads for: ${searchTerm}`)
log.info('Ads found: ' + adsFound)

for( let i = 0; i < adList.length; i++ ){
for (let i = 0; i < adList.length; i++) {

log.debug( 'Checking ad: ' + (i+1))
const advert = adList[i]
const title = advert.subject
const id = advert.listId
const url = advert.url
const price = parseInt( advert.price?.replace('R$ ', '')?.replace('.', '') || '0' )
log.debug('Checking ad: ' + (i + 1))

const advert = adList[i]
const title = advert.subject
const id = advert.listId
const url = advert.url
const price = parseInt(advert.price?.replace('R$ ', '')?.replace('.', '') || '0')

const result = {
id,
url,
title,
searchTerm,
searchId,
price,
notify
}
const ad = new Ad( result )

const ad = new Ad(result)
ad.process()

if(ad.valid){
if (ad.valid) {
validAds++
minPrice = checkMinPrice(ad.price, minPrice)
maxPrice = checkMaxPrice(ad.price, maxPrice)
sumPrices += ad.price
}
}

log.info( 'Valid ads: ' + validAds )

if (validAds) {
log.info( 'Maximum price: ' + maxPrice)
log.info( 'Minimum price: ' + minPrice)
log.info( 'Average price: ' + sumPrices / validAds)
}
return true
} catch( error ) {
log.error( error );
} catch (error) {
log.error(error);
throw new Error('Scraping failed');
}

}

const termAlreadySearched = async (id) => {
const urlAlreadySearched = async (url) => {
try {
const ad = await adRepository.getAdsBySearchId(id, 1)
const ad = await scraperRepository.getLogsByUrl(url, 1)
if (ad.length) {
return true
}
return false
} catch (error) {
log.error( error )
log.error(error)
return false
}
}
Expand All @@ -129,23 +141,15 @@ const setUrlParam = (url, param, value) => {
}

/**
 * Returns the lower of an ad's price and the running minimum.
 *
 * A plain `<` comparison is used (rather than Math.min) so that when the
 * comparison is false for any reason, the current running minimum is kept
 * unchanged.
 *
 * @param {number} price - price of the ad being inspected
 * @param {number} minPrice - lowest price seen so far
 * @returns {number} `price` when it is strictly lower than `minPrice`, otherwise `minPrice`
 */
const checkMinPrice = (price, minPrice) => (price < minPrice ? price : minPrice);

/**
 * Returns the higher of an ad's price and the running maximum.
 *
 * A plain `>` comparison is used (rather than Math.max) so that when the
 * comparison is false for any reason, the current running maximum is kept
 * unchanged.
 *
 * @param {number} price - price of the ad being inspected
 * @param {number} maxPrice - highest price seen so far
 * @returns {number} `price` when it is strictly higher than `maxPrice`, otherwise `maxPrice`
 */
const checkMaxPrice = (price, maxPrice) => (price > maxPrice ? price : maxPrice);

/**
 * Computes a signed 32-bit integer hash of a string from its UTF-16 code units.
 *
 * Each step multiplies the running hash by 31 and adds the next code unit,
 * wrapping the result to a signed 32-bit integer.
 *
 * @param {string} s - text to hash
 * @returns {number} signed 32-bit hash; 0 for an empty string
 */
const hashCode = (s) => {
  let hash = 0;

  for (let index = 0; index < s.length; index += 1) {
    // (hash << 5) - hash equals hash * 31; `| 0` truncates to a signed 32-bit int
    hash = ((hash << 5) - hash + s.charCodeAt(index)) | 0;
  }

  return hash;
};

module.exports = {
scraper
}
}

0 comments on commit 6aaa31c

Please sign in to comment.