Skip to content
This repository has been archived by the owner on Dec 31, 2020. It is now read-only.

Commit

Permalink
First draft of movie scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
alexnault committed May 21, 2015
1 parent 2a1f5fc commit 987659e
Showing 1 changed file with 111 additions and 38 deletions.
149 changes: 111 additions & 38 deletions lib/projection.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@ var Projection = function() {

// Find showtimes of nearby theaters
Projection.prototype.findTheaters = function(near, callback) {
var err,
result = {};

var url = this.GOOGLE_ENDPOINT + '?near=' + near;

request(url, function (error, response, body) {
Expand All @@ -26,6 +23,8 @@ Projection.prototype.findTheaters = function(near, callback) {
}

var $ = cheerio.load(body);

// List of theaters returned
var theaters = [];

$('.theater').each(function(i, t) {
Expand All @@ -42,46 +41,21 @@ Projection.prototype.findTheaters = function(near, callback) {
// Google movie info format : Duration - Rating - Genre - Trailer - IMDB
t.find('.showtimes .movie').each(function(j, m) {
m = $(m);
var infos = m.find('.info').text().split(' - ');

var cursor = 0,
duration = null,
rating = null,
genre = null,
trailer = null;

if (infos[cursor].match(/(1|2|hr|min)/)){
duration = infos[cursor].trim();
cursor++;
}

if (infos[cursor].match(/(G|\+|13|16|18)/)){
rating = infos[cursor].trim();
cursor++;
}

if (infos[cursor].match(/(^\D*$)/)){
genre = infos[cursor].trim();
cursor++;
}
var infos = formatInfos(m.find('.info').text().split(' - '));
var showtimes = formatShowtimes(m.find('.times').text().split(' '));

var trailer = null;
if (m.find('.info a').attr('href') && (m.find('.info a').attr('href').match(/(youtube|vimeo|daily)/))) {
trailer = m.find('.info a').attr('href').replace('/url?q=', '').trim();
}


var showtimes = m.find('.times').text().split(' ');
showtimes = showtimes.map(function(s) {
return s.trim(); // TODO new Date (w/ am/pm)
//return new Date(s);
});

//console.log(infos);
var movie = {
title: m.find('.name').text(),
duration: duration,
rating: rating,
genre: genre,
duration: infos.duration,
rating: infos.rating,
genre: infos.genre,
trailer: trailer,
showtimes: showtimes
};
Expand All @@ -92,20 +66,119 @@ Projection.prototype.findTheaters = function(near, callback) {
theaters.push(theater);
});

callback(err, theaters);
callback(null, theaters);
})

};

// q=movie+name
// Find a movie showtimes in nearby theaters
Projection.prototype.findMovie = function(callback) {
// TODO
/**
 * Find the showtimes of a movie in nearby theaters.
 *
 * Requests the Google endpoint with the `near` and `q` query parameters and
 * scrapes the movie details and the per-theater showtimes from the page.
 *
 * @param {string} near - Location to search around.
 * @param {string} movieName - Title of the movie to search for.
 * @param {function} callback - Called with `(error)` when the request fails,
 *   with `(statusCode)` when the response status is not 200, and with
 *   `(null, movie)` on success.
 */
Projection.prototype.findMovie = function(near, movieName, callback) {
  // Encode both values so spaces and reserved characters survive in the query string.
  var url = this.GOOGLE_ENDPOINT +
    '?near=' + encodeURIComponent(near) +
    '&q=' + encodeURIComponent(movieName);

  request(url, function (error, response, body) {
    if (error) {
      callback(error);
      return;
    }

    if (response.statusCode !== 200) {
      callback(response.statusCode);
      return;
    }

    var $ = cheerio.load(body);

    var m = $('.movie');

    // The info block is split on <br>: the first part holds the movie infos,
    // the second the people involved. html() may return null when the
    // selection is empty, so fall back to an empty string instead of throwing.
    var infoHtml = m.find('.desc .info').not('.info.links').html() || '';
    var content = infoHtml.split('<br>');

    // Expected shape of the second part: "Director: <name> - Cast: <a>, <b>, ..."
    var categories = $(content[1]).text().split(' - ');
    var director = categories[0].replace('Director: ', '');
    // The cast segment may be absent; report an empty cast rather than crashing.
    var cast = categories[1] ? categories[1].replace('Cast: ', '').split(', ') : [];

    var infos = formatInfos(content[0].split(' - '));

    // Only keep the first info link when it points to a known video host.
    var trailer = null;
    var href = m.find('.info a').attr('href');
    if (href && href.match(/(youtube|vimeo|daily)/)) {
      trailer = href.replace('/url?q=', '').trim();
    }

    // Description = visible part + the own text of the second synopsis node
    // (its child elements are removed from a clone before reading the text).
    var desc = m.find('span[itemprop="description"]').text() +
      m.find('#SynopsisSecond0').clone().children().remove().end().text().trim();

    var movie = {
      title: m.find('.desc h2').text(),
      desc: desc,
      director: director,
      cast: cast,
      duration: infos.duration,
      rating: infos.rating,
      genre: infos.genre,
      trailer: trailer,
      theaters: []
    };

    $('.showtimes .theater').each(function(i, t) {
      t = $(t);

      var theater = {
        name: t.find('.name').text(),
        address: t.find('.address').text(),
        showtimes: formatShowtimes(t.find('.times').text().split(' '))
      };

      movie.theaters.push(theater);
    });

    callback(null, movie);
  });

};

/**
 * Extract duration, rating and genre from the ' - '-separated fragments of a
 * movie info line (Google format: Duration - Rating - Genre - ...).
 *
 * Fragments are consumed left to right. A field is only taken when the
 * fragment under the cursor matches that field's pattern; otherwise the field
 * is null and the same fragment is tried against the next field.
 *
 * @param {string[]} infos - Info fragments, e.g. ['1hr 30min', 'Rated G', 'Comedy'].
 * @returns {{duration: ?string, rating: ?string, genre: ?string}} Trimmed
 *   values, with null for every field that is absent.
 */
var formatInfos = function(infos) {
  var parts = infos || [];
  var cursor = 0;

  // Return the trimmed fragment under the cursor and advance past it when it
  // matches `pattern`. Return null, leaving the cursor in place, when it does
  // not match or when there are no fragments left (previously a TypeError).
  var take = function(pattern) {
    var part = parts[cursor];
    if (typeof part !== 'string' || !part.match(pattern)) {
      return null;
    }
    cursor++;
    return part.trim();
  };

  var duration = take(/(1|2|hr|min)/);
  var rating = take(/(G|\+|13|16|18)/);
  var genre = take(/(^\D*$)/);

  return {
    duration: duration,
    rating: rating,
    genre: genre
  };
};

/**
 * Normalize a list of raw showtime strings by trimming surrounding
 * whitespace from each entry. Entries are otherwise returned unchanged.
 *
 * @param {string[]} showtimes - Raw showtime fragments.
 * @returns {string[]} A new array with every entry trimmed.
 */
var formatShowtimes = function(showtimes) {
  // TODO what to do with dubbed?
  // TODO convert each entry to a Date (taking am/pm into account)
  var trimEntry = function(entry) {
    return entry.trim();
  };

  return showtimes.map(trimEntry);
};

// Manual smoke test. Guarded so it only runs when this file is executed
// directly (e.g. `node lib/projection.js`) and never fires a network request
// when the module is require()'d as a library.
if (require.main === module) {
  var p = new Projection();
  // p.findTheaters('Montreal', function(err, theaters) {
  // console.log(theaters[2].movies[0 ]);
  // });
  p.findMovie('Montreal', 'Furious 7', function(err, movie) {
    if (err) {
      console.error(err);
      return;
    }
    console.log(movie);
  });
}

module.exports = Projection;

0 comments on commit 987659e

Please sign in to comment.