Skip to content
This repository has been archived by the owner on Dec 31, 2020. It is now read-only.

Commit

Permalink
First draft of theaters scraping
Browse files: browse the repository at this point in the history
  • Loading branch information
alexnault committed May 20, 2015
1 parent 4b969c8 commit 2a1f5fc
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 28 deletions.
81 changes: 66 additions & 15 deletions lib/projection.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@ var request = require('request'),
cheerio = require('cheerio');

var Projection = function() {
this.GOOGLE_ENDPOINT = "http://www.google.com/movies";
this.GOOGLE_ENDPOINT = 'http://www.google.com/movies';
};

// Find showtimes of nearby theaters
Projection.prototype.findTheaters = function(near, callback) {
var err,
result = {};

var url = this.GOOGLE_ENDPOINT + "?near=" + near;
var url = this.GOOGLE_ENDPOINT + '?near=' + near;

request(url, function (error, response, body) {
if (error) {
Expand All @@ -26,27 +26,73 @@ Projection.prototype.findTheaters = function(near, callback) {
}

var $ = cheerio.load(body);

var theaters = $(".theater").text();

//result = $(".theater").find(".desc .name").text();

var theaters = [];

$(".theater").each(function(i, value) {
$('.theater').each(function(i, t) {
t = $(t);

var theater = {
name: $(this).find(".desc .name").text(),
address: $(this).find(".desc .info").text().trim('-')[0],
phone: $(this).find(".desc .info").text().trim(' - ')[1]
name: t.find('.desc .name').text(),
address: t.find('.desc .info').text().split(' - ')[0].trim(),
phone: t.find('.desc .info').text().split(' - ')[1].trim(),
note: t.find('.desc .closure').text(), //status
movies: []
};

// Google movie info format : Duration - Rating - Genre - Trailer - IMDB
t.find('.showtimes .movie').each(function(j, m) {
m = $(m);
var infos = m.find('.info').text().split(' - ');

var cursor = 0,
duration = null,
rating = null,
genre = null,
trailer = null;

if (infos[cursor].match(/(1|2|hr|min)/)){
duration = infos[cursor].trim();
cursor++;
}

if (infos[cursor].match(/(G|\+|13|16|18)/)){
rating = infos[cursor].trim();
cursor++;
}

if (infos[cursor].match(/(^\D*$)/)){
genre = infos[cursor].trim();
cursor++;
}

if (m.find('.info a').attr('href') && (m.find('.info a').attr('href').match(/(youtube|vimeo|daily)/))) {
trailer = m.find('.info a').attr('href').replace('/url?q=', '').trim();
}


var showtimes = m.find('.times').text().split(' ');
showtimes = showtimes.map(function(s) {
return s.trim(); // TODO new Date (w/ am/pm)
//return new Date(s);
});

//console.log(infos);
var movie = {
title: m.find('.name').text(),
duration: duration,
rating: rating,
genre: genre,
trailer: trailer,
showtimes: showtimes
};

theater.movies.push(movie);
});

theaters.push(theater);
//theaters.push(value.text());
});

result = theaters;

callback(err, result);
callback(err, theaters);
})

};
Expand All @@ -57,4 +103,9 @@ Projection.prototype.findMovie = function(callback) {
// TODO
};

// var p = new Projection();
// p.findTheaters('Montreal', function(err, theaters) {
// console.log(theaters[2].movies[0 ]);
// });

module.exports = Projection;
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"name": "projection",
"version": "1.0.0",
"description": "A nodejs package for movie showtimes around the globe",
"version": "0.1.0",
"description": "Fast acquisition of movie showtimes around the globe",
"main": "index.js",
"scripts": {
"test": "mocha ./test/**.js",
"test": "mocha ./test/*.js",
"test-travis": "istanbul cover ./node_modules/mocha/bin/_mocha -- -R spec ./test/*.js"
},
"repository": {
Expand Down
20 changes: 10 additions & 10 deletions test/tests.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'use strict';

var assert = require("assert")
var assert = require('assert')
var Projection = require('../lib/projection');

var p; // Projection
Expand All @@ -19,13 +19,13 @@ describe('Projection', function(){
describe('new Projection()', function(){
it('should have the corresponding endpoint', function(){
p = new Projection();
assert.equal(p.GOOGLE_ENDPOINT, "http://www.google.com/movies");
assert.equal(p.GOOGLE_ENDPOINT, 'http://www.google.com/movies');
})
})

describe('#findTheaters()', function(){
it('should find theaters by town', function(done){
p.findTheaters("Montreal", function(err, theaters) {
p.findTheaters('Montreal', function(err, theaters) {
//console.log(theaters);

assert.equal(err, null);
Expand All @@ -37,30 +37,30 @@ describe('Projection', function(){
})

it('should find theaters by zipcode', function(done){
p.findTheaters("Montreal", function(err, theaters) {
p.findTheaters('Montreal', function(err, theaters) {
assert.equal(err, null);
done();
});
})

it('should find theaters by lat/long', function(done){
p.findTheaters("Montreal", function(err, theaters) {
p.findTheaters('Montreal', function(err, theaters) {
assert.equal(err, null);
done();
});
})

it('should return requestjs error', function(done){
p.GOOGLE_ENDPOINT = "abc" // Override endpoint
p.findTheaters("Montreal", function(err, theaters) {
assert.equal(err, "Error: Invalid URI \"abc?near=Montreal\"");
p.GOOGLE_ENDPOINT = 'abc'; // Override endpoint
p.findTheaters('Montreal', function(err, theaters) {
assert.equal(err, 'Error: Invalid URI "abc?near=Montreal"');
done();
});
})

it('should return a 404 error', function(done){
p.GOOGLE_ENDPOINT = "http://httpstat.us/404" // Override endpoint
p.findTheaters("Montreal", function(err, theaters) {
p.GOOGLE_ENDPOINT = 'http://httpstat.us/404' // Override endpoint
p.findTheaters('Montreal', function(err, theaters) {
assert.equal(err, 404);
done();
});
Expand Down

0 comments on commit 2a1f5fc

Please sign in to comment.