Skip to content

Commit

Permalink
Movies import with ratings, lengths, genre and budgets. Export to csv
Browse files Browse the repository at this point in the history
git-svn-id: svn://had.co.nz/projects/projects/movies@116 688fc0e7-72e3-0310-a89e-c7106b9812cc
  • Loading branch information
hadley committed Feb 13, 2005
1 parent 5a84be0 commit 23b727f
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 10 deletions.
8 changes: 8 additions & 0 deletions analysis.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Exploratory scatter plots of the movie data read from movies.csv.
library(lattice)
# Apply lattice's col.whitebg() settings to the trellis device.
trellis.par.set(col.whitebg())
setwd("~/documents/movies")

m <- read.csv(file = "movies.csv")

# Length and budget against release year; budget is also shown on a log scale.
xyplot(length ~ year, data = m)
xyplot(budget ~ year, data = m)
xyplot(log(budget) ~ year, data = m)
15 changes: 15 additions & 0 deletions export.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
require "csv"
require "import"

# Export every movie with a positive length and a positive budget to movies.csv.
movies = Movie.find_all 'length > 0 AND budget > 0'

# The block form of File.open closes the file when the block exits, including
# when writing a row raises part-way through the export.
File.open('movies.csv', 'wb') do |outfile|
  CSV::Writer.generate(outfile) do |csv|
    # Header row; the column order matches the data rows written below.
    csv << ['title', 'year', 'budget', 'length', 'rating', 'votes']

    movies.each do |m|
      csv << [m.title, m.year, m.budget, m.length, m.imdb_rating, m.imdb_votes]
    end
  end
end
66 changes: 59 additions & 7 deletions import.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,18 @@
)

# A movie record that owns a collection of genres and a collection of ratings.
class Movie < ActiveRecord::Base
  has_many :genres
  # The ratings model in this file is named Ratings (plural), so its class
  # cannot be inferred from the association name and is given explicitly.
  has_many :ratings, :class_name => "Ratings"
end

# One genre entry; each entry points back at the movie it describes.
class Genre < ActiveRecord::Base
  belongs_to(:movie)
end

# One rating entry for a movie.
class Ratings < ActiveRecord::Base
  # Mirrors Genre: the Ratings table carries a movie_id column, so expose the
  # owning movie through an association.
  belongs_to :movie
end


def import_movies
#$100,000 Pyramid, The (2001) (VG) 2001
title_re = /^([a-zA-z ]+)\s+\([0-9]+\)\s+([0-9]+)$/ix
Expand Down Expand Up @@ -54,7 +64,7 @@ def import_budgets
File.new("business.list").each(dashes) do |l|
if match = title_re.match(l)
if bt = budget_re.match(l)
title, year, budget = match[1], match[2], bt[1]
title, year, budget = match[1], match[2], bt[1].gsub!(",","").to_i

m = Movie.find_by_title_and_year title, year
if m
Expand All @@ -67,10 +77,52 @@ def import_budgets
end
end

# Run the import steps in this order: movies, then times, then budgets.
import_movies
import_times
import_budgets
# Attach genres read from genres.list to movies already in the database.
#
# Every line shaped like "<title> (<year>) <genre>" is looked up with
# Movie.find_by_title_and_year; when a movie is found, a genre row is created
# through its +genres+ association. Lines that do not match the pattern, and
# titles with no matching movie, are skipped. Takes no arguments.
def import_genres
  #D2: The Mighty Ducks (1994) Family
  # NOTE(review): the title group accepts only letters and spaces, so a title
  # such as the sample above (digit and colon) does not match -- confirm intended.
  genre_re = /^([a-z ]*?)\s+\(([0-9]+)\)\s+(.*?)$/ix

  # File.foreach closes the file once iteration finishes or raises.
  File.foreach("genres.list") do |l|
    match = genre_re.match(l)
    next unless match

    title, year, genre = match[1], match[2], match[3]

    m = Movie.find_by_title_and_year title, year
    #puts "#{title} $#{genre}"
    m.genres.create({"genre" => genre}) if m
  end
end

# Copy vote counts and ratings from ratings.list onto existing movies.
#
# Every line matching the ratings pattern is looked up with
# Movie.find_by_title_and_year; when a movie is found, its imdb_votes and
# imdb_rating attributes are updated. Non-matching lines and unknown titles
# are skipped. Takes no arguments.
def import_ratings
  #.0.1112000 14 5.9 365 Nights in Hollywood (1934)
  # Extended (x) mode: the literal spaces between groups are layout only.
  # Groups: 1 = leading digits/dots field (captured but unused), 2 = votes,
  # 3 = rating, 4 = title, 5 = year.
  ratings_re = /([0-9.]+) \s+ ([0-9]+) \s+ ([0-9.]+) \s+ ([a-z ]+?) \s+ \(([0-9]+)\)/ix

  # Block form so the file is closed when the import finishes or raises.
  File.open("ratings.list") do |f|
    f.each_line do |l|
      match = ratings_re.match(l)
      next unless match

      votes, outof10, title, year = match[2], match[3].to_f, match[4], match[5]

      #puts "#{title} #{outof10} #{votes}";

      m = Movie.find_by_title_and_year title, year
      m.update_attributes({'imdb_votes' => votes, 'imdb_rating' => outof10}) if m

      # Progress output: only emitted when a matching line lands on a
      # multiple of 1000.
      if f.lineno % 1000 == 0
        puts "#{title} #{outof10} #{votes}"
        puts f.lineno
      end
    end
  end
end

# Only the genre import is enabled; the other import steps are commented out.
import_genres
#import_movies
#import_times
#import_budgets

# Print the number of movies satisfying each condition, one count per line.
["budget > 0", "length > 0", "budget > 0 and length > 0"].each do |condition|
  puts Movie.count(condition)
end
#puts Movie.count( "budget > 0")
#puts Movie.count( "length > 0")
#puts Movie.count( "budget > 0 and length > 0")
#puts Movie.count( "budget > 0 and length > 0 and imdb_votes > 0")
8 changes: 5 additions & 3 deletions movies.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ id INTEGER PRIMARY KEY,
title varchar(250),
year integer,
budget integer,
length integer
length integer,
imdb_rating integer,
imdb_votes integer
);

-- Ratings and Genres tables; both carry a movie_id column.
-- NOTE(review): each table is declared twice below (movie_id as medium_int,
-- then as integer, with Ratings also gaining a votes column). This looks like
-- before/after diff residue -- confirm which pair is current before running.
CREATE TABLE Ratings (id INTEGER PRIMARY KEY , movie_id medium_int, score varchar(10), outof10 float);
CREATE TABLE Genres (id INTEGER PRIMARY KEY , movie_id medium_int, genre varchar(50));
CREATE TABLE Ratings (id INTEGER PRIMARY KEY, movie_id integer, score varchar(10), outof10 float, votes integer);
CREATE TABLE Genres (id INTEGER PRIMARY KEY , movie_id integer, genre varchar(50));

-- Indexes on the Movies title and year columns.
CREATE INDEX title on Movies (title);
CREATE INDEX year on Movies (year);
Expand Down

0 comments on commit 23b727f

Please sign in to comment.