Loads some of the TSV files of https://datasets.imdbws.com/ into a MySQL server.
Edit load-data.sh
to set your MySQL info and then run it:
./load-data.sh
This will require around 500MB in your /tmp/
directory. After the data load is done, you can experiment with ad hoc queries. Examples:
select
r.averageRating, r.numVotes, b.startYear, b.primaryTitle
from
title_ratings r, title_basics b
where
r.tconst = b.tconst and
b.titleType = 'movie' and
b.startYear > 1998 and
r.numVotes > 100000 and
r.averageRating > 6
order by r.averageRating desc
limit 50;
select
b.startYear, avg(averageRating), count(*)
from
title_ratings r, title_basics b
where
r.tconst = b.tconst and
b.titleType = 'movie' and
r.numVotes > 1000
group by b.startYear
order by b.startYear;
select
count(*)
from
title_ratings r, title_basics b
where
r.tconst = b.tconst and
b.titleType = 'movie' and
r.numVotes > 10;
select
b.startYear, avg(b.runtimeMinutes), count(*)
from
title_ratings r, title_basics b
where
r.tconst = b.tconst and
b.titleType = 'movie' and
r.numVotes > 1000 and
r.averageRating > 7 and
b.startYear > year(CURDATE()) - 30 and
b.runtimeMinutes is not null
group by b.startYear
order by b.startYear;
select
round(r.averageRating), avg(b.runtimeMinutes), count(*)
from
title_ratings r, title_basics b
where
r.tconst = b.tconst and
b.titleType = 'movie' and
r.numVotes > 100 and
b.startYear > year(CURDATE()) - 15 and
b.runtimeMinutes is not null
group by round(r.averageRating)
order by round(r.averageRating);