# MovieLens Analytics

In [1]:
from movielens_analysis import Ratings, Tags

### Class Ratings

In [2]:
# Build the Ratings object from ratings.csv of the ml-latest-small dataset.
# NOTE(review): the recorded outputs of dist_by_year, dist_by_rating and
# distribution_by_count in this notebook each sum to 1000, so Ratings appears
# to analyse only 1000 rows rather than the whole file - confirm in
# movielens_analysis.
ratings = Ratings('../datasets/ml-latest-small/ratings.csv')

#### number of reviews by year

In [3]:
%%timeit -r1 -n1
print(ratings.Movies(ratings).dist_by_year())

{1996: 358, 1999: 82, 2000: 296, 2001: 70, 2005: 121, 2006: 4, 2007: 1, 2011: 39, 2015: 29}
1.78 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### number of reviews by rating

In [4]:
%%timeit -r1 -n1
print(ratings.Movies(ratings).dist_by_rating())

{0.5: 24, 1.0: 39, 1.5: 11, 2.0: 57, 2.5: 7, 3.0: 253, 3.5: 17, 4.0: 292, 4.5: 33, 5.0: 267}
1.65 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### top-n movies by the number of ratings

In [5]:
%%timeit -r1 -n1
print(ratings.Movies(ratings).top_by_num_of_ratings(10))

{'"Usual Suspects': 4, 'Pulp Fiction (1994)': 4, '"Fugitive': 4, "Schindler's List (1993)": 4, 'Batman (1989)': 4, '"Silence of the Lambs': 4, 'Fargo (1996)': 4, 'Aladdin (1992)': 4, 'Beauty and the Beast (1991)': 4, 'Toy Story (1995)': 3}
1.1 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### top-n movies by the average or median of the ratings

In [6]:
%%timeit -r1 -n1
print(ratings.Movies(ratings).top_by_ratings(10))

{'Bottle Rocket (1996)': 5.0, 'Canadian Bacon (1995)': 5.0, 'Star Wars: Episode IV - A New Hope (1977)': 5.0, 'James and the Giant Peach (1996)': 5.0, '"Wizard of Oz': 5.0, 'Citizen Kane (1941)': 5.0, '"Adventures of Robin Hood': 5.0, 'Mr. Smith Goes to Washington (1939)': 5.0, 'Winnie the Pooh and the Blustery Day (1968)': 5.0, '"Three Caballeros': 5.0}
2.72 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### top-n movies by the variance of the ratings

In [7]:
%%timeit -r1 -n1
print(ratings.Movies(ratings).top_controversial(10))

{'Bambi (1942)': 10.12, '"Rescuers': 10.12, 'My Fair Lady (1964)': 10.12, '"Matrix': 8.0, 'Good Will Hunting (1997)': 6.12, 'Courage Under Fire (1996)': 6.12, "Schindler's List (1993)": 4.56, 'Dazed and Confused (1993)': 4.5, 'Fight Club (1999)': 4.5, '"Talented Mr. Ripley': 4.5}
2.22 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### distribution of users by the number of ratings

In [8]:
%%timeit -r1 -n1
print(ratings.Users(ratings).distribution_by_count())

{314: 1, 232: 1, 216: 1, 126: 1, 44: 1, 39: 1, 29: 1}
692 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### top-n users with the biggest variance of their ratings

In [9]:
%%timeit -r1 -n1
print(ratings.Users(ratings).top_users_by_variance(5))

{3: 4.37, 4: 1.73, 7: 1.67, 5: 0.98, 6: 0.72}
1.64 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### distribution of users by average or median ratings

In [10]:
%%timeit -r1 -n1
print(ratings.Users(ratings).distribution_by_rating())

{4.37: 1, 3.95: 1, 3.64: 1, 3.56: 1, 3.49: 1, 3.35: 1, 2.44: 1}
1.18 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Class Tags

In [11]:
# Build the Tags object from tags.csv of the ml-latest-small dataset.
# NOTE(review): how many rows Tags reads is not visible here - confirm in
# movielens_analysis.
tags = Tags('../datasets/ml-latest-small/tags.csv')

#### top-n tags with most words inside

In [12]:
%%timeit -r1 -n1
print(tags.most_words(5))

{'Something for everyone in this one... saw it without and plan on seeing it with kids!': 16, 'the catholic church is the most corrupt organization in history': 10, 'Oscar (Best Music - Original Score)': 6, 'Everything you want is here': 5, 'based on a true story': 5}
1.59 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### top-n longest tags

In [13]:
%%timeit -r1 -n1
print(tags.longest(5))

['Something for everyone in this one... saw it without and plan on seeing it with kids!', 'the catholic church is the most corrupt organization in history', 'audience intelligence underestimated', 'Oscar (Best Music - Original Score)', 'assassin-in-training (scene)']
1.3 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### intersection between top-n tags with most words and top-n longest tags

In [14]:
%%timeit -r1 -n1
print(tags.most_words_and_longest(5))

['Oscar (Best Music - Original Score)', 'Something for everyone in this one... saw it without and plan on seeing it with kids!', 'the catholic church is the most corrupt organization in history']
1.59 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### most popular tags

In [15]:
%%timeit -r1 -n1
print(tags.most_popular(10))

{'funny': 15, 'sci-fi': 14, 'twist ending': 12, 'dark comedy': 12, 'atmospheric': 10, 'superhero': 10, 'comedy': 10, 'action': 10, 'suspense': 10, 'Leonardo DiCaprio': 9}
486 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


#### tags that include the given word (or part of a word)

In [16]:
%%timeit -r1 -n1
print(tags.tags_with('fict'))

['science fiction']
947 μs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
