## Importing needed packages

In [1]:
from movielens_analysis import Ratings, Tags, Movies
import matplotlib.pyplot as plt

## Getting info about ratings

In [2]:
# Load the ratings file, then build the companion movies view.
# `Movies` is reached through the `ratings` object here and is handed both the
# movies file path and the ratings object itself.
ratings = Ratings('ratings.csv')
rmovies = ratings.Movies('movies.csv', ratings)

# Top 20 films with the highest number of scores

In [3]:
%matplotlib notebook
top_by_num_of_ratings = rmovies.top_by_num_of_ratings(20);
i = 1;
for key, value in top_by_num_of_ratings.items():
    print(f"{i}:{key} - {value} scores");
    i += 1;
fig, ax = plt.subplots();
ax.bar([int(i) for i in range(20)], list(top_by_num_of_ratings.values()), color = '#297083');
ax.set_facecolor('seashell');
ax.set_xticks([i for i in range(20)]);
ax.set_xticklabels([i for i in range(1, 21)]);
fig.set_facecolor('floralwhite');
fig.set_figwidth(9);
fig.set_label('Films with the highest number of scores');
plt.show()


1:Forrest Gump (1994) - 32596 scores
2:"Shawshank Redemption, The (1994)" - 32432 scores
3:Pulp Fiction (1994) - 31796 scores
4:"Silence of the Lambs, The (1991)" - 29540 scores
5:"Matrix, The (1999)" - 29031 scores
6:Star Wars: Episode IV - A New Hope (1977) - 27266 scores
7:Jurassic Park (1993) - 25604 scores
8:Schindler's List (1993) - 24065 scores
9:Braveheart (1995) - 23558 scores
10:Fight Club (1999) - 23492 scores
11:Toy Story (1995) - 22940 scores
12:Terminator 2: Judgment Day (1991) - 22818 scores
13:Star Wars: Episode V - The Empire Strikes Back (1980) - 22764 scores
14:"Lord of the Rings: The Fellowship of the Ring, The (2001)" - 22243 scores
15:"Usual Suspects, The (1995)" - 22102 scores
16:Star Wars: Episode VI - Return of the Jedi (1983) - 21815 scores
17:Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981) - 21716 scores
18:American Beauty (1999) - 21390 scores
19:"Godfather, The (1972)" - 20761 scores
20:"Lord of the Rings: The Two Towers, The 

<IPython.core.display.Javascript object>

## Distribution of user scores

In [4]:
# Count how many times each rating value was given and list the counts.
dist_ratings = rmovies.dist_by_rating()
for mark, num_users in dist_ratings.items():
    print(f"{mark} - {num_users} users rated film with this mark")

# One bar per rating value; positions are computed once and reused for the ticks.
positions = list(range(len(dist_ratings)))

fig, ax = plt.subplots()
ax.bar(positions, list(dist_ratings.values()), color='#297083')
ax.set_facecolor('seashell')
ax.set_xticks(positions)
# Label each bar with the rating value it represents.
ax.set_xticklabels(list(dist_ratings.keys()))
# Give the chart a visible caption and axis labels so it stands on its own.
ax.set_title('Distribution by rating scores')
ax.set_xlabel('Rating')
ax.set_ylabel('Number of ratings')
fig.set_facecolor('floralwhite')
fig.set_figwidth(9)
# Kept from the original: this only labels the figure object, it is not a title.
fig.set_label('Distribution by rating scores')
plt.show()


0.5 - 160856 users rated film with this mark
1.0 - 315269 users rated film with this mark
1.5 - 162483 users rated film with this mark
2.0 - 655406 users rated film with this mark
2.5 - 509285 users rated film with this mark
3.0 - 1954436 users rated film with this mark
3.5 - 1277999 users rated film with this mark
4.0 - 2647335 users rated film with this mark
4.5 - 877586 users rated film with this mark
5.0 - 1439344 users rated film with this mark


<IPython.core.display.Javascript object>

## Getting info about tags

In [6]:
# Load the tags file; the cells below query this object.
tags = Tags("tags.csv")

# Top 20 popular tags

In [7]:
# Ask for the 20 most frequently used tags and list them with their rank.
most_popular = tags.most_popular(20)
for rank, (tag, times_found) in enumerate(most_popular.items(), start=1):
    print(f"{rank}:{tag} - this tag has been found {times_found} times")

# Derive the bar positions from what was actually returned, so the chart still
# renders if fewer than 20 tags come back.
positions = list(range(len(most_popular)))

fig, ax = plt.subplots()
ax.bar(positions, list(most_popular.values()), color='#297083')
ax.set_facecolor('seashell')
ax.set_xticks(positions)
# Tick labels are 1-based ranks that match the numbering of the printed list.
ax.set_xticklabels([pos + 1 for pos in positions])
# Give the chart a visible caption and axis labels so it stands on its own.
ax.set_title('Top 20 tags')
ax.set_xlabel('Rank')
ax.set_ylabel('Number of occurrences')
fig.set_facecolor('floralwhite')
fig.set_figwidth(9)
# Kept from the original: this only labels the figure object, it is not a title.
fig.set_label('Top 20 tags')
plt.show()

1:sci-fi - this tag has been found 8330 times
2:atmospheric - this tag has been found 6516 times
3:action - this tag has been found 5907 times
4:comedy - this tag has been found 5702 times
5:surreal - this tag has been found 5326 times
6:based on a book - this tag has been found 5079 times
7:twist ending - this tag has been found 4820 times
8:funny - this tag has been found 4738 times
9:visually appealing - this tag has been found 4526 times
10:dystopia - this tag has been found 4257 times
11:dark comedy - this tag has been found 4026 times
12:BD-R - this tag has been found 3966 times
13:thought-provoking - this tag has been found 3844 times
14:romance - this tag has been found 3791 times
15:stylized - this tag has been found 3728 times
16:quirky - this tag has been found 3677 times
17:psychology - this tag has been found 3625 times
18:woman director - this tag has been found 3589 times
19:fantasy - this tag has been found 3523 times
20:classic - this tag has been found 3456 times


<IPython.core.display.Javascript object>

## Getting info about movies

In [8]:
# Load the movies file; the cells below query this object.
movies = Movies("movies.csv")

# Distribution of occurring genres

In [9]:
# Count how many films are associated with each genre and list the counts.
genres = movies.dist_by_genres()
for rank, (genre, num_films) in enumerate(genres.items(), start=1):
    print(f"{rank}:{genre} - this genre associated with {num_films} films")

# Plot the genre counts themselves. The previous version plotted
# `most_popular` (the tag counts from the tags section) by mistake.
# The chart is still capped at the first 20 entries, as before.
genre_counts = list(genres.values())[:20]
positions = list(range(len(genre_counts)))

fig, ax = plt.subplots()
ax.bar(positions, genre_counts, color='#297083')
ax.set_facecolor('seashell')
ax.set_xticks(positions)
# Tick labels are 1-based ranks that match the numbering of the printed list.
ax.set_xticklabels([pos + 1 for pos in positions])
# Give the chart a visible caption and axis labels so it stands on its own.
ax.set_title('Top 20 genres')
ax.set_xlabel('Rank')
ax.set_ylabel('Number of films')
fig.set_facecolor('floralwhite')
fig.set_figwidth(9)
# Kept from the original: this only labels the figure object, it is not a title.
fig.set_label('Top 20 genres')
plt.show()

1:Drama - this genre associated with 25606 films
2:Comedy - this genre associated with 16870 films
3:Thriller - this genre associated with 8654 films
4:Romance - this genre associated with 7719 films
5:Action - this genre associated with 7348 films
6:Horror - this genre associated with 5989 films
7:Documentary - this genre associated with 5605 films
8:Crime - this genre associated with 5319 films
9:(no genres listed) - this genre associated with 5062 films
10:Adventure - this genre associated with 4145 films
11:Sci-Fi - this genre associated with 3595 films
12:Children - this genre associated with 2935 films
13:Animation - this genre associated with 2929 films
14:Mystery - this genre associated with 2925 films
15:Fantasy - this genre associated with 2731 films
16:War - this genre associated with 1874 films
17:Western - this genre associated with 1399 films
18:Musical - this genre associated with 1054 films
19:Film-Noir - this genre associated with 353 films
20:IMAX - this genre associa

<IPython.core.display.Javascript object>

### All data was taken from https://grouplens.org/datasets/movielens/