In [7]:
import os
import logging

import psycopg2
import psycopg2.extensions
from pymongo import MongoClient
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, Float, MetaData, String
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base


logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

In [12]:
# Задание по Psycopg2
# --------------------------------------------------------------

logger.info("Создаём подключёние к Postgres")
params = {
    "host": "localhost", #os.environ['APP_POSTGRES_HOST'],
    "port": "5433", #os.environ['APP_POSTGRES_PORT'],
    "user": 'postgres'
}
conn = psycopg2.connect(**params)

# дополнительные настройки
psycopg2.extensions.register_type(
    psycopg2.extensions.UNICODE,
    conn
)
conn.set_isolation_level(
    psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT
)
cursor = conn.cursor()

# ВАШ КОД ЗДЕСЬ
# -------------
# таблица movies_top
# movieId (id фильма), ratings_num(число рейтингов), ratings_avg (средний рейтинг фильма)

sql_str = "select * into movies_top from (select movieId, count(rating) ratings_num, avg(rating) ratings_avg from ratings group by movieId) t"

# -------------

cursor.execute(sql_str)
conn.commit()

# Проверка - выгружаем данные
cursor.execute("SELECT * FROM movies_top LIMIT 10")
logger.info(
    "Выгружаем данные из таблицы movies_top: (movieId, ratings_num, ratings_avg)\n{}".format(
        [i for i in cursor.fetchall()])
)



2018-12-25 23:57:17,792 : INFO : Создаём подключёние к Postgres
2018-12-25 23:57:18,864 : INFO : Выгружаем данные из таблицы movies_top: (movieId, ratings_num, ratings_avg)
[(790, 6, 3.16666666666667), (146688, 2, 3.25), (69906, 1, 5.0), (139056, 1, 3.0), (165533, 1, 3.0), (4790, 5, 2.7), (26745, 5, 3.3), (3936, 18, 3.44444444444444), (88837, 3, 3.16666666666667), (31297, 1, 4.5)]


In [16]:
os.environ['APP_POSTGRES_HOST'] = 'localhost'
os.environ['APP_POSTGRES_PORT'] = '5433'

In [20]:

# Задание по SQLAlchemy
# --------------------------------------------------------------
Base = declarative_base()


class MoviesTop(Base):
    __tablename__ = 'movies_top'

    movieid = Column(Integer, primary_key=True)
    ratings_num = Column(Float)
    ratings_avg = Column(Float)

    def __repr__(self):
        return "<User(movieid='%s', ratings_num='%s', ratings_avg='%s')>" % (self.movieid, self.ratings_num, self.ratings_avg)


# Создаём сессию

engine = create_engine('postgresql://postgres:@{}:{}'.format(os.environ['APP_POSTGRES_HOST'], os.environ['APP_POSTGRES_PORT']))
Session = sessionmaker(bind=engine)
session = Session()


# --------------------------------------------------------------
# Ваш код здесь
# выберите контент у которого больше 15 оценок (используйте filter)
# и средний рейтинг больше 3.5 (filter ещё раз)
# отсортированный по среднему рейтингу (используйте order_by())
# id такого контента нужно сохранить в массив top_rated_content_ids

# MoviesTop = "select movieId filter (where ratings_num > 15 and ratings_avg > ) from movies_top"

top_rated_query = session.query(MoviesTop).filter(MoviesTop.ratings_num > 15).filter(MoviesTop.ratings_avg > 3.5).order_by(MoviesTop.ratings_avg.desc())


logger.info("Выборка из top_rated_query\n{}".format([i for i in top_rated_query.limit(4)]))

top_rated_content_ids = [
    i[0] for i in top_rated_query.values(MoviesTop.movieid)
][:5]
# --------------------------------------------------------------



2018-12-27 22:41:52,564 : INFO : Выборка из top_rated_query
[<User(movieid='159817', ratings_num='23.0', ratings_avg='4.47826086956522')>, <User(movieid='2937', ratings_num='32.0', ratings_avg='4.46875')>, <User(movieid='38304', ratings_num='19.0', ratings_avg='4.44736842105263')>, <User(movieid='2330', ratings_num='31.0', ratings_avg='4.43548387096774')>]


In [39]:
os.environ['APP_MONGO_HOST'] = 'localhost'
os.environ['APP_MONGO_PORT'] = '27018'

In [48]:
# Задание по PyMongo
mongo = MongoClient(**{
    'host': os.environ['APP_MONGO_HOST'],
    'port': int(os.environ['APP_MONGO_PORT'])
})

# Получите доступ к коллекции tags
db = mongo.get_database(name="movie")

collection = db['tags']

agg_filename = 'keywords.csv'
tag_data = []
if db.tags.count() == 0:
    with open(agg_filename, 'r') as f:
        for line in f.readlines():
            try:
                movieId, tags = line.strip().split('\t')
                tags = eval(tags)
                for tag in tags:
                    print(tags)
                    tag.update({'movieId': movieId})
                    tag_data.append(
                        tag
                    )
            except:
                    print(line)
    collection.insert_many(tag_data)

# id контента используйте для фильтрации - передайте его в модификатор $in внутри find
# в выборку должны попать теги фильмов из массива top_rated_content_ids
mongo_query = tags_collection.count()#find(
        #{'id': {'$in': top_rated_content_ids}}
#)

# mongo_docs = [
  #  i for i in mongo_query
#]

#print("Достали документы из Mongo: {}".format(mongo_docs[:5]))
print(mongo_query)
#id_tags = [(i['id'], i['name']) for i in mongo_docs]



  


id,keywords

862,"[{'id': 931, 'name': 'jealousy'}, {'id': 4290, 'name': 'toy'}, {'id': 5202, 'name': 'boy'}, {'id': 6054, 'name': 'friendship'}, {'id': 9713, 'name': 'friends'}, {'id': 9823, 'name': 'rivalry'}, {'id': 165503, 'name': 'boy next door'}, {'id': 170722, 'name': 'new toy'}, {'id': 187065, 'name': 'toy comes to life'}]"

8844,"[{'id': 10090, 'name': 'board game'}, {'id': 10941, 'name': 'disappearance'}, {'id': 15101, 'name': ""based on children's book""}, {'id': 33467, 'name': 'new home'}, {'id': 158086, 'name': 'recluse'}, {'id': 158091, 'name': 'giant insect'}]"

15602,"[{'id': 1495, 'name': 'fishing'}, {'id': 12392, 'name': 'best friend'}, {'id': 179431, 'name': 'duringcreditsstinger'}, {'id': 208510, 'name': 'old men'}]"

31357,"[{'id': 818, 'name': 'based on novel'}, {'id': 10131, 'name': 'interracial relationship'}, {'id': 14768, 'name': 'single mother'}, {'id': 15160, 'name': 'divorce'}, {'id': 33455, 'name': 'chick flick'}]"

11862,"[{'id': 1009, 'name': 'baby'}, {'

21253,"[{'id': 441, 'name': 'assassination'}, {'id': 983, 'name': 'brazilian'}, {'id': 1704, 'name': 'ambassador'}, {'id': 6089, 'name': 'freedom'}, {'id': 8029, 'name': 'drama'}, {'id': 11121, 'name': 'plot'}, {'id': 18263, 'name': 'revolutionary'}]"

6116,"[{'id': 637, 'name': 'pop'}, {'id': 1740, 'name': 'pop culture'}, {'id': 3490, 'name': 'pop star'}]"

9457,"[{'id': 4953, 'name': 'ocean liner'}, {'id': 33696, 'name': 'sea monster'}, {'id': 155508, 'name': 'jewel heist'}, {'id': 196917, 'name': 'armed robbery'}]"

281289,[]

43911,[]

39424,"[{'id': 10183, 'name': 'independent film'}]"

44361,"[{'id': 157499, 'name': 'mother son relationship'}]"

11702,"[{'id': 279, 'name': 'usa'}, {'id': 478, 'name': 'china'}, {'id': 1376, 'name': 'assault rifle'}, {'id': 1589, 'name': 'sniper'}, {'id': 1788, 'name': 'chinatown'}, {'id': 2231, 'name': 'drug dealer'}, {'id': 2708, 'name': 'hitman'}, {'id': 3739, 'name': 'funeral'}, {'id': 8997, 'name': 'conscientious objector'}, {'id': 9748, 'name


26578,"[{'id': 470, 'name': 'spy'}, {'id': 5565, 'name': 'biography'}]"

415072,"[{'id': 178867, 'name': 'lone ranger'}, {'id': 232793, 'name': 'feature version of serial film'}]"

43103,"[{'id': 818, 'name': 'based on novel'}, {'id': 1415, 'name': 'small town'}, {'id': 1936, 'name': 'blackmail'}, {'id': 6093, 'name': 'assault'}, {'id': 11254, 'name': 'loss of virginity'}, {'id': 33720, 'name': 'guilt'}, {'id': 163874, 'name': 'unwed pregnancy'}, {'id': 183126, 'name': 'new job'}, {'id': 188991, 'name': 'drunkenness'}]"

43194,[]

9563,"[{'id': 579, 'name': 'american football'}, {'id': 1482, 'name': 'trainer'}, {'id': 4613, 'name': 'training'}, {'id': 5621, 'name': 'american football coach'}, {'id': 6075, 'name': 'sport'}, {'id': 155802, 'name': 'american football stadium'}]"

1850,"[{'id': 837, 'name': 'show business'}, {'id': 3485, 'name': 'comedian'}, {'id': 5970, 'name': 'wrestling'}]"

926,"[{'id': 2902, 'name': 'space battle'}, {'id': 9831, 'name': 'spaceship'}, {'id': 11931, 'n

6106,"[{'id': 393, 'name': 'civil war'}, {'id': 407, 'name': 'dictator'}, {'id': 736, 'name': 'journalist'}, {'id': 1008, 'name': 'guerrilla'}, {'id': 1014, 'name': 'loss of lover'}, {'id': 2020, 'name': 'revolution'}, {'id': 5299, 'name': 'war correspondent'}, {'id': 5861, 'name': 'civil rights movement '}, {'id': 6760, 'name': 'picture journalist'}, {'id': 7605, 'name': 'el salvador'}, {'id': 7606, 'name': 'dictatorship'}]"

22477,"[{'id': 10183, 'name': 'independent film'}]"

19200,"[{'id': 7002, 'name': 'vigilante'}, {'id': 10183, 'name': 'independent film'}, {'id': 10714, 'name': 'serial killer'}]"

11300,"[{'id': 242, 'name': 'new york'}, {'id': 293, 'name': 'female nudity'}, {'id': 572, 'name': 'sex'}, {'id': 642, 'name': 'robbery'}, {'id': 772, 'name': 'handcuffs'}, {'id': 2483, 'name': 'nudity'}, {'id': 3713, 'name': 'chase'}, {'id': 6149, 'name': 'police'}, {'id': 6259, 'name': 'psychopath'}, {'id': 9935, 'name': 'travel'}, {'id': 10123, 'name': 'dark comedy'}, {'id': 10183, 


37662,"[{'id': 5339, 'name': 'railway car'}, {'id': 5905, 'name': 'father'}, {'id': 9180, 'name': 'yorkshire'}, {'id': 10103, 'name': 'children'}, {'id': 156091, 'name': 'missing person'}]"

11916,"[{'id': 75, 'name': 'gunslinger'}, {'id': 534, 'name': 'mexico'}, {'id': 4653, 'name': 'french'}, {'id': 6956, 'name': 'treasure hunt'}, {'id': 33625, 'name': 'illegal prostitution'}]"

14551,"[{'id': 642, 'name': 'robbery'}, {'id': 797, 'name': 'showdown'}, {'id': 2284, 'name': 'horseback riding'}, {'id': 6062, 'name': 'hostility'}, {'id': 6093, 'name': 'assault'}, {'id': 7281, 'name': 'shooting'}, {'id': 9897, 'name': 'rifle'}, {'id': 10261, 'name': 'stealing'}, {'id': 11112, 'name': 'search'}, {'id': 11836, 'name': 'contest'}, {'id': 14852, 'name': 'poker game'}, {'id': 156510, 'name': 'horse and carriage'}, {'id': 159743, 'name': 'fourth of july'}, {'id': 166529, 'name': 'winchester rifle'}, {'id': 168547, 'name': 'prize'}, {'id': 188235, 'name': 'u.s. cavalry'}]"

34759,[]

51426,"[{'i

42801,"[{'id': 549, 'name': 'prostitute'}, {'id': 924, 'name': 'italian'}, {'id': 10183, 'name': 'independent film'}]"

12627,"[{'id': 254, 'name': 'france'}, {'id': 1610, 'name': 'country estate'}, {'id': 1664, 'name': 'eroticism'}, {'id': 5375, 'name': 'artwork'}, {'id': 5756, 'name': 'man-woman relation'}, {'id': 5918, 'name': 'painting'}, {'id': 6023, 'name': 'drawing and painting'}]"

86404,"[{'id': 90, 'name': 'paris'}, {'id': 549, 'name': 'prostitute'}, {'id': 703, 'name': 'detective'}]"

18671,"[{'id': 1419, 'name': 'gun'}, {'id': 155436, 'name': 'classic noir'}, {'id': 187554, 'name': 'person running amok'}, {'id': 195402, 'name': 'film noir'}]"

17218,"[{'id': 395, 'name': 'gambling'}, {'id': 396, 'name': 'transporter'}, {'id': 2792, 'name': 'boxer'}, {'id': 4487, 'name': 'boxing match'}, {'id': 6075, 'name': 'sport'}, {'id': 155436, 'name': 'classic noir'}, {'id': 155453, 'name': 'gritty'}]"

41316,"[{'id': 187056, 'name': 'woman director'}]"

5179,"[{'id': 3130, 'name': 'ed


40096,"[{'id': 983, 'name': 'brazilian'}, {'id': 10267, 'name': 'comedy'}]"

11847,"[{'id': 642, 'name': 'robbery'}, {'id': 779, 'name': 'martial arts'}, {'id': 5733, 'name': 'turkey'}, {'id': 12193, 'name': 'reporter'}, {'id': 12354, 'name': 'hong kong'}, {'id': 14955, 'name': 'fighting'}]"

52999,"[{'id': 10183, 'name': 'independent film'}, {'id': 187056, 'name': 'woman director'}]"

41714,"[{'id': 10183, 'name': 'independent film'}]"

11056,"[{'id': 1452, 'name': 'loss of child'}, {'id': 2642, 'name': 'solar eclipse'}, {'id': 3358, 'name': 'haunted house'}, {'id': 18035, 'name': 'family'}, {'id': 18130, 'name': 'psychopathy'}]"

27967,[]

919,"[{'id': 4409, 'name': 'military base'}, {'id': 4942, 'name': 'victim of murder'}, {'id': 5135, 'name': 'jeep'}, {'id': 210024, 'name': 'anime'}]"

57558,[]

13437,"[{'id': 293, 'name': 'female nudity'}, {'id': 2708, 'name': 'hitman'}, {'id': 3088, 'name': 'bodyguard'}, {'id': 6220, 'name': 'mafia boss'}, {'id': 10267, 'name': 'comedy'}, {'id'


25645,"[{'id': 780, 'name': 'kung fu'}, {'id': 4556, 'name': 'protection'}, {'id': 33344, 'name': 'bandit'}, {'id': 209157, 'name': 'childhood sweetheart'}]"

58311,[]

18672,"[{'id': 184656, 'name': 'wuxia'}]"

39776,[]

11559,"[{'id': 572, 'name': 'sex'}, {'id': 1533, 'name': 'isolation'}, {'id': 2343, 'name': 'magic'}, {'id': 4436, 'name': 'addicted'}, {'id': 13073, 'name': 'fear'}, {'id': 13154, 'name': 'world'}, {'id': 14601, 'name': 'explosion'}, {'id': 14964, 'name': 'drug'}, {'id': 40895, 'name': 'illness'}, {'id': 159947, 'name': 'parents'}, {'id': 170344, 'name': 'imagination'}, {'id': 197823, 'name': 'lonely'}, {'id': 202760, 'name': 'hopeless'}, {'id': 202764, 'name': 'dolls'}]"

31513,"[{'id': 4344, 'name': 'musical'}, {'id': 6075, 'name': 'sport'}, {'id': 193515, 'name': 'american football hero'}]"

10348,"[{'id': 830, 'name': 'car race'}, {'id': 3296, 'name': 'car mechanic'}, {'id': 6149, 'name': 'police'}, {'id': 6996, 'name': 'car garage'}]"

26718,"[{'id': 10909, 'na


14145,"[{'id': 1299, 'name': 'monster'}]"

9010,"[{'id': 2199, 'name': 'opera'}, {'id': 2390, 'name': 'migraine'}, {'id': 7954, 'name': 'salzburg'}, {'id': 8224, 'name': 'cannabis'}, {'id': 9826, 'name': 'murder'}, {'id': 13042, 'name': 'soccer'}, {'id': 34125, 'name': 'catholic school'}, {'id': 172391, 'name': 'catholic priest'}]"

13022,"[{'id': 223059, 'name': 'animal horror'}]"

68259,"[{'id': 227952, 'name': 'centro de capacitación cinematográfica (ccc)'}]"

9783,"[{'id': 428, 'name': 'nurse'}, {'id': 2800, 'name': 'psychology'}, {'id': 9937, 'name': 'suspense'}, {'id': 11221, 'name': 'blood'}, {'id': 11612, 'name': 'hospital'}]"

10190,"[{'id': 1415, 'name': 'small town'}, {'id': 1525, 'name': 'puberty'}, {'id': 1975, 'name': 'arabian'}, {'id': 2337, 'name': 'pedophilia'}, {'id': 2952, 'name': 'gulf war'}, {'id': 5975, 'name': 'menstruation'}, {'id': 6325, 'name': 'ethnic stereotype'}, {'id': 9608, 'name': 'arab-american'}, {'id': 9609, 'name': 'houston'}, {'id': 10183, 'name': 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



240881,"[{'id': 1228, 'name': '1970s'}, {'id': 18035, 'name': 'family'}, {'id': 187056, 'name': 'woman director'}, {'id': 199029, 'name': 'dealer'}]"

274478,[]

56539,"[{'id': 822, 'name': 'airport'}, {'id': 1003, 'name': 'photographer'}, {'id': 1667, 'name': 'pimp'}, {'id': 2708, 'name': 'hitman'}, {'id': 9826, 'name': 'murder'}, {'id': 10410, 'name': 'conspiracy'}, {'id': 10601, 'name': 'blaxploitation'}, {'id': 14818, 'name': 'model'}]"

106136,[]

49391,"[{'id': 4379, 'name': 'time travel'}, {'id': 156580, 'name': 'ghost story'}]"

162059,"[{'id': 8881, 'name': 'jury'}, {'id': 12193, 'name': 'reporter'}]"

16365,[]

152986,"[{'id': 395, 'name': 'gambling'}, {'id': 1332, 'name': 'cheating'}, {'id': 1459, 'name': 'marriage proposal'}, {'id': 6038, 'name': 'marriage'}, {'id': 13027, 'name': 'wedding'}]"

142012,"[{'id': 417, 'name': 'corruption'}, {'id': 5340, 'name': 'investigation'}, {'id': 6149, 'name': 'police'}]"

54165,[]

105404,[]

16460,"[{'id': 195032, 'name': 'garfield'}]"


79735,"[{'id': 1721, 'name': 'fight'}, {'id': 2492, 'name': 'bank robber'}, {'id': 10085, 'name': 'betrayal'}, {'id': 18106, 'name': 'knife throwing'}, {'id': 161155, 'name': 'kids'}, {'id': 199955, 'name': 'gun in safe'}]"

48669,"[{'id': 3739, 'name': 'funeral'}, {'id': 3880, 'name': 'snuff'}, {'id': 11040, 'name': 'masked killer'}, {'id': 15193, 'name': 'motle'}]"

77964,"[{'id': 5809, 'name': 'marriage crisis'}, {'id': 10909, 'name': 'lawyer'}, {'id': 11677, 'name': 'secretary'}]"

90957,"[{'id': 1936, 'name': 'blackmail'}, {'id': 10453, 'name': 'con artist'}]"

95919,"[{'id': 9799, 'name': 'romantic comedy'}, {'id': 12428, 'name': 'taiwanese'}]"

72483,"[{'id': 237, 'name': 'gay'}, {'id': 1009, 'name': 'baby'}, {'id': 6017, 'name': 'genetics'}, {'id': 208591, 'name': 'abortion'}, {'id': 208873, 'name': 'acceptance'}]"

191717,[]

29609,"[{'id': 1956, 'name': 'world war ii'}, {'id': 3485, 'name': 'comedian'}, {'id': 4663, 'name': 'lovers'}, {'id': 8079, 'name': 'cornwall'}, {'id':

267654,[]

3004,"[{'id': 1295, 'name': 'europe'}, {'id': 6674, 'name': 'chair'}, {'id': 10280, 'name': 'farce'}, {'id': 10911, 'name': 'inheritance'}, {'id': 155331, 'name': 'jewel'}]"

118150,"[{'id': 818, 'name': 'based on novel'}, {'id': 1453, 'name': 'amnesia'}, {'id': 176799, 'name': 'detective agency'}]"

287391,[]

38421,"[{'id': 9813, 'name': 'french riviera'}]"

142979,[]

163333,[]

65718,[]

44541,[]

131507,[]

213831,[]

105403,[]

127092,[]

66178,[]

206296,"[{'id': 4183, 'name': 'wife'}, {'id': 4344, 'name': 'musical'}, {'id': 6038, 'name': 'marriage'}, {'id': 15160, 'name': 'divorce'}]"

11925,"[{'id': 255, 'name': 'male nudity'}, {'id': 2483, 'name': 'nudity'}, {'id': 8085, 'name': 'stockholm'}, {'id': 12392, 'name': 'best friend'}, {'id': 13027, 'name': 'wedding'}, {'id': 157813, 'name': 'naked'}]"

181454,[]

201085,"[{'id': 3358, 'name': 'haunted house'}, {'id': 6015, 'name': 'mining'}, {'id': 9826, 'name': 'murder'}, {'id': 14898, 'name': 'spitting blood'}, {'id':


25939,"[{'id': 2450, 'name': 'auschwitz'}]"

78992,[]

26357,"[{'id': 470, 'name': 'spy'}, {'id': 10988, 'name': 'based on tv series'}]"

29467,"[{'id': 378, 'name': 'prison'}, {'id': 156121, 'name': 'ex-con'}, {'id': 195402, 'name': 'film noir'}]"

84365,"[{'id': 2792, 'name': 'boxer'}, {'id': 195402, 'name': 'film noir'}]"

124026,"[{'id': 3546, 'name': 'florida keys'}, {'id': 6212, 'name': 'smuggling'}, {'id': 155493, 'name': 'campy'}]"

300168,[]

132316,"[{'id': 10540, 'name': 'bollywood'}, {'id': 155651, 'name': 'fall in love'}]"

74458,"[{'id': 2387, 'name': 'new delhi india'}, {'id': 159298, 'name': 'himachal pradesh india'}, {'id': 159299, 'name': 'punjab'}, {'id': 159304, 'name': 'punjab india'}]"

40777,"[{'id': 128, 'name': 'love triangle'}, {'id': 2658, 'name': 'new zealand'}, {'id': 4110, 'name': 'bombay india'}, {'id': 14636, 'name': 'india'}]"

188222,"[{'id': 6054, 'name': 'friendship'}, {'id': 12396, 'name': 'hollywood'}, {'id': 162262, 'name': 'movie star'}, {'id': 

77606,"[{'id': 1156, 'name': 'sister sister relationship'}, {'id': 5565, 'name': 'biography'}, {'id': 156321, 'name': 'tv movie'}, {'id': 184948, 'name': 'disney channel'}, {'id': 194988, 'name': 'drag racing'}, {'id': 206832, 'name': 'drag racer'}]"

160718,"[{'id': 470, 'name': 'spy'}, {'id': 10988, 'name': 'based on tv series'}]"

27338,"[{'id': 1453, 'name': 'amnesia'}, {'id': 14780, 'name': 'investigator'}]"

109170,[]

103260,"[{'id': 6075, 'name': 'sport'}, {'id': 155746, 'name': 'deserted island'}]"

27339,[]

32677,"[{'id': 2041, 'name': 'island'}, {'id': 10792, 'name': 'marooned'}]"

20435,"[{'id': 1402, 'name': 'general'}, {'id': 3070, 'name': 'mercenary'}, {'id': 13008, 'name': 'train'}, {'id': 14760, 'name': 'scientist'}, {'id': 157938, 'name': 'soviet'}, {'id': 191748, 'name': 'russian scientist'}, {'id': 210277, 'name': 'death train'}]"

51881,[]

288201,[]

278604,"[{'id': 572, 'name': 'sex'}, {'id': 9831, 'name': 'spaceship'}, {'id': 9964, 'name': 'crude humor'}, {'id'


68252,"[{'id': 14643, 'name': 'battle'}, {'id': 180662, 'name': 'pyschic'}]"

32272,[]

81242,[]

38604,[]

26010,[]

264273,[]

21792,"[{'id': 1453, 'name': 'amnesia'}]"

77175,[]

204053,[]

35851,"[{'id': 380, 'name': 'brother brother relationship'}, {'id': 708, 'name': 'runaway'}, {'id': 2669, 'name': 'motel'}, {'id': 9673, 'name': 'love'}, {'id': 14522, 'name': 'co-worker'}, {'id': 18028, 'name': 'hiding'}, {'id': 157598, 'name': 'store'}, {'id': 187844, 'name': 'flashback'}, {'id': 203360, 'name': 'psychotherapist'}]"

96664,"[{'id': 9950, 'name': 'thriller'}, {'id': 10092, 'name': 'mystery'}]"

5846,"[{'id': 1449, 'name': 'underworld'}]"

195276,[]

56790,"[{'id': 627, 'name': 'killing'}, {'id': 8087, 'name': 'horror'}, {'id': 9707, 'name': 'evil doll'}]"

86279,[]

24889,[]

204553,"[{'id': 198070, 'name': '감시자들'}]"

45089,[]

23609,[]

137409,"[{'id': 6691, 'name': 'drug scene'}, {'id': 11148, 'name': 'police chase'}]"

352128,"[{'id': 732, 'name': 'coma'}, {'id': 818, 'name'

47240,"[{'id': 9937, 'name': 'suspense'}]"

104194,"[{'id': 187056, 'name': 'woman director'}]"

104171,[]

87953,[]

159862,[]

66759,[]

75808,[]

79781,[]

9287,"[{'id': 798, 'name': 'sheriff'}, {'id': 800, 'name': 'bounty'}, {'id': 1582, 'name': 'saloon'}, {'id': 8461, 'name': 'prügel'}, {'id': 18035, 'name': 'family'}, {'id': 160488, 'name': 'hoodlum'}, {'id': 207317, 'name': 'christmas'}]"

34421,[]

108228,"[{'id': 7312, 'name': 'road trip'}]"

323968,[]

120172,"[{'id': 12377, 'name': 'zombie'}, {'id': 14707, 'name': 'brutality'}, {'id': 180447, 'name': 'biological warfare'}, {'id': 229607, 'name': 'female warrior'}]"

177112,[]

36748,"[{'id': 985, 'name': 'candy'}, {'id': 4414, 'name': 'adventure'}, {'id': 4717, 'name': 'camel'}]"

229056,"[{'id': 65, 'name': 'holiday'}, {'id': 818, 'name': 'based on novel'}, {'id': 161174, 'name': 'christmas carol'}, {'id': 180975, 'name': 'charles dickens'}, {'id': 207317, 'name': 'christmas'}, {'id': 214549, 'name': 'short'}]"

42195,[]

2

328480,[]

284250,[]

82655,"[{'id': 1897, 'name': 'oregon'}, {'id': 9887, 'name': 'surrealism'}, {'id': 10292, 'name': 'gore'}, {'id': 10629, 'name': 'vision'}, {'id': 13124, 'name': 'scream'}, {'id': 14629, 'name': 'experimental film'}, {'id': 156206, 'name': 'omelet'}]"

325134,"[{'id': 217451, 'name': 'turkish movie'}]"

159109,[]

354216,"[{'id': 2679, 'name': 'artist'}, {'id': 5540, 'name': 'heavy metal'}, {'id': 5918, 'name': 'painting'}, {'id': 10138, 'name': 'satan'}, {'id': 170865, 'name': 'child kidnapping'}, {'id': 178646, 'name': 'possesion'}]"

376358,[]

174751,[]

45098,[]

57718,[]

132873,[]

333360,"[{'id': 2598, 'name': 'museum'}, {'id': 3660, 'name': 'art collector'}, {'id': 14893, 'name': 'art'}, {'id': 187056, 'name': 'woman director'}]"

370665,[]

333596,"[{'id': 6075, 'name': 'sport'}, {'id': 12425, 'name': 'racism'}]"

331190,"[{'id': 1562, 'name': 'hostage'}, {'id': 9672, 'name': 'based on true story'}, {'id': 9826, 'name': 'murder'}, {'id': 10183, 'name': '

371003,"[{'id': 6273, 'name': 'school trip'}, {'id': 6876, 'name': 'vacation'}, {'id': 8087, 'name': 'horror'}, {'id': 8508, 'name': 'party'}, {'id': 9864, 'name': 'prank'}, {'id': 10714, 'name': 'serial killer'}]"

373314,[]

377853,[]

37839,"[{'id': 4565, 'name': 'dystopia'}]"

56943,"[{'id': 187056, 'name': 'woman director'}]"

108476,"[{'id': 187056, 'name': 'woman director'}]"

159006,[]

343043,"[{'id': 3529, 'name': 'renegade'}, {'id': 9817, 'name': 'behind the scenes'}, {'id': 14989, 'name': 'religious'}, {'id': 50244, 'name': 'making of'}, {'id': 162708, 'name': 'mary magdalene'}]"

352162,"[{'id': 430, 'name': 'venezuela'}, {'id': 6972, 'name': 'caracas'}, {'id': 9826, 'name': 'murder'}, {'id': 10198, 'name': 'gay relationship'}, {'id': 208390, 'name': 'lgbt elderly'}]"

102707,[]

73791,[]

381353,[]

329809,"[{'id': 6148, 'name': 'court'}, {'id': 8881, 'name': 'jury'}, {'id': 33519, 'name': 'courtroom'}, {'id': 222517, 'name': 'legal drama'}, {'id': 235591, 'name': 'jury s

291189,"[{'id': 15009, 'name': 'criminal'}, {'id': 155808, 'name': 'hometown'}]"

375012,"[{'id': 456, 'name': 'mother'}, {'id': 5600, 'name': 'daughter'}, {'id': 156075, 'name': 'evil'}, {'id': 208289, 'name': '1980s'}, {'id': 232122, 'name': 'tehran'}]"

312849,[]

273912,[]

287719,[]

63004,"[{'id': 2343, 'name': 'magic'}]"

213595,[]

52488,[]

98487,"[{'id': 187056, 'name': 'woman director'}]"

43093,"[{'id': 212, 'name': 'london england'}, {'id': 1299, 'name': 'monster'}, {'id': 1489, 'name': 'river thames'}, {'id': 1618, 'name': 'radiation'}, {'id': 4477, 'name': 'paleontologist'}, {'id': 6978, 'name': 'radium'}, {'id': 8079, 'name': 'cornwall'}, {'id': 10124, 'name': 'laboratory'}, {'id': 11100, 'name': 'giant monster'}, {'id': 12616, 'name': 'dinosaur'}, {'id': 33696, 'name': 'sea monster'}, {'id': 174815, 'name': 'cornwall england'}]"

376311,"[{'id': 1994, 'name': 'wolf'}, {'id': 2035, 'name': 'mythology'}, {'id': 10327, 'name': 'undead'}, {'id': 12564, 'name': 'werewolf'},

399097,"[{'id': 6025, 'name': 'photography'}, {'id': 10683, 'name': 'coming of age'}, {'id': 220448, 'name': 'bipolar disorder'}]"

392554,"[{'id': 187056, 'name': 'woman director'}]"

65442,"[{'id': 12420, 'name': 'unsimulated sex'}]"

198057,"[{'id': 1415, 'name': 'small town'}, {'id': 6018, 'name': 'new hampshire'}, {'id': 169977, 'name': 'small town sheriff'}]"

268956,[]

296472,[]

141267,"[{'id': 4654, 'name': 'undercover agent'}, {'id': 6149, 'name': 'police'}, {'id': 10291, 'name': 'organized crime'}, {'id': 10391, 'name': 'mafia'}, {'id': 14536, 'name': 'crime'}, {'id': 14617, 'name': 'mole'}, {'id': 187056, 'name': 'woman director'}]"

396535,"[{'id': 10349, 'name': 'survival'}, {'id': 10586, 'name': 'korea'}, {'id': 12377, 'name': 'zombie'}, {'id': 13008, 'name': 'train'}, {'id': 158540, 'name': 'south korea'}, {'id': 232649, 'name': 'busan'}]"

138115,[]

138118,[]

136071,[]

133879,"[{'id': 157241, 'name': 'criminal investigation'}, {'id': 187056, 'name': 'woman director


381289,"[{'id': 818, 'name': 'based on novel'}, {'id': 2526, 'name': 'human animal relationship'}, {'id': 5484, 'name': 'reincarnation'}, {'id': 9229, 'name': 'grandparents'}, {'id': 15162, 'name': 'dog'}, {'id': 17994, 'name': 'firecracker'}, {'id': 186846, 'name': 'adaptation'}, {'id': 234523, 'name': 'deflated football'}]"

247446,[]

266558,[]

334890,[]

37376,[]

393521,"[{'id': 1227, 'name': 'cemetery'}, {'id': 3739, 'name': 'funeral'}, {'id': 4523, 'name': 'joint'}, {'id': 8224, 'name': 'cannabis'}, {'id': 10163, 'name': 'cancer'}, {'id': 10226, 'name': 'neighbor'}, {'id': 11612, 'name': 'hospital'}, {'id': 34079, 'name': 'death'}, {'id': 156390, 'name': 'face slap'}, {'id': 159103, 'name': 'mourning'}, {'id': 166124, 'name': 'texting'}, {'id': 225616, 'name': 'smoking a joint'}]"

360814,"[{'id': 5565, 'name': 'biography'}, {'id': 5970, 'name': 'wrestling'}, {'id': 6075, 'name': 'sport'}, {'id': 6683, 'name': 'autobiography'}]"

261439,"[{'id': 497, 'name': 'shakespeare'}, {'

31119,"[{'id': 2766, 'name': 'mutation'}, {'id': 9951, 'name': 'alien'}]"

390293,"[{'id': 10637, 'name': 'food'}, {'id': 40844, 'name': 'sustainable'}, {'id': 208736, 'name': 'bugs'}]"

30633,[]

447091,[]

130300,[]

111042,[]

416951,"[{'id': 5565, 'name': 'biography'}, {'id': 9682, 'name': 'history'}]"

338227,[]

252034,[]

198646,[]

11396,"[{'id': 90, 'name': 'paris'}, {'id': 409, 'name': 'africa'}, {'id': 1454, 'name': 'treasure'}, {'id': 3203, 'name': 'pilot'}, {'id': 190451, 'name': 'dragster'}]"

274991,[]

160124,[]

49579,[]

38332,[]

49574,[]

126555,"[{'id': 187056, 'name': 'woman director'}]"

78595,[]

57204,"[{'id': 1010, 'name': 'bar'}, {'id': 6562, 'name': 'celebrity'}, {'id': 7813, 'name': 'hockey player'}, {'id': 8508, 'name': 'party'}, {'id': 9673, 'name': 'love'}, {'id': 10456, 'name': 'athlete'}, {'id': 10726, 'name': 'gang'}, {'id': 156661, 'name': 'threat'}]"

152989,[]

4644,"[{'id': 10183, 'name': 'independent film'}]"

349177,[]

2132,"[{'id': 2334, 'name

TypeError: documents must be a non-empty list

In [None]:

# Задание по Pandas
# --------------------------------------------------------------
# Постройте таблицу их тегов и определите top-5 самых популярных

# формируем DataFrame
tags_df = pd.DataFrame(id_tags, columns=['movieid', 'tags'])

# --------------------------------------------------------------
# Ваш код здесь
# сгруппируйте по названию тега с помощью group_by
# для каждого тега вычислите, в каком количестве фильмов он встречается
# оставьте top-5 самых популярных тегов

top_5_tags = tags_df.head(5)

print(top_5_tags)

logger.info("Домашка выполнена!")
# --------------------------------------------------------------