In [100]:
import numpy as np
import pandas as pd
import seaborn as sns
import calendar
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime

In [101]:
data = pd.read_csv('movie_bd_v5.csv')
data.sample(5)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year
1707,tt0431197,70000000,86658558,The Kingdom,Jamie Foxx|Jennifer Garner|Chris Cooper|Jason ...,Peter Berg,How do you stop an enemy who isn't afraid to die?,A team of U.S. government agents is sent to in...,110,Thriller|Action|Drama,Universal Pictures|Forward Pass|Relativity Med...,8/22/2007,6.4,2007
1139,tt0322259,76000000,236350661,2 Fast 2 Furious,Paul Walker|Tyrese Gibson|Eva Mendes|Cole Haus...,John Singleton,How Fast Do You Want It?,It's a major double-cross when former police o...,107,Action|Crime|Thriller,Mikona Productions GmbH & Co. KG|Universal Pic...,6/5/2003,6.1,2003
618,tt1129442,30000000,108979549,Transporter 3,Jason Statham|FranÃ§ois BerlÃ©and|Natalya Ruda...,Olivier Megaton,"This time, the rules are the same. Except one.",Frank Martin puts the driving gloves on to del...,104,Action|Adventure|Thriller|Crime,Europa Corp|TF1 Films Production|Grive Product...,11/26/2008,5.9,2008
959,tt0267248,25000000,10719357,Abandon,Katie Holmes|Benjamin Bratt|Charlie Hunnam|Zoo...,Stephen Gaghan,Watch who you leave behind.,A psychological thriller about a senior at one...,99,Drama|Mystery|Thriller,Paramount Pictures|Spyglass Entertainment|Touc...,10/14/2002,4.9,2002
1082,tt0320691,22000000,95708457,Underworld,Kate Beckinsale|Scott Speedman|Michael Sheen|S...,Len Wiseman,An immortal battle for supremacy.,Vampires and werewolves have waged a nocturnal...,121,Fantasy|Action|Thriller,Lakeshore Entertainment|Laurinfilm|Subterranea...,9/19/2003,6.5,2003


In [102]:
data.describe()

Unnamed: 0,budget,revenue,runtime,vote_average,release_year
count,1889.0,1889.0,1889.0,1889.0,1889.0
mean,54310830.0,155365300.0,109.658549,6.140762,2007.860773
std,48587210.0,214669800.0,18.017041,0.764763,4.468841
min,5000000.0,2033165.0,63.0,3.3,2000.0
25%,20000000.0,34560580.0,97.0,5.6,2004.0
50%,38000000.0,83615410.0,107.0,6.1,2008.0
75%,72000000.0,178262600.0,120.0,6.6,2012.0
max,380000000.0,2781506000.0,214.0,8.1,2015.0


# Предобработка

In [103]:
answers = {} # создадим словарь для ответов

# тут другие ваши предобработки колонок например:

#the time given in the dataset is in string format.
#So we need to change this in datetime format
def convert_date(str_date):
    return datetime.strptime(str_date, '%m/%d/%Y')


data.release_date = data.release_date.apply(convert_date)

# Прибыль profit
data['profit'] = data['revenue'] - data['budget']

# Список всех жанров
def get_genres(data):
    """Возвращает список всех уникальных жанров, которые встречаются в датафрейме"""
    result = []
    for genres in data.genres:
        result = result + str.split(genres, '|')

    result = list(set(result))
    return result


# Сохраняем список всех жанров в переменную для удобства
genres_list = get_genres(data)


# Создаем датафрейм с колонками imdb_id и отдельной колонкой на каждый жанр
# В жанровых колонках значения хранятся в виде bool: True если фильм относится к жанра, False если нет
def generate_genres_df(data):
    genres_df = pd.DataFrame(data.imdb_id)
    for genre in genres_list:
        genres_df[genre] = False
        genre_df = data[data.genres.str.contains(genre, na=False)]
        for ix1 in genre_df.index.values.tolist():
            for ix2 in genres_df.index.values.tolist():
                if ix1 == ix2:
                    genres_df.loc[ix2, genre] = True

    return genres_df


genre_df = generate_genres_df(data)


# Объединяем жанровый датафрейм с общим
data_with_genre = data.merge(genre_df, on='imdb_id', how='inner')


# Список актеров
def get_actors(data):
    """Возвращает список всех уникальных актеров, которые встречаются в датафрейме"""
    result = []
    for cast in data.cast:
        result = result + str.split(cast, '|')

    result = list(set(result))
    return result


actors_list = get_actors(data)


# Список режисеров
def get_directors(data):
    """Возвращает список всех уникальных режисеров, которые встречаются в датафрейме"""
    result = []
    for director in data.director:
        result = result + str.split(director, '|')

    result = list(set(result))
    return result


director_list = get_directors(data)


# Список студий
def get_studio(data):
    """Возвращает список всех уникальных студий, которые встречаются в датафрейме"""
    result = []
    for studio in data.production_companies:
        result = result + str.split(studio, '|')

    result = list(set(result))
    return result


studio_list = get_studio(data)



data.sample(5)

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
376,tt1320253,80000000,274470394,The Expendables,Sylvester Stallone|Jason Statham|Dolph Lundgre...,Sylvester Stallone,Choose Your Weapon.,Barney Ross leads a band of highly skilled mer...,103,Action|Thriller,Millennium Films,2010-08-03,5.9,2010,194470394
537,tt0240462,70000000,176104344,Dr. Dolittle 2,Eddie Murphy|Kristen Wilson|Raven-SymonÃ©|Kyla...,Steve Carr,The doctor is in again.,"Dr. John Dolittle the beloved doctor is back, ...",87,Comedy|Family|Romance|Fantasy,Twentieth Century Fox Film Corporation,2001-06-22,4.9,2001,106104344
162,tt2294449,50000000,188441614,22 Jump Street,Jonah Hill|Channing Tatum|Dave Franco|Ice Cube...,Phil Lord|Christopher Miller,They're not 21 anymore,After making their way through high school (tw...,112,Crime|Comedy|Action,Columbia Pictures|Original Film|Media Rights C...,2014-06-05,7.0,2014,138441614
1408,tt0361089,35000000,19478106,Valiant,Ewan McGregor|Ricky Gervais|Tim Curry|Jim Broa...,Gary Chapman,"Some pigeons eat crumbs, others make history.",The animated comedy tells the story of a lowly...,76,Animation|Family|Adventure,Vanguard Films|Scanbox,2005-03-25,5.3,2005,-15521894
441,tt1038686,26000000,67918658,Legion,Paul Bettany|Dennis Quaid|Kevin Durand|Kate Wa...,Scott Stewart,"When the last angel falls, the fight for manki...","When God loses faith in humankind, he sends hi...",100,Horror,Bold Films,2010-01-21,5.0,2010,41918658


# 1. У какого фильма из списка самый большой бюджет?

Использовать варианты ответов в коде решения запрещено.    
Вы думаете и в жизни у вас будут варианты ответов?)

In [104]:
# в словарь вставляем номер вопроса и ваш ответ на него
# Пример: 
# answers['1'] = '2. Spider-Man 3 (tt0413300)'
# запишите свой вариант ответа
answers['1'] = '723. Pirates of the Caribbean: On Stranger Tides (tt1298650)'
# если ответили верно, можете добавить комментарий со значком "+"

In [105]:
# тут пишем ваш код для решения данного вопроса:
data[data.budget == data.budget.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
723,tt1298650,380000000,1021683000,Pirates of the Caribbean: On Stranger Tides,Johnny Depp|PenÃ©lope Cruz|Geoffrey Rush|Ian M...,Rob Marshall,Live Forever Or Die Trying.,Captain Jack Sparrow crosses paths with a woma...,136,Adventure|Action|Fantasy,Walt Disney Pictures|Jerry Bruckheimer Films|M...,2011-05-11,6.3,2011,641683000


# 2. Какой из фильмов самый длительный (в минутах)?

In [106]:
# думаю логику работы с этим словарем вы уже поняли, 
# по этому не буду больше его дублировать
answers['2'] = '1157. Gods and Generals (tt0279111)'

In [107]:
data[data.runtime == data.runtime.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1157,tt0279111,56000000,12923936,Gods and Generals,Stephen Lang|Jeff Daniels|Robert Duvall|Kevin ...,Ronald F. Maxwell,The nations heart was touched by...,The film centers mostly around the personal an...,214,Drama|History|War,Turner Pictures|Antietam Filmworks,2003-02-21,5.8,2003,-43076064


# 3. Какой из фильмов самый короткий (в минутах)?





In [108]:
answers['3'] = '768. Winnie the Pooh (tt1449283)'

In [109]:
data[data.runtime == data.runtime.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
768,tt1449283,30000000,14460000,Winnie the Pooh,Jim Cummings|Travis Oates|Jim Cummings|Bud Luc...,Stephen Anderson|Don Hall,Oh Pooh.,"During an ordinary day in Hundred Acre Wood, W...",63,Animation|Family,Walt Disney Pictures|Walt Disney Animation Stu...,2011-04-13,6.8,2011,-15540000


# 4. Какова средняя длительность фильмов?


In [110]:
answers['4'] = 109.6585494970884

In [111]:
data.runtime.mean()

109.6585494970884

# 5. Каково медианное значение длительности фильмов? 

In [112]:
answers['5'] = 107.0

In [113]:
data.runtime.median()

107.0

# 6. Какой самый прибыльный фильм?
#### Внимание! Здесь и далее под «прибылью» или «убытками» понимается разность между сборами и бюджетом фильма. (прибыль = сборы - бюджет) в нашем датасете это будет (profit = revenue - budget) 

In [114]:
answers['6'] = '239. Avatar (tt0499549)'

In [115]:
# лучше код получения столбца profit вынести в Предобработку что в начале
data[data.profit == data.profit.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
239,tt0499549,237000000,2781505847,Avatar,Sam Worthington|Zoe Saldana|Sigourney Weaver|S...,James Cameron,Enter the World of Pandora.,"In the 22nd century, a paraplegic Marine is di...",162,Action|Adventure|Fantasy|Science Fiction,Ingenious Film Partners|Twentieth Century Fox ...,2009-12-10,7.1,2009,2544505847


# 7. Какой фильм самый убыточный? 

In [116]:
answers['7'] = '1245. The Lone Ranger (tt1210819)'

In [117]:
data[data.profit == data.profit.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1245,tt1210819,255000000,89289910,The Lone Ranger,Johnny Depp|Armie Hammer|William Fichtner|Hele...,Gore Verbinski,Never Take Off the Mask,The Texas Rangers chase down a gang of outlaws...,149,Action|Adventure|Western,Walt Disney Pictures|Jerry Bruckheimer Films|I...,2013-07-03,6.0,2013,-165710090


# 8. У скольких фильмов из датасета объем сборов оказался выше бюджета?

In [118]:
answers['8'] = 1478

In [119]:
len(data[data.profit > 0])

1478

# 9. Какой фильм оказался самым кассовым в 2008 году?

In [120]:
answers['9'] = '599. The Dark Knight (tt0468569)'

In [121]:
data_2008 = data[data.release_year == 2008]
data_2008[data_2008.profit == data_2008.profit.max()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
599,tt0468569,185000000,1001921825,The Dark Knight,Christian Bale|Michael Caine|Heath Ledger|Aaro...,Christopher Nolan,Why So Serious?,Batman raises the stakes in his war on crime. ...,152,Drama|Action|Crime|Thriller,DC Comics|Legendary Pictures|Warner Bros.|Syncopy,2008-07-16,8.1,2008,816921825


# 10. Самый убыточный фильм за период с 2012 по 2014 г. (включительно)?


In [122]:
answers['10'] = '1245. The Lone Ranger (tt1210819)'

In [123]:
data_2012_2014 = data[(data.release_year >= 2012) & (data.release_year <= 2014)]
data_2012_2014[data_2012_2014.profit == data_2012_2014.profit.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
1245,tt1210819,255000000,89289910,The Lone Ranger,Johnny Depp|Armie Hammer|William Fichtner|Hele...,Gore Verbinski,Never Take Off the Mask,The Texas Rangers chase down a gang of outlaws...,149,Action|Adventure|Western,Walt Disney Pictures|Jerry Bruckheimer Films|I...,2013-07-03,6.0,2013,-165710090


# 11. Какого жанра фильмов больше всего?

In [124]:
answers['11'] = 'Drama'

In [125]:
genres = dict.fromkeys(genres_list, 0)

for genre in genres_list:
    genres[genre] = genre_df[genre].values.sum()
    
max(genres, key=genres.get)

'Drama'

# 12. Фильмы какого жанра чаще всего становятся прибыльными? 

In [126]:
answers['12'] = 'Drama'

In [127]:
genres_profit = dict.fromkeys(genres_list, 0)
for genre in genres_list:
    profit_df = data_with_genre.query(f"profit > 0 & `{genre}` == True")
    genres_profit[genre] = len(profit_df.index)

max(genres_profit, key=genres_profit.get)

'Drama'

# 13. У какого режиссера самые большие суммарные кассовые сборы?

In [128]:
answers['13'] = 'Peter Jackson'

In [129]:
# Список директоров
director_list = data_with_genre.director.unique()
# Словарь с кассовыми сборами режисеров
direcotr_profit = dict.fromkeys(director_list, 0)

for director in director_list:
    direcotr_profit[director] = data[data.director == director].revenue.sum()

max(direcotr_profit, key=direcotr_profit.get)

'Peter Jackson'

# 14. Какой режисер снял больше всего фильмов в стиле Action?

In [130]:
answers['14'] = 'Robert Rodriguez'

In [131]:
director_action_count = dict.fromkeys(director_list, 0)
for director in director_list:
    director_df = data[data.genres.str.match("Action", na=False) & (data.director.str.contains(director))]
    director_action_count[director] = len(director_df)

max(director_action_count, key=director_action_count.get)

'Robert Rodriguez|Frank Miller|Quentin Tarantino'

# 15. Фильмы с каким актером принесли самые высокие кассовые сборы в 2012 году? 

In [132]:
answers['15'] = 'Chris Hemsworth'

In [133]:
data_2012 = data[data.release_year == 2012]

actors_revenue_2012 = dict.fromkeys(actors_list, 0)
for actor in actors_list:
    actor_df = data_2012[(data_2012.cast.str.contains(actor))]
    actors_revenue_2012[actor] = actor_df.revenue.sum()

max(actors_revenue_2012, key=actors_revenue_2012.get)

'Chris Hemsworth'

# 16. Какой актер снялся в большем количестве высокобюджетных фильмов?

In [134]:
answers['16'] = 'Matt Damon'

In [135]:
data_high_budget = data[data.budget > data.budget.mean()]

actors_revenue = dict.fromkeys(actors_list, 0)
for actor in actors_list:
    actor_df = data_high_budget[(data_high_budget.cast.str.contains(actor))]
    actors_revenue[actor] = len(actor_df)

max(actors_revenue, key=actors_revenue.get)

'Matt Damon'

# 17. В фильмах какого жанра больше всего снимался Nicolas Cage? 

In [136]:
answers['17'] = 'Action'

In [137]:
genres_nicolas_cage = dict.fromkeys(genres_list, 0)
for genre in genres_list:
    nicolas_cage_df = data_with_genre[data_with_genre.cast.str.contains("Nicolas Cage")]
    nicolas_cage_df = nicolas_cage_df.query(f'`{genre}` == True')
    genres_nicolas_cage[genre] = len(nicolas_cage_df.index)

max(genres_nicolas_cage, key=genres_nicolas_cage.get)

'Action'

# 18. Самый убыточный фильм от Paramount Pictures

In [138]:
answers['18'] = 'K-19: The Widowmaker (tt0267626)'

In [139]:
paramount_df = data[data.production_companies.str.contains("Paramount Pictures")]
paramount_df[paramount_df.profit == paramount_df.profit.min()]

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit
925,tt0267626,100000000,35168966,K-19: The Widowmaker,Harrison Ford|Liam Neeson|Peter Sarsgaard|Joss...,Kathryn Bigelow,Fate has found its hero.,When Russia's first nuclear submarine malfunct...,138,Thriller|Drama|History,Paramount Pictures|Intermedia Films|National G...,2002-07-19,6.0,2002,-64831034


# 19. Какой год стал самым успешным по суммарным кассовым сборам?

In [140]:
answers['19'] = '2015'

In [141]:
years_list = data['release_year'].unique()
year_revenue = dict.fromkeys(years_list, 0)
for year in years_list:
    year_df = data[data.release_year == year]
    year_revenue[year] = year_df.revenue.sum()

max(year_revenue, key=year_revenue.get)

2015

# 20. Какой самый прибыльный год для студии Warner Bros?

In [142]:
answers['20'] = '2014'

In [143]:
warner_df = data[data.production_companies.str.contains('Warner Bros')]

warner_year_profit = dict.fromkeys(years_list, 0)
for year in years_list:
    year_warner_df = warner_df[warner_df.release_year == year]
    warner_year_profit[year] = year_warner_df.profit.sum()

max(warner_year_profit, key=warner_year_profit.get)

2014

# 21. В каком месяце за все годы суммарно вышло больше всего фильмов?

In [144]:
answers['21'] = 'Сентябрь'

In [145]:
months_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

all_months = dict.fromkeys(months_list, 0)
for year in years_list:
    for month in months_list:
        date_min = datetime(year, month, 1)
        date_max = datetime(year, month, calendar.monthrange(year, month)[1])
        month_df = data[(date_max >= data.release_date) & (data.release_date >= date_min)]
        all_months[month] += len(month_df)

print(max(all_months, key=all_months.get))

9


# 22. Сколько суммарно вышло фильмов летом? (за июнь, июль, август)

In [146]:
answers['22'] = 450

In [147]:
summer_month = [6, 7, 8]
summer_movies_count = 0
for year in years_list:
    month_count = dict.fromkeys(summer_month, 0)
    for month in summer_month:
        date_min = datetime(year, month, 1)
        date_max = datetime(year, month, calendar.monthrange(year, month)[1])
        month_df = data[(date_max >= data.release_date) & (data.release_date >= date_min)]
        summer_movies_count += len(month_df)

print(summer_movies_count)

450


# 23. Для какого режиссера зима – самое продуктивное время года? 

In [148]:
answers['23'] = 'Peter Jackson'

In [149]:
winter_month = [12, 1, 2]

director_winter = dict.fromkeys(director_list, 0)
for director in director_list:
    c = 0
    for year in years_list:
        month_count = dict.fromkeys(summer_month, 0)
        for month in winter_month:
            date_min = datetime(year, month, 1)
            date_max = datetime(year, month, calendar.monthrange(year, month)[1])
            winter_df = data[(date_max >= data.release_date) & (data.release_date >= date_min) & (data.director == director)]
            c += len(winter_df)
    director_winter[director] = c


max(director_winter, key=director_winter.get)

'Peter Jackson'

# 24. Какая студия дает самые длинные названия своим фильмам по количеству символов?

In [150]:
answers['24'] = 'Four By Two Productions'

In [151]:
studio_title_len = dict.fromkeys(studio_list, 0)

data['original_title_len'] = data.original_title.apply(lambda x: len(str(x)))

for studio in studio_list:
    studio_df = data[data.production_companies.str.contains(studio)]
    studio_title_len[studio] = studio_df['original_title_len'].mean()

max(studio_title_len, key=studio_title_len.get)

  return func(self, *args, **kwargs)


'Four By Two Productions'

# 25. Описание фильмов какой студии в среднем самые длинные по количеству слов?

In [152]:
answers['25'] = 'Midnight Picture Show'

In [153]:
data['overview_len'] = data.overview.apply(lambda x: len(str(x)))
studio_description_len = dict.fromkeys(studio_list, 0)
for studio in studio_list:
    studio_df = data[data.production_companies.str.contains(studio)]
    studio_description_len[studio] = studio_df['overview_len'].mean()


max(studio_description_len, key=studio_description_len.get)

'Midnight Picture Show'

# 26. Какие фильмы входят в 1 процент лучших по рейтингу? 
по vote_average

In [154]:
answers['26'] = 'Inside Out, The Dark Knight, 12 Years a Slave'

In [155]:
data.sort_values(by=['vote_average'], ascending=False).head(round(len(data) * 0.01))

Unnamed: 0,imdb_id,budget,revenue,original_title,cast,director,tagline,overview,runtime,genres,production_companies,release_date,vote_average,release_year,profit,original_title_len,overview_len
599,tt0468569,185000000,1001921825,The Dark Knight,Christian Bale|Michael Caine|Heath Ledger|Aaro...,Christopher Nolan,Why So Serious?,Batman raises the stakes in his war on crime. ...,152,Drama|Action|Crime|Thriller,DC Comics|Legendary Pictures|Warner Bros.|Syncopy,2008-07-16,8.1,2008,816921825,15,396
118,tt0816692,165000000,621752480,Interstellar,Matthew McConaughey|Jessica Chastain|Anne Hath...,Christopher Nolan,Mankind was born on Earth. It was never meant ...,Interstellar chronicles the adventures of a gr...,169,Adventure|Drama|Science Fiction,Paramount Pictures|Legendary Pictures|Warner B...,2014-11-05,8.0,2014,456752480,12,222
125,tt2084970,14000000,233555708,The Imitation Game,Benedict Cumberbatch|Keira Knightley|Matthew G...,Morten Tyldum,The true enigma was the man who cracked the code.,Based on the real life story of legendary cryp...,113,History|Drama|Thriller|War,Black Bear Pictures|Bristol Automotive,2014-11-14,8.0,2014,219555708,18,287
9,tt2096673,175000000,853708609,Inside Out,Amy Poehler|Phyllis Smith|Richard Kind|Bill Ha...,Pete Docter,Meet the little voices inside your head.,"Growing up can be a bumpy road, and it's no ex...",94,Comedy|Animation|Family,Walt Disney Pictures|Pixar Animation Studios|W...,2015-06-09,8.0,2015,678708609,10,638
34,tt3170832,6000000,35401758,Room,Brie Larson|Jacob Tremblay|Joan Allen|Sean Bri...,Lenny Abrahamson,Love knows no boundaries,Jack is a young boy of 5 years old who has liv...,117,Drama|Thriller,Element Pictures|No Trace Camping|A24|Duperele...,2015-10-16,8.0,2015,29401758,4,243
1183,tt0993846,100000000,392000694,The Wolf of Wall Street,Leonardo DiCaprio|Jonah Hill|Margot Robbie|Kyl...,Martin Scorsese,EARN. SPEND. PARTY.,A New York stockbroker refuses to cooperate in...,180,Crime|Drama|Comedy,Paramount Pictures|Appian Way|EMJAG Production...,2013-12-25,7.9,2013,292000694,23,200
128,tt2267998,61000000,369330363,Gone Girl,Ben Affleck|Rosamund Pike|Carrie Coon|Neil Pat...,David Fincher,You don't know what you've got 'til it's...,With his wife's disappearance having become th...,145,Mystery|Thriller|Drama,Twentieth Century Fox Film Corporation|Regency...,2014-10-01,7.9,2014,308330363,9,169
1191,tt2024544,20000000,187000000,12 Years a Slave,Chiwetel Ejiofor|Michael Fassbender|Lupita Nyo...,Steve McQueen,The extraordinary true story of Solomon Northup,"In the pre-Civil War United States, Solomon No...",134,Drama|History,Plan B Entertainment|Regency Enterprises|River...,2013-10-18,7.9,2013,167000000,16,379
119,tt2015381,170000000,773312399,Guardians of the Galaxy,Chris Pratt|Zoe Saldana|Dave Bautista|Vin Dies...,James Gunn,All heroes start somewhere.,"Light years from Earth, 26 years after being a...",121,Action|Science Fiction|Adventure,Marvel Studios|Moving Picture Company (MPC)|Bu...,2014-07-30,7.9,2014,603312399,23,164
1081,tt0167260,94000000,1118888979,The Lord of the Rings: The Return of the King,Elijah Wood|Ian McKellen|Viggo Mortensen|Liv T...,Peter Jackson,The eye of the enemy is moving.,Aragorn is revealed as the heir to the ancient...,201,Adventure|Fantasy|Action,WingNut Films|New Line Cinema,2003-12-01,7.9,2003,1024888979,45,254


# 27. Какие актеры чаще всего снимаются в одном фильме вместе?


In [156]:
answers['27'] = 'Daniel Radcliffe and Rupert Grint'

In [157]:
actor_pairs = []

for actor in actors_list:
    for actor2 in actors_list:
        actor_pairs.append(f'{actor} and {actor2}')


actor_together = dict.fromkeys(actor_pairs, 0)

for ix in data.index.values.tolist():
    cast = data.loc[ix, 'cast']
    actors = str.split(cast, '|')
    for actor in actors:
        for actor2 in actors:
            if actor != actor2:
                actor_together[f'{actor} and {actor2}'] += 1

max(actor_together, key=actor_together.get)

'Daniel Radcliffe and Rupert Grint'

# Submission

In [158]:
# в конце можно посмотреть свои ответы к каждому вопросу
answers

{'1': '723. Pirates of the Caribbean: On Stranger Tides (tt1298650)',
 '2': '1157. Gods and Generals (tt0279111)',
 '3': '768. Winnie the Pooh (tt1449283)',
 '4': 109.6585494970884,
 '5': 107.0,
 '6': '239. Avatar (tt0499549)',
 '7': '1245. The Lone Ranger (tt1210819)',
 '8': 1478,
 '9': '599. The Dark Knight (tt0468569)',
 '10': '1245. The Lone Ranger (tt1210819)',
 '11': 'Drama',
 '12': 'Drama',
 '13': 'Peter Jackson',
 '14': 'Robert Rodriguez',
 '15': 'Chris Hemsworth',
 '16': 'Matt Damon',
 '17': 'Action',
 '18': 'K-19: The Widowmaker (tt0267626)',
 '19': '2015',
 '20': '2014',
 '21': 'Сентябрь',
 '22': 450,
 '23': 'Peter Jackson',
 '24': 'Four By Two Productions',
 '25': 'Midnight Picture Show',
 '26': 'Inside Out, The Dark Knight, 12 Years a Slave',
 '27': 'Daniel Radcliffe and Rupert Grint'}

In [159]:
# и убедиться что ни чего не пропустил)
len(answers)

27