In [1]:
import numpy as np
import pandas as pd

In [2]:
movies = pd.read_csv("Datasets/imdb-top-1000.csv")

movies.head()

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
0,The Shawshank Redemption,1994,142,Drama,9.3,Frank Darabont,Tim Robbins,2343110,28341469.0,80.0
1,The Godfather,1972,175,Crime,9.2,Francis Ford Coppola,Marlon Brando,1620367,134966411.0,100.0
2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0
3,The Godfather: Part II,1974,202,Crime,9.0,Francis Ford Coppola,Al Pacino,1129952,57300000.0,90.0
4,12 Angry Men,1957,96,Crime,9.0,Sidney Lumet,Henry Fonda,689845,4360000.0,96.0


In [3]:
movies.shape

(1000, 10)

In [4]:
# we can pass multiple columns if we want as a list for group-by

genres = movies.groupby(by="Genre")

print(type(genres))

<class 'pandas.core.groupby.generic.DataFrameGroupBy'>


In [5]:
genres.get_group("Drama")

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
0,The Shawshank Redemption,1994,142,Drama,9.3,Frank Darabont,Tim Robbins,2343110,28341469.0,80.0
9,Fight Club,1999,139,Drama,8.8,David Fincher,Brad Pitt,1854740,37030102.0,66.0
11,Forrest Gump,1994,142,Drama,8.8,Robert Zemeckis,Tom Hanks,1809221,330252182.0,82.0
17,One Flew Over the Cuckoo's Nest,1975,133,Drama,8.7,Milos Forman,Jack Nicholson,918088,112000000.0,83.0
20,Soorarai Pottru,2020,153,Drama,8.6,Sudha Kongara,Suriya,54995,556832648.0,
...,...,...,...,...,...,...,...,...,...,...
990,Giù la testa,1971,157,Drama,7.6,Sergio Leone,Rod Steiger,30144,696690.0,77.0
993,Blowup,1966,111,Drama,7.6,Michelangelo Antonioni,David Hemmings,56513,632532802.0,82.0
996,Giant,1956,201,Drama,7.6,George Stevens,Elizabeth Taylor,34075,195217415.0,84.0
997,From Here to Eternity,1953,118,Drama,7.6,Fred Zinnemann,Burt Lancaster,43374,30500000.0,85.0


In [6]:
len(genres)

14

In [7]:
# Applying builtin aggregation fuctions on groupby objects

genres.std(numeric_only=True)

Unnamed: 0_level_0,Runtime,IMDB_Rating,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Action,28.500706,0.304258,432946.814748,225672400.0,12.421252
Adventure,33.31732,0.229781,301188.347642,169754300.0,12.345393
Animation,14.530471,0.253221,262173.231571,209184000.0,8.813646
Biography,25.514466,0.26714,271284.191372,136325100.0,11.028187
Comedy,22.946213,0.228771,188653.570564,194651300.0,11.82916
Crime,27.689231,0.335477,373999.730656,157119100.0,13.099102
Drama,27.74049,0.267229,305554.162841,220116400.0,12.744687
Family,10.606602,0.0,137008.302816,304841200.0,16.970563
Fantasy,12.727922,0.141421,22179.111299,76068610.0,
Film-Noir,4.0,0.152753,54649.083277,70484720.0,1.527525


### find the top 3 genres by total earning

In [8]:
movies.groupby(by="Genre")['Gross'].sum().sort_values(ascending=False).reset_index().head(3)

Unnamed: 0,Genre,Gross
0,Drama,35409970000.0
1,Action,32632260000.0
2,Comedy,15663870000.0


### find the genre with highest avg IMDB rating

In [9]:
movies.groupby(by="Genre")["IMDB_Rating"].mean().sort_values(ascending=False).reset_index().head(1)

Unnamed: 0,Genre,IMDB_Rating
0,Western,8.35


### find director with most popularity

In [10]:
movies.groupby(by="Director")["No_of_Votes"].sum().sort_values(ascending=False).reset_index().head(1)

Unnamed: 0,Director,No_of_Votes
0,Christopher Nolan,11578345


### find the highest rated movie of each genre

In [11]:
movies.groupby(by="Genre")['IMDB_Rating'].max()

Genre
Action       9.0
Adventure    8.6
Animation    8.6
Biography    8.9
Comedy       8.6
Crime        9.2
Drama        9.3
Family       7.8
Fantasy      8.1
Film-Noir    8.1
Horror       8.5
Mystery      8.4
Thriller     7.8
Western      8.8
Name: IMDB_Rating, dtype: float64

### find number of movies done by each actor

In [12]:
# First way

movies['Star1'].value_counts()

Tom Hanks          12
Robert De Niro     11
Al Pacino          10
Clint Eastwood     10
Humphrey Bogart     9
                   ..
Preity Zinta        1
Javier Bardem       1
Ki-duk Kim          1
Vladimir Garin      1
Robert Donat        1
Name: Star1, Length: 660, dtype: int64

In [13]:
# Total number of actor

movies['Star1'].nunique()

660

In [14]:
# Second way using groupby

movies.groupby(by="Star1")['Star1'].count().sort_values(ascending=False)

Star1
Tom Hanks             12
Robert De Niro        11
Clint Eastwood        10
Al Pacino             10
Leonardo DiCaprio      9
                      ..
Glen Hansard           1
Giuseppe Battiston     1
Giulietta Masina       1
Gerardo Taracena       1
Ömer Faruk Sorak       1
Name: Star1, Length: 660, dtype: int64

In [15]:
# GroupBy Attributes and Methods
# -------------------------------
# find total number of groups -> len
# find items in each group -> size
# first()/last() -> nth item
# get_group -> vs filtering
# groups
# describe
# sample
# nunique

In [16]:
len(movies.groupby('Genre'))

14

In [17]:
movies['Genre'].nunique()

14

In [18]:
movies.groupby('Genre').size()

Genre
Action       172
Adventure     72
Animation     82
Biography     88
Comedy       155
Crime        107
Drama        289
Family         2
Fantasy        2
Film-Noir      3
Horror        11
Mystery       12
Thriller       1
Western        4
dtype: int64

In [19]:
# Compute the first non-null entry of each column

genres.first()

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Action,The Dark Knight,2008,152,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0
Adventure,Interstellar,2014,169,8.6,Christopher Nolan,Matthew McConaughey,1512360,188020017.0,74.0
Animation,Sen to Chihiro no kamikakushi,2001,125,8.6,Hayao Miyazaki,Daveigh Chase,651376,10055859.0,96.0
Biography,Schindler's List,1993,195,8.9,Steven Spielberg,Liam Neeson,1213505,96898818.0,94.0
Comedy,Gisaengchung,2019,132,8.6,Bong Joon Ho,Kang-ho Song,552778,53367844.0,96.0
Crime,The Godfather,1972,175,9.2,Francis Ford Coppola,Marlon Brando,1620367,134966411.0,100.0
Drama,The Shawshank Redemption,1994,142,9.3,Frank Darabont,Tim Robbins,2343110,28341469.0,80.0
Family,E.T. the Extra-Terrestrial,1982,115,7.8,Steven Spielberg,Henry Thomas,372490,435110554.0,91.0
Fantasy,Das Cabinet des Dr. Caligari,1920,76,8.1,Robert Wiene,Werner Krauss,57428,337574718.0,
Film-Noir,The Third Man,1949,104,8.1,Carol Reed,Orson Welles,158731,449191.0,97.0


In [20]:
genres.first(numeric_only=True)

Unnamed: 0_level_0,Runtime,IMDB_Rating,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Action,152,9.0,2303232,534858444.0,84.0
Adventure,169,8.6,1512360,188020017.0,74.0
Animation,125,8.6,651376,10055859.0,96.0
Biography,195,8.9,1213505,96898818.0,94.0
Comedy,132,8.6,552778,53367844.0,96.0
Crime,175,9.2,1620367,134966411.0,100.0
Drama,142,9.3,2343110,28341469.0,80.0
Family,115,7.8,372490,435110554.0,91.0
Fantasy,76,8.1,57428,337574718.0,
Film-Noir,104,8.1,158731,449191.0,97.0


In [21]:
genres.first(min_count=5)

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Action,The Dark Knight,2008.0,152.0,9.0,Christopher Nolan,Christian Bale,2303232.0,534858444.0,84.0
Adventure,Interstellar,2014.0,169.0,8.6,Christopher Nolan,Matthew McConaughey,1512360.0,188020017.0,74.0
Animation,Sen to Chihiro no kamikakushi,2001.0,125.0,8.6,Hayao Miyazaki,Daveigh Chase,651376.0,10055859.0,96.0
Biography,Schindler's List,1993.0,195.0,8.9,Steven Spielberg,Liam Neeson,1213505.0,96898818.0,94.0
Comedy,Gisaengchung,2019.0,132.0,8.6,Bong Joon Ho,Kang-ho Song,552778.0,53367844.0,96.0
Crime,The Godfather,1972.0,175.0,9.2,Francis Ford Coppola,Marlon Brando,1620367.0,134966411.0,100.0
Drama,The Shawshank Redemption,1994.0,142.0,9.3,Frank Darabont,Tim Robbins,2343110.0,28341469.0,80.0
Family,,,,,,,,,
Fantasy,,,,,,,,,
Film-Noir,,,,,,,,,


In [22]:
genres.last()

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Action,Escape from Alcatraz,1979,112,7.6,Don Siegel,Clint Eastwood,121731,43000000.0,76.0
Adventure,Kelly's Heroes,1970,144,7.6,Brian G. Hutton,Clint Eastwood,45338,1378435.0,50.0
Animation,The Jungle Book,1967,78,7.6,Wolfgang Reitherman,Phil Harris,166409,141843612.0,65.0
Biography,Midnight Express,1978,121,7.6,Alan Parker,Brad Davis,73662,35000000.0,59.0
Comedy,Breakfast at Tiffany's,1961,115,7.6,Blake Edwards,Audrey Hepburn,166544,679874270.0,76.0
Crime,The 39 Steps,1935,86,7.6,Alfred Hitchcock,Robert Donat,51853,302787539.0,93.0
Drama,Lifeboat,1944,97,7.6,Alfred Hitchcock,Tallulah Bankhead,26471,852142728.0,78.0
Family,Willy Wonka & the Chocolate Factory,1971,100,7.8,Mel Stuart,Gene Wilder,178731,4000000.0,67.0
Fantasy,Nosferatu,1922,94,7.9,F.W. Murnau,Max Schreck,88794,445151978.0,
Film-Noir,Shadow of a Doubt,1943,108,7.8,Alfred Hitchcock,Teresa Wright,59556,123353292.0,94.0


In [23]:
genres.last(numeric_only=True)

Unnamed: 0_level_0,Runtime,IMDB_Rating,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Action,112,7.6,121731,43000000.0,76.0
Adventure,144,7.6,45338,1378435.0,50.0
Animation,78,7.6,166409,141843612.0,65.0
Biography,121,7.6,73662,35000000.0,59.0
Comedy,115,7.6,166544,679874270.0,76.0
Crime,86,7.6,51853,302787539.0,93.0
Drama,97,7.6,26471,852142728.0,78.0
Family,100,7.8,178731,4000000.0,67.0
Fantasy,94,7.9,88794,445151978.0,
Film-Noir,108,7.8,59556,123353292.0,94.0


In [24]:
# Take the nth row from each group if n is an int, otherwise a subset of rows.

genres.nth(6)

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Action,Star Wars: Episode V - The Empire Strikes Back,1980,124,8.7,Irvin Kershner,Mark Hamill,1159315,290475067.0,82.0
Adventure,North by Northwest,1959,136,8.3,Alfred Hitchcock,Cary Grant,299198,13275000.0,98.0
Animation,WALL·E,2008,98,8.4,Andrew Stanton,Ben Burtt,999790,223808164.0,95.0
Biography,Braveheart,1995,178,8.3,Mel Gibson,Mel Gibson,959181,75600000.0,68.0
Comedy,The Great Dictator,1940,125,8.4,Charles Chaplin,Charles Chaplin,203150,288475.0,
Crime,Se7en,1995,127,8.6,David Fincher,Morgan Freeman,1445096,100125643.0,65.0
Drama,It's a Wonderful Life,1946,130,8.6,Frank Capra,James Stewart,405801,82385199.0,89.0
Horror,Get Out,2017,104,7.7,Jordan Peele,Daniel Kaluuya,492851,176040665.0,85.0
Mystery,Sleuth,1972,138,8.0,Joseph L. Mankiewicz,Laurence Olivier,44748,4081254.0,


In [25]:
# Apply filtering

# This is one way

genres.get_group(name="Action")

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0
5,The Lord of the Rings: The Return of the King,2003,201,Action,8.9,Peter Jackson,Elijah Wood,1642758,377845905.0,94.0
8,Inception,2010,148,Action,8.8,Christopher Nolan,Leonardo DiCaprio,2067042,292576195.0,74.0
10,The Lord of the Rings: The Fellowship of the Ring,2001,178,Action,8.8,Peter Jackson,Elijah Wood,1661481,315544750.0,92.0
13,The Lord of the Rings: The Two Towers,2002,179,Action,8.7,Peter Jackson,Elijah Wood,1485555,342551365.0,87.0
...,...,...,...,...,...,...,...,...,...,...
968,Falling Down,1993,113,Action,7.6,Joel Schumacher,Michael Douglas,171640,40903593.0,56.0
979,Lethal Weapon,1987,109,Action,7.6,Richard Donner,Mel Gibson,236894,65207127.0,68.0
982,Mad Max 2,1981,96,Action,7.6,George Miller,Mel Gibson,166588,12465371.0,77.0
983,The Warriors,1979,92,Action,7.6,Walter Hill,Michael Beck,93878,22490039.0,65.0


In [26]:
# This is second way

movies[movies['Genre'] == "Action"]

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0
5,The Lord of the Rings: The Return of the King,2003,201,Action,8.9,Peter Jackson,Elijah Wood,1642758,377845905.0,94.0
8,Inception,2010,148,Action,8.8,Christopher Nolan,Leonardo DiCaprio,2067042,292576195.0,74.0
10,The Lord of the Rings: The Fellowship of the Ring,2001,178,Action,8.8,Peter Jackson,Elijah Wood,1661481,315544750.0,92.0
13,The Lord of the Rings: The Two Towers,2002,179,Action,8.7,Peter Jackson,Elijah Wood,1485555,342551365.0,87.0
...,...,...,...,...,...,...,...,...,...,...
968,Falling Down,1993,113,Action,7.6,Joel Schumacher,Michael Douglas,171640,40903593.0,56.0
979,Lethal Weapon,1987,109,Action,7.6,Richard Donner,Mel Gibson,236894,65207127.0,68.0
982,Mad Max 2,1981,96,Action,7.6,George Miller,Mel Gibson,166588,12465371.0,77.0
983,The Warriors,1979,92,Action,7.6,Walter Hill,Michael Beck,93878,22490039.0,65.0


In [27]:
# It return the every index number as a list
genres.groups

{'Action': [2, 5, 8, 10, 13, 14, 16, 29, 30, 31, 39, 42, 44, 55, 57, 59, 60, 63, 68, 72, 106, 109, 129, 130, 134, 140, 142, 144, 152, 155, 160, 161, 166, 168, 171, 172, 177, 181, 194, 201, 202, 216, 217, 223, 224, 236, 241, 262, 275, 294, 308, 320, 325, 326, 331, 337, 339, 340, 343, 345, 348, 351, 353, 356, 357, 362, 368, 369, 375, 376, 390, 410, 431, 436, 473, 477, 479, 482, 488, 493, 496, 502, 507, 511, 532, 535, 540, 543, 564, 569, 570, 573, 577, 582, 583, 602, 605, 608, 615, 623, ...], 'Adventure': [21, 47, 93, 110, 114, 116, 118, 137, 178, 179, 191, 193, 209, 226, 231, 247, 267, 273, 281, 300, 301, 304, 306, 323, 329, 361, 366, 377, 402, 406, 415, 426, 458, 470, 497, 498, 506, 513, 514, 537, 549, 552, 553, 566, 576, 604, 609, 618, 638, 647, 675, 681, 686, 692, 711, 713, 739, 755, 781, 797, 798, 851, 873, 884, 912, 919, 947, 957, 964, 966, 984, 991], 'Animation': [23, 43, 46, 56, 58, 61, 66, 70, 101, 135, 146, 151, 158, 170, 197, 205, 211, 213, 219, 229, 230, 242, 245, 246, 270, 33

In [28]:
genres.describe()

Unnamed: 0_level_0,Runtime,Runtime,Runtime,Runtime,Runtime,Runtime,Runtime,Runtime,IMDB_Rating,IMDB_Rating,...,Gross,Gross,Metascore,Metascore,Metascore,Metascore,Metascore,Metascore,Metascore,Metascore
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Genre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Action,172.0,129.046512,28.500706,45.0,110.75,127.5,143.25,321.0,172.0,7.949419,...,267443700.0,936662225.0,143.0,73.41958,12.421252,33.0,65.0,74.0,82.0,98.0
Adventure,72.0,134.111111,33.31732,88.0,109.0,127.0,149.0,228.0,72.0,7.9375,...,199807000.0,874211619.0,64.0,78.4375,12.345393,41.0,69.75,80.5,87.25,100.0
Animation,82.0,99.585366,14.530471,71.0,90.0,99.5,106.75,137.0,82.0,7.930488,...,252061200.0,873839108.0,75.0,81.093333,8.813646,61.0,75.0,82.0,87.5,96.0
Biography,88.0,136.022727,25.514466,93.0,120.0,129.0,146.25,209.0,88.0,7.938636,...,98299240.0,753585104.0,79.0,76.240506,11.028187,48.0,70.5,76.0,84.5,97.0
Comedy,155.0,112.129032,22.946213,68.0,96.0,106.0,124.5,188.0,155.0,7.90129,...,81078090.0,886752933.0,125.0,78.72,11.82916,45.0,72.0,79.0,88.0,99.0
Crime,107.0,126.392523,27.689231,80.0,106.5,122.0,141.5,229.0,107.0,8.016822,...,71021630.0,790482117.0,87.0,77.08046,13.099102,47.0,69.5,77.0,87.0,100.0
Drama,289.0,124.737024,27.74049,64.0,105.0,121.0,137.0,242.0,289.0,7.957439,...,116446100.0,924558264.0,241.0,79.701245,12.744687,28.0,72.0,82.0,89.0,100.0
Family,2.0,107.5,10.606602,100.0,103.75,107.5,111.25,115.0,2.0,7.8,...,327332900.0,435110554.0,2.0,79.0,16.970563,67.0,73.0,79.0,85.0,91.0
Fantasy,2.0,85.0,12.727922,76.0,80.5,85.0,89.5,94.0,2.0,8.0,...,418257700.0,445151978.0,0.0,,,,,,,
Film-Noir,3.0,104.0,4.0,100.0,102.0,104.0,106.0,108.0,3.0,7.966667,...,62730680.0,123353292.0,3.0,95.666667,1.527525,94.0,95.0,96.0,96.5,97.0


In [29]:
# Return DataFrame with counts of unique elements in each position.

genres.nunique()

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Action,172,61,78,15,123,121,172,172,50
Adventure,72,49,58,10,59,59,72,72,33
Animation,82,35,41,11,51,77,82,82,29
Biography,88,44,56,13,76,72,88,88,40
Comedy,155,72,70,11,113,133,155,155,44
Crime,106,56,65,14,86,85,107,107,39
Drama,289,83,95,14,211,250,288,287,52
Family,2,2,2,1,2,2,2,2,2
Fantasy,2,2,2,2,2,2,2,2,0
Film-Noir,3,3,3,3,3,3,3,3,3


### agg method

In [30]:
# passing dict

# when we want to apply different method on differet columns

genres.agg(
    {
        'Runtime':'mean',
        'IMDB_Rating':'mean',
        'No_of_Votes':'sum',
        'Gross':'sum',
        'Metascore':'min'
    }
)

Unnamed: 0_level_0,Runtime,IMDB_Rating,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Action,129.046512,7.949419,72282412,32632260000.0,33.0
Adventure,134.111111,7.9375,22576163,9496922000.0,41.0
Animation,99.585366,7.930488,21978630,14631470000.0,61.0
Biography,136.022727,7.938636,24006844,8276358000.0,48.0
Comedy,112.129032,7.90129,27620327,15663870000.0,45.0
Crime,126.392523,8.016822,33533615,8452632000.0,47.0
Drama,124.737024,7.957439,61367304,35409970000.0,28.0
Family,107.5,7.8,551221,439110600.0,67.0
Fantasy,85.0,8.0,146222,782726700.0,
Film-Noir,104.0,7.966667,367215,125910500.0,94.0


In [31]:
# passing list

# If we pass list then it will apply on all the column individually 

genres.agg(['min','max','mean','sum'])

  genres.agg(['min','max','mean','sum'])


Unnamed: 0_level_0,Runtime,Runtime,Runtime,Runtime,IMDB_Rating,IMDB_Rating,IMDB_Rating,IMDB_Rating,No_of_Votes,No_of_Votes,No_of_Votes,No_of_Votes,Gross,Gross,Gross,Gross,Metascore,Metascore,Metascore,Metascore
Unnamed: 0_level_1,min,max,mean,sum,min,max,mean,sum,min,max,mean,sum,min,max,mean,sum,min,max,mean,sum
Genre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
Action,45,321,129.046512,22196,7.6,9.0,7.949419,1367.3,25312,2303232,420246.581395,72282412,3296.0,936662225.0,189722400.0,32632260000.0,33.0,98.0,73.41958,10499.0
Adventure,88,228,134.111111,9656,7.6,8.6,7.9375,571.5,29999,1512360,313557.819444,22576163,61001.0,874211619.0,131901700.0,9496922000.0,41.0,100.0,78.4375,5020.0
Animation,71,137,99.585366,8166,7.6,8.6,7.930488,650.3,25229,999790,268032.073171,21978630,128985.0,873839108.0,178432600.0,14631470000.0,61.0,96.0,81.093333,6082.0
Biography,93,209,136.022727,11970,7.6,8.9,7.938636,698.6,27254,1213505,272805.045455,24006844,21877.0,753585104.0,94049520.0,8276358000.0,48.0,97.0,76.240506,6023.0
Comedy,68,188,112.129032,17380,7.6,8.6,7.90129,1224.7,26337,939631,178195.658065,27620327,1305.0,886752933.0,101057200.0,15663870000.0,45.0,99.0,78.72,9840.0
Crime,80,229,126.392523,13524,7.6,9.2,8.016822,857.8,27712,1826188,313398.271028,33533615,6013.0,790482117.0,78996560.0,8452632000.0,47.0,100.0,77.08046,6706.0
Drama,64,242,124.737024,36049,7.6,9.3,7.957439,2299.7,25088,2343110,212343.612457,61367304,3600.0,924558264.0,122525900.0,35409970000.0,28.0,100.0,79.701245,19208.0
Family,100,115,107.5,215,7.8,7.8,7.8,15.6,178731,372490,275610.5,551221,4000000.0,435110554.0,219555300.0,439110600.0,67.0,91.0,79.0,158.0
Fantasy,76,94,85.0,170,7.9,8.1,8.0,16.0,57428,88794,73111.0,146222,337574718.0,445151978.0,391363300.0,782726700.0,,,,0.0
Film-Noir,100,108,104.0,312,7.8,8.1,7.966667,23.9,59556,158731,122405.0,367215,449191.0,123353292.0,41970180.0,125910500.0,94.0,97.0,95.666667,287.0


In [32]:
# Adding both the syntax

genres.agg(
    {
        'Runtime':['min','mean'],
        'IMDB_Rating':'mean',
        'No_of_Votes':['sum','max'],
        'Gross':'sum',
        'Metascore':'min'
    }
)

Unnamed: 0_level_0,Runtime,Runtime,IMDB_Rating,No_of_Votes,No_of_Votes,Gross,Metascore
Unnamed: 0_level_1,min,mean,mean,sum,max,sum,min
Genre,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Action,45,129.046512,7.949419,72282412,2303232,32632260000.0,33.0
Adventure,88,134.111111,7.9375,22576163,1512360,9496922000.0,41.0
Animation,71,99.585366,7.930488,21978630,999790,14631470000.0,61.0
Biography,93,136.022727,7.938636,24006844,1213505,8276358000.0,48.0
Comedy,68,112.129032,7.90129,27620327,939631,15663870000.0,45.0
Crime,80,126.392523,8.016822,33533615,1826188,8452632000.0,47.0
Drama,64,124.737024,7.957439,61367304,2343110,35409970000.0,28.0
Family,100,107.5,7.8,551221,372490,439110600.0,67.0
Fantasy,76,85.0,8.0,146222,88794,782726700.0,
Film-Noir,100,104.0,7.966667,367215,158731,125910500.0,94.0


In [33]:
# looping on groups

df = pd.DataFrame(columns=movies.columns)

df

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore


In [34]:
for group, data in genres:
    print(type(data[data['IMDB_Rating'] == data['IMDB_Rating'].max()]))


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [35]:
for group, data in genres:
    print(data[data['IMDB_Rating'] == data['IMDB_Rating'].max()])

      Series_Title Released_Year  Runtime   Genre  IMDB_Rating  \
2  The Dark Knight          2008      152  Action          9.0   

            Director           Star1  No_of_Votes        Gross  Metascore  
2  Christopher Nolan  Christian Bale      2303232  534858444.0       84.0  
    Series_Title Released_Year  Runtime      Genre  IMDB_Rating  \
21  Interstellar          2014      169  Adventure          8.6   

             Director                Star1  No_of_Votes        Gross  \
21  Christopher Nolan  Matthew McConaughey      1512360  188020017.0   

    Metascore  
21       74.0  
                     Series_Title Released_Year  Runtime      Genre  \
23  Sen to Chihiro no kamikakushi          2001      125  Animation   

    IMDB_Rating        Director          Star1  No_of_Votes       Gross  \
23          8.6  Hayao Miyazaki  Daveigh Chase       651376  10055859.0   

    Metascore  
23       96.0  
       Series_Title Released_Year  Runtime      Genre  IMDB_Rating  \
7  Schi

In [36]:
# Find out the maximum rating movie for each genre 


for group, data in genres:
    temp_df = data[data['IMDB_Rating'] == data['IMDB_Rating'].max()]
    pd.concat(objs=[df, temp_df])

In [37]:
df

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore


In [38]:
# use builin function apply()

genres.apply(func=min)

Unnamed: 0_level_0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Action,300,1924,45,Action,7.6,Abhishek Chaubey,Aamir Khan,25312,3296.0,
Adventure,2001: A Space Odyssey,1925,88,Adventure,7.6,Akira Kurosawa,Aamir Khan,29999,61001.0,
Animation,Akira,1940,71,Animation,7.6,Adam Elliot,Adrian Molina,25229,128985.0,
Biography,12 Years a Slave,1928,93,Biography,7.6,Adam McKay,Adrien Brody,27254,21877.0,
Comedy,(500) Days of Summer,1921,68,Comedy,7.6,Alejandro G. Iñárritu,Aamir Khan,26337,1305.0,
Crime,12 Angry Men,1931,80,Crime,7.6,Akira Kurosawa,Ajay Devgn,27712,6013.0,
Drama,1917,1925,64,Drama,7.6,Aamir Khan,Abhay Deol,25088,3600.0,
Family,E.T. the Extra-Terrestrial,1971,100,Family,7.8,Mel Stuart,Gene Wilder,178731,4000000.0,67.0
Fantasy,Das Cabinet des Dr. Caligari,1920,76,Fantasy,7.9,F.W. Murnau,Max Schreck,57428,337574718.0,
Film-Noir,Shadow of a Doubt,1941,100,Film-Noir,7.8,Alfred Hitchcock,Humphrey Bogart,59556,449191.0,94.0


### find number of movies starting with A for each group

In [39]:
def myfunc(group):
    return group['Series_Title'].str.startswith('A').sum()

In [40]:
genres.apply(func=myfunc).reset_index()

Unnamed: 0,Genre,0
0,Action,10
1,Adventure,2
2,Animation,2
3,Biography,9
4,Comedy,14
5,Crime,4
6,Drama,21
7,Family,0
8,Fantasy,0
9,Film-Noir,0


In [41]:
genres2 = movies.groupby(by="Genre", group_keys=True)

print(type(genres2))

<class 'pandas.core.groupby.generic.DataFrameGroupBy'>


In [42]:
# find ranking of each movie in the group according to IMDB score

# Provide the rank of values within each group.

def movie_rank(group):
    group['genre_rank'] = group['IMDB_Rating'].rank(ascending=False)
    return group

In [43]:
genres.apply(func=movie_rank)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  genres.apply(func=movie_rank)


Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore,genre_rank
0,The Shawshank Redemption,1994,142,Drama,9.3,Frank Darabont,Tim Robbins,2343110,28341469.0,80.0,1.0
1,The Godfather,1972,175,Crime,9.2,Francis Ford Coppola,Marlon Brando,1620367,134966411.0,100.0,1.0
2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0,1.0
3,The Godfather: Part II,1974,202,Crime,9.0,Francis Ford Coppola,Al Pacino,1129952,57300000.0,90.0,2.5
4,12 Angry Men,1957,96,Crime,9.0,Sidney Lumet,Henry Fonda,689845,4360000.0,96.0,2.5
...,...,...,...,...,...,...,...,...,...,...,...
995,Breakfast at Tiffany's,1961,115,Comedy,7.6,Blake Edwards,Audrey Hepburn,166544,679874270.0,76.0,147.0
996,Giant,1956,201,Drama,7.6,George Stevens,Elizabeth Taylor,34075,195217415.0,84.0,272.5
997,From Here to Eternity,1953,118,Drama,7.6,Fred Zinnemann,Burt Lancaster,43374,30500000.0,85.0,272.5
998,Lifeboat,1944,97,Drama,7.6,Alfred Hitchcock,Tallulah Bankhead,26471,852142728.0,78.0,272.5


In [44]:
genres2.apply(func=movie_rank)

Unnamed: 0_level_0,Unnamed: 1_level_0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore,genre_rank
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Action,2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0,1.0
Action,5,The Lord of the Rings: The Return of the King,2003,201,Action,8.9,Peter Jackson,Elijah Wood,1642758,377845905.0,94.0,2.0
Action,8,Inception,2010,148,Action,8.8,Christopher Nolan,Leonardo DiCaprio,2067042,292576195.0,74.0,3.5
Action,10,The Lord of the Rings: The Fellowship of the Ring,2001,178,Action,8.8,Peter Jackson,Elijah Wood,1661481,315544750.0,92.0,3.5
Action,13,The Lord of the Rings: The Two Towers,2002,179,Action,8.7,Peter Jackson,Elijah Wood,1485555,342551365.0,87.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...,...
Thriller,700,Wait Until Dark,1967,108,Thriller,7.8,Terence Young,Audrey Hepburn,27733,17550741.0,81.0,1.0
Western,12,"Il buono, il brutto, il cattivo",1966,161,Western,8.8,Sergio Leone,Clint Eastwood,688390,6100000.0,90.0,1.0
Western,48,Once Upon a Time in the West,1968,165,Western,8.5,Sergio Leone,Henry Fonda,302844,5321508.0,80.0,2.0
Western,115,Per qualche dollaro in più,1965,132,Western,8.3,Sergio Leone,Clint Eastwood,232772,15000000.0,74.0,3.0


In [45]:
# find normalized IMDB rating group wise

def normalized_rating(group):
    group['norm_rating'] = (group['IMDB_Rating'] - group['IMDB_Rating'].min())/(group['IMDB_Rating'].max() - group['IMDB_Rating'].min())
    return group

genres2.apply(func=normalized_rating)

Unnamed: 0_level_0,Unnamed: 1_level_0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore,norm_rating
Genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Action,2,The Dark Knight,2008,152,Action,9.0,Christopher Nolan,Christian Bale,2303232,534858444.0,84.0,1.000000
Action,5,The Lord of the Rings: The Return of the King,2003,201,Action,8.9,Peter Jackson,Elijah Wood,1642758,377845905.0,94.0,0.928571
Action,8,Inception,2010,148,Action,8.8,Christopher Nolan,Leonardo DiCaprio,2067042,292576195.0,74.0,0.857143
Action,10,The Lord of the Rings: The Fellowship of the Ring,2001,178,Action,8.8,Peter Jackson,Elijah Wood,1661481,315544750.0,92.0,0.857143
Action,13,The Lord of the Rings: The Two Towers,2002,179,Action,8.7,Peter Jackson,Elijah Wood,1485555,342551365.0,87.0,0.785714
...,...,...,...,...,...,...,...,...,...,...,...,...
Thriller,700,Wait Until Dark,1967,108,Thriller,7.8,Terence Young,Audrey Hepburn,27733,17550741.0,81.0,
Western,12,"Il buono, il brutto, il cattivo",1966,161,Western,8.8,Sergio Leone,Clint Eastwood,688390,6100000.0,90.0,1.000000
Western,48,Once Upon a Time in the West,1968,165,Western,8.5,Sergio Leone,Henry Fonda,302844,5321508.0,80.0,0.700000
Western,115,Per qualche dollaro in più,1965,132,Western,8.3,Sergio Leone,Clint Eastwood,232772,15000000.0,74.0,0.500000


In [46]:
# groupby on multiple cols

duo = movies.groupby(by=['Director','Star1'])
duo

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000020F649F2DA0>

In [47]:
# size
duo.size()

Director             Star1         
Aamir Khan           Amole Gupte       1
Aaron Sorkin         Eddie Redmayne    1
Abdellatif Kechiche  Léa Seydoux       1
Abhishek Chaubey     Shahid Kapoor     1
Abhishek Kapoor      Amit Sadh         1
                                      ..
Zaza Urushadze       Lembit Ulfsak     1
Zoya Akhtar          Hrithik Roshan    1
                     Vijay Varma       1
Çagan Irmak          Çetin Tekindor    1
Ömer Faruk Sorak     Cem Yilmaz        1
Length: 898, dtype: int64

In [48]:
# get_group
duo.get_group(('Aamir Khan', 'Amole Gupte'))

Unnamed: 0,Series_Title,Released_Year,Runtime,Genre,IMDB_Rating,Director,Star1,No_of_Votes,Gross,Metascore
65,Taare Zameen Par,2007,165,Drama,8.4,Aamir Khan,Amole Gupte,168895,1223869.0,


In [49]:
# find the most earning actor -> director combo

duo["Gross"].sum().sort_values(ascending=False).reset_index().head(1)

Unnamed: 0,Director,Star1,Gross
0,Akira Kurosawa,Toshirô Mifune,2999877000.0


In [50]:
# agg on multiple groupby

duo.agg(['min','max','mean'])

  duo.agg(['min','max','mean'])


Unnamed: 0_level_0,Unnamed: 1_level_0,Runtime,Runtime,Runtime,IMDB_Rating,IMDB_Rating,IMDB_Rating,No_of_Votes,No_of_Votes,No_of_Votes,Gross,Gross,Gross,Metascore,Metascore,Metascore
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean,min,max,mean,min,max,mean,min,max,mean,min,max,mean
Director,Star1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
Aamir Khan,Amole Gupte,165,165,165.0,8.4,8.4,8.4,168895,168895,168895.0,1223869.0,1223869.0,1223869.0,,,
Aaron Sorkin,Eddie Redmayne,129,129,129.0,7.8,7.8,7.8,89896,89896,89896.0,853090410.0,853090410.0,853090410.0,77.0,77.0,77.0
Abdellatif Kechiche,Léa Seydoux,180,180,180.0,7.7,7.7,7.7,138741,138741,138741.0,2199675.0,2199675.0,2199675.0,89.0,89.0,89.0
Abhishek Chaubey,Shahid Kapoor,148,148,148.0,7.8,7.8,7.8,27175,27175,27175.0,218428303.0,218428303.0,218428303.0,,,
Abhishek Kapoor,Amit Sadh,130,130,130.0,7.7,7.7,7.7,32628,32628,32628.0,1122527.0,1122527.0,1122527.0,40.0,40.0,40.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zaza Urushadze,Lembit Ulfsak,87,87,87.0,8.2,8.2,8.2,40382,40382,40382.0,144501.0,144501.0,144501.0,73.0,73.0,73.0
Zoya Akhtar,Hrithik Roshan,155,155,155.0,8.1,8.1,8.1,67927,67927,67927.0,3108485.0,3108485.0,3108485.0,,,
Zoya Akhtar,Vijay Varma,154,154,154.0,8.0,8.0,8.0,31886,31886,31886.0,5566534.0,5566534.0,5566534.0,65.0,65.0,65.0
Çagan Irmak,Çetin Tekindor,112,112,112.0,8.3,8.3,8.3,78925,78925,78925.0,461855363.0,461855363.0,461855363.0,,,


### find the best(in-terms of metascore(avg)) actor -> genre combo

In [51]:
movies.groupby(by=['Star1', 'Genre'])["Metascore"].mean().reset_index().sort_values(by="Metascore", ascending=False).head(1)

Unnamed: 0,Star1,Genre,Metascore
230,Ellar Coltrane,Drama,100.0


### Excercise

In [52]:
ipl = pd.read_csv("Datasets/deliveries.csv")

ipl.head()

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
0,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,1,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
1,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,2,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
2,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,3,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,4,0,4,,,
3,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,4,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,0,0,,,
4,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,1,5,DA Warner,S Dhawan,TS Mills,0,...,0,0,0,0,0,2,2,,,


In [53]:
ipl.shape

(179078, 21)

In [54]:
ipl.columns

Index(['match_id', 'inning', 'batting_team', 'bowling_team', 'over', 'ball',
       'batsman', 'non_striker', 'bowler', 'is_super_over', 'wide_runs',
       'bye_runs', 'legbye_runs', 'noball_runs', 'penalty_runs',
       'batsman_runs', 'extra_runs', 'total_runs', 'player_dismissed',
       'dismissal_kind', 'fielder'],
      dtype='object')

### find the top 10 batsman in terms of runs

In [55]:
ipl.groupby(by="batsman")["batsman_runs"].sum().sort_values(ascending=False).reset_index().head(10)

Unnamed: 0,batsman,batsman_runs
0,V Kohli,5434
1,SK Raina,5415
2,RG Sharma,4914
3,DA Warner,4741
4,S Dhawan,4632
5,CH Gayle,4560
6,MS Dhoni,4477
7,RV Uthappa,4446
8,AB de Villiers,4428
9,G Gambhir,4223


### find the batsman with max number of sixes

In [56]:
all_sixes = ipl[ipl["batsman_runs"] == 6]

all_sixes.head(3)

Unnamed: 0,match_id,inning,batting_team,bowling_team,over,ball,batsman,non_striker,bowler,is_super_over,...,bye_runs,legbye_runs,noball_runs,penalty_runs,batsman_runs,extra_runs,total_runs,player_dismissed,dismissal_kind,fielder
10,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,2,4,DA Warner,S Dhawan,A Choudhary,0,...,0,0,0,0,6,0,6,,,
47,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,8,4,MC Henriques,S Dhawan,TM Head,0,...,0,0,0,0,6,0,6,,,
75,1,1,Sunrisers Hyderabad,Royal Challengers Bangalore,13,2,Yuvraj Singh,MC Henriques,A Choudhary,0,...,0,0,0,0,6,0,6,,,


In [57]:
all_sixes.groupby(by="batsman")["batsman_runs"].count().sort_values(ascending=False).reset_index().head(1)

Unnamed: 0,batsman,batsman_runs
0,CH Gayle,327


### find batsman with most number of 4's and 6's in last 5 overs

In [58]:
# find out the last five over

last_five_overs = ipl[ipl["over"] > 15]

# find out the every 4's and 6's in last 5 overs

last_five_overs = last_five_overs[ (last_five_overs['batsman_runs'] == 4) | (last_five_overs['batsman_runs'] == 6) ]

last_five_overs.groupby(by="batsman")["batsman"].count().sort_values(ascending=False).head(1)

batsman
MS Dhoni    340
Name: batsman, dtype: int64

### find V Kohli's record against all teams

In [59]:
record_kohli = ipl[ipl["batsman"] == "V Kohli"]

record_kohli.groupby(by="bowling_team")["batsman_runs"].sum().reset_index()

Unnamed: 0,bowling_team,batsman_runs
0,Chennai Super Kings,749
1,Deccan Chargers,306
2,Delhi Capitals,66
3,Delhi Daredevils,763
4,Gujarat Lions,283
5,Kings XI Punjab,636
6,Kochi Tuskers Kerala,50
7,Kolkata Knight Riders,675
8,Mumbai Indians,628
9,Pune Warriors,128


### Create a function that can return the highest score of any batsman

In [60]:
def highest_score(batsman):
    temp_df = ipl[ipl['batsman'] == batsman]
    return temp_df.groupby('match_id')['batsman_runs'].sum().sort_values(ascending=False).head(1).values[0]

In [61]:
highest_score("DA Warner")

126