In [17]:
# Import Dependencies
import pandas as pd

In [18]:
# Point at the TED Talks CSV (relative path), load it into a DataFrame,
# and preview the first five rows
csv_path = "../Resources/ted_talks.csv"
ted_df = pd.read_csv(filepath_or_buffer=csv_path)
ted_df.head(n=5)

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869


In [19]:
# Figure out the minimum and maximum views for a TED Talk.
# Select the "views" column first so only that column is reduced; calling
# .max() on the whole frame would also compare every text column just to
# throw those results away.
ted_df['views'].max()

47227110

In [20]:
# Smallest view count of any talk. The column is selected before reducing so
# the text columns are not needlessly compared.
ted_df['views'].min()

50443

In [21]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; used to pick sensible bin edges for the view counts
ted_df.describe()

Unnamed: 0,comments,duration,languages,views
count,2550.0,2550.0,2550.0,2550.0
mean,191.562353,826.510196,27.326275,1698297.0
std,282.315223,374.009138,9.563452,2498479.0
min,2.0,135.0,0.0,50443.0
25%,63.0,577.0,23.0,755792.8
50%,118.0,848.0,28.0,1124524.0
75%,221.75,1046.75,33.0,1700760.0
max,6404.0,5256.0,72.0,47227110.0


In [22]:
# Bin edges for TED Talk view counts. pd.cut treats each bin as right-closed
# by default, so an edge of 199_999 puts 200_000 views into the next bin.
bins = [
    0,
    199_999,
    399_999,
    599_999,
    799_999,
    999_999,
    1_999_999,
    2_999_999,
    3_999_999,
    4_999_999,
    50_000_000,
]

# One label per bin (always one fewer label than there are edges)
group_labels = [
    "0 to 199k",
    "200k to 399k",
    "400k to 599k",
    "600k to 799k",
    "800k to 999k",
    "1mil to 2mil",
    "2mil to 3mil",
    "3mil to 4mil",
    "4mil to 5mil",
    "5mil to 50mil",
]

In [23]:
# Preview the binning: map each talk's view count to its labelled bin and
# show the first few results
pd.cut(x=ted_df["views"], bins=bins, labels=group_labels).head()

0    5mil to 50mil
1     3mil to 4mil
2     1mil to 2mil
3     1mil to 2mil
4    5mil to 50mil
Name: views, dtype: category
Categories (10, object): ['0 to 199k' < '200k to 399k' < '400k to 599k' < '600k to 799k' ... '2mil to 3mil' < '3mil to 4mil' < '4mil to 5mil' < '5mil to 50mil']

In [24]:
# Store each talk's view bin on the DataFrame as a new "View Group" column,
# then display the updated frame
ted_df['View Group'] = pd.cut(
    x=ted_df["views"],
    bins=bins,
    labels=group_labels,
)
ted_df

Unnamed: 0,comments,description,duration,event,languages,main_speaker,name,title,views,View Group
0,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110,5mil to 50mil
1,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520,3mil to 4mil
2,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292,1mil to 2mil
3,200,"In an emotionally charged talk, MacArthur-winn...",1116,TED2006,35,Majora Carter,Majora Carter: Greening the ghetto,Greening the ghetto,1697550,1mil to 2mil
4,593,You've never seen data presented like this. Wi...,1190,TED2006,48,Hans Rosling,Hans Rosling: The best stats you've ever seen,The best stats you've ever seen,12005869,5mil to 50mil
...,...,...,...,...,...,...,...,...,...,...
2545,17,"Between 2008 and 2016, the United States depor...",476,TED2017,4,Duarte Geraldino,Duarte Geraldino: What we're missing in the de...,What we're missing in the debate about immigra...,450430,400k to 599k
2546,6,How can you study Mars without a spaceship? He...,290,TED2017,3,Armando Azua-Bustos,Armando Azua-Bustos: The most Martian place on...,The most Martian place on Earth,417470,400k to 599k
2547,10,Science fiction visions of the future show us ...,651,TED2017,1,Radhika Nagpal,Radhika Nagpal: What intelligent machines can ...,What intelligent machines can learn from a sch...,375647,200k to 399k
2548,32,In an unmissable talk about race and politics ...,1100,TEDxMileHigh,1,Theo E.J. Wilson,Theo E.J. Wilson: A black man goes undercover ...,A black man goes undercover in the alt-right,419309,400k to 599k


In [33]:
# Create a GroupBy object based upon "View Group".
# Note: despite the `_df` suffix, this is a GroupBy object, not a DataFrame.
# "View Group" is categorical, so `observed` is passed explicitly: pandas has
# deprecated the implicit default for categorical groupers, and
# observed=False keeps every bin label in the result even if a bin is empty.
view_group_df = ted_df.groupby(['View Group'], observed=False)

# Show the first row of each view group as a quick sanity check
view_group_df.first()

Unnamed: 0_level_0,comments,description,duration,event,languages,main_speaker,name,title,views
View Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0 to 199k,43,"Singer-songwriter Rokia Traore performs ""Kouna...",386,TEDGlobal 2007,0,Rokia Traore,"Rokia Traore: ""Kounandi""","""Kounandi""",82488
200k to 399k,55,Jehane Noujaim unveils her 2006 TED Prize wish...,1538,TED2006,20,Jehane Noujaim,Jehane Noujaim: My wish: A global day of film,My wish: A global day of film,387877
400k to 599k,50,"The founding mother of the blog revolution, Mo...",1006,TED2006,20,Mena Trott,Mena Trott: Meet the founder of the blog revol...,Meet the founder of the blog revolution,518624
600k to 799k,71,"Accepting the 2006 TED Prize, Dr. Larry Brilli...",1550,TED2006,24,Larry Brilliant,Larry Brilliant: My wish: Help me stop pandemics,My wish: Help me stop pandemics,693341
800k to 999k,46,Architect Joshua Prince-Ramus takes the audien...,1198,TED2006,19,Joshua Prince-Ramus,Joshua Prince-Ramus: Behind the design of Seat...,Behind the design of Seattle's library,967741
1mil to 2mil,124,New York Times columnist David Pogue takes aim...,1286,TED2006,26,David Pogue,David Pogue: Simplicity sells,Simplicity sells,1636292
2mil to 3mil,852,Philosopher Dan Dennett calls for religion -- ...,1485,TED2006,32,Dan Dennett,Dan Dennett: Let's teach religion -- all relig...,Let's teach religion -- all religion -- in sch...,2567958
3mil to 4mil,265,With the same humor and humanity he exuded in ...,977,TED2006,43,Al Gore,Al Gore: Averting the climate crisis,Averting the climate crisis,3200520
4mil to 5mil,242,"Jeff Han shows off a cheap, scalable multi-tou...",527,TED2006,27,Jeff Han,Jeff Han: The radical promise of the multi-tou...,The radical promise of the multi-touch interface,4531020
5mil to 50mil,4553,Sir Ken Robinson makes an entertaining and pro...,1164,TED2006,60,Ken Robinson,Ken Robinson: Do schools kill creativity?,Do schools kill creativity?,47227110


In [26]:
# Find how many rows fall into each bin.
# This counts the non-null "name" values per bin, which equals the row count
# as long as no talk is missing its name. The column is selected *before*
# counting so the other columns are not counted and then discarded.
# observed=False keeps every bin label in the result, even an empty one.
view_group_count = ted_df.groupby('View Group', observed=False)['name'].count()
view_group_count

View Group
0 to 199k          32
200k to 399k      135
400k to 599k      234
600k to 799k      307
800k to 999k      339
1mil to 2mil     1004
2mil to 3mil      239
3mil to 4mil       93
4mil to 5mil       68
5mil to 50mil      99
Name: name, dtype: int64

In [27]:
# Get the average of each numeric column of interest within each view group.
# The columns are selected *before* aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns (description, event,
# ...), which raises TypeError on pandas 2.0 and later. Grouping once and
# aggregating once also avoids rebuilding the same groupby four times.
# observed=False keeps every bin label in the result, even an empty one.
group_means = (
    ted_df
    .groupby('View Group', observed=False)[['comments', 'duration', 'languages', 'views']]
    .mean()
)
average_comments = group_means['comments']
average_duration = group_means['duration']
average_languages = group_means['languages']
average_views = group_means['views']

In [28]:
# Mean number of comments per "View Group" bin
average_comments

View Group
0 to 199k         76.937500
200k to 399k      81.992593
400k to 599k     107.162393
600k to 799k     118.912052
800k to 999k     119.628319
1mil to 2mil     168.136454
2mil to 3mil     299.481172
3mil to 4mil     360.870968
4mil to 5mil     507.088235
5mil to 50mil    650.393939
Name: comments, dtype: float64

In [29]:
# Mean of the "duration" column per "View Group" bin
average_duration

View Group
0 to 199k        898.187500
200k to 399k     832.192593
400k to 599k     870.517094
600k to 799k     829.039088
800k to 999k     798.772861
1mil to 2mil     809.899402
2mil to 3mil     832.430962
3mil to 4mil     809.505376
4mil to 5mil     920.514706
5mil to 50mil    884.282828
Name: duration, dtype: float64

In [30]:
# Mean of the "languages" column per "View Group" bin
average_languages

View Group
0 to 199k         4.062500
200k to 399k     18.785185
400k to 599k     22.940171
600k to 799k     24.400651
800k to 999k     25.678466
1mil to 2mil     27.899402
2mil to 3mil     32.807531
3mil to 4mil     34.258065
4mil to 5mil     35.720588
5mil to 50mil    40.252525
Name: languages, dtype: float64

In [31]:
# Mean view count per "View Group" bin
average_views

View Group
0 to 199k        1.498010e+05
200k to 399k     3.221191e+05
400k to 599k     5.038403e+05
600k to 799k     7.058367e+05
800k to 999k     9.022959e+05
1mil to 2mil     1.368263e+06
2mil to 3mil     2.390719e+06
3mil to 4mil     3.440350e+06
4mil to 5mil     4.468781e+06
5mil to 50mil    1.083801e+07
Name: views, dtype: float64