# Dataframes

Reference: pandas DataFrame documentation — http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html

In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [45]:
# Open the Wikipedia NFL win-loss table in the default browser so the table
# can be copied by hand for the next cell.
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
# webbrowser.open returns a bool, which the notebook echoes as the cell output.
webbrowser.open(website)

True

In [47]:
#Copy and read to get data: copy the table in the browser first, then
# parse whatever is currently on the system clipboard into a DataFrame.
# NOTE(review): this cell depends on manual clipboard state, so a fresh
# Restart & Run All will not reproduce it unless the copy step is repeated.
nfl_frame = pd.read_clipboard()

In [48]:
# Show the frame that was read from the clipboard
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East


In [49]:
# We can grab the column names with .columns
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [50]:
#Lets see some specific data columns
DataFrame(nfl_frame,columns=['Team','First Season','Total Games'])

Unnamed: 0,Team,First Season,Total Games
0,Dallas Cowboys,,882
1,Green Bay Packers,,1336
2,Chicago Bears,,1370
3,Miami Dolphins,,800
4,New England Patriots[b],,884


In [51]:
#What happens if we ask for a column that doesn't exist?
# Requested names that are missing from nfl_frame come back as empty (NaN) columns.
# Note that 'First Season' is itself missing: the frame's column is 'First NFL Season'.
DataFrame(nfl_frame,columns=['Team','First Season','Total Games','Stadium','aosdnibdwibuabdiauwbd'])

Unnamed: 0,Team,First Season,Total Games,Stadium,aosdnibdwibuabdiauwbd
0,Dallas Cowboys,,882,,
1,Green Bay Packers,,1336,,
2,Chicago Bears,,1370,,
3,Miami Dolphins,,800,,
4,New England Patriots[b],,884,,


In [52]:
# Call columns again: the frames built above were new objects, so nfl_frame
# itself still has only its original columns.
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [53]:
#We can retrieve individual columns
# Attribute-style access returns the column as a Series named after the column.
nfl_frame.Team

0             Dallas Cowboys
1          Green Bay Packers
2              Chicago Bears
3             Miami Dolphins
4    New England Patriots[b]
Name: Team, dtype: object

In [54]:
# The same column, retrieved with bracket (dictionary-style) notation
nfl_frame['Team']

0             Dallas Cowboys
1          Green Bay Packers
2              Chicago Bears
3             Miami Dolphins
4    New England Patriots[b]
Name: Team, dtype: object

In [55]:
#We can retrieve rows through indexing
# .ix is deprecated (and removed in pandas 1.0); .loc is its label-based replacement.
# The index here holds the integer labels 0-4, so label 3 selects the fourth row.
nfl_frame.loc[3]


Rank                             4
Team                Miami Dolphins
Won                            445
Lost                           351
Tied                             4
Pct.                         0.559
First NFL Season              1966
Total Games                    800
Division                  AFC East
Name: 3, dtype: object

In [56]:
#We can also assign values to entire columns
# A scalar is broadcast to every row; the column is created if it does not exist yet.
nfl_frame['Stadium']="Levi's Stadium" #Careful with the ' here
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East,Levi's Stadium
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North,Levi's Stadium
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North,Levi's Stadium
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East,Levi's Stadium
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East,Levi's Stadium


In [57]:
# Any new column label works the same way: assigning to it creates the column
nfl_frame['aosdnibdwibuabdiauwbd'] = 'what'
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium,aosdnibdwibuabdiauwbd
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East,Levi's Stadium,what
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North,Levi's Stadium,what
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North,Levi's Stadium,what
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East,Levi's Stadium,what
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East,Levi's Stadium,what


In [58]:
#Putting numbers for stadiums
# An array assigned to a column needs exactly one value per row, so size it
# from the frame instead of hard-coding 5 (which fails for any other row count).
nfl_frame["Stadium"] = np.arange(len(nfl_frame))
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium,aosdnibdwibuabdiauwbd
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East,0,what
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North,1,what
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North,2,what
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East,3,what
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East,4,what


In [59]:
# Both newly assigned columns now appear at the end of the column index
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division', 'Stadium', 'aosdnibdwibuabdiauwbd'],
      dtype='object')

In [60]:
# Adding a Series to a DataFrame: first build a Series whose index labels
# (4 and 0) name the rows its values belong to.
stadiums = Series(
    data=["Levi's Stadium", "AT&T Stadium"],
    index=[4, 0],
)
stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [61]:
# Assignment aligns on index labels: rows 0 and 4 receive the Series values,
# and rows with no matching label are left empty (NaN).
nfl_frame['Stadium']=stadiums
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium,aosdnibdwibuabdiauwbd
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East,AT&T Stadium,what
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North,,what
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North,,what
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East,,what
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East,Levi's Stadium,what


In [62]:
#We can also delete columns
# del removes the named column from nfl_frame itself (no copy is made),
# which restores the frame to its original set of columns.
del nfl_frame['Stadium']
del nfl_frame['aosdnibdwibuabdiauwbd']
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,502,374,6,0.573,1960,882,NFC East
1,2,Green Bay Packers,737,562,37,0.565,1921,1336,NFC North
2,3,Chicago Bears,749,579,42,0.562,1920,1370,NFC North
3,4,Miami Dolphins,445,351,4,0.559,1966,800,AFC East
4,5,New England Patriots[b],489,386,9,0.558,1960,884,AFC East


In [63]:
# A DataFrame can also be built from a dictionary of equal-length lists:
# each key becomes a column label and each list supplies that column's values.
data = dict(
    City=['SF', 'LA', 'NYC'],
    Population=[837000, 3880000, 8400000],
)
city_frame = DataFrame(data=data)

# Display the result
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [64]:
# Now let's see selection in a DataFrame.
# Build a 5x5 frame holding 0..24, with city labels on the rows and letters on the columns.
dframe = DataFrame(
    np.arange(25).reshape(5, 5),
    index=['NYC', 'LA', 'SF', 'DC', 'Chi'],
    columns=list('ABCDE'),
)
# Display the result
dframe

Unnamed: 0,A,B,C,D,E
NYC,0,1,2,3,4
LA,5,6,7,8,9
SF,10,11,12,13,14
DC,15,16,17,18,19
Chi,20,21,22,23,24


In [67]:
# Select a single column by its label
dframe['B']

NYC     1
LA      6
SF     11
DC     16
Chi    21
Name: B, dtype: int32

In [70]:
# Select several columns at once: every row, only the listed column labels
dframe.loc[:, ['B', 'E']]

Unnamed: 0,B,E
NYC,1,4
LA,6,9
SF,11,14
DC,16,19
Chi,21,24


In [74]:
# A boolean condition can also select rows: keep those whose 'B' value exceeds 8
dframe.loc[dframe['B'] > 8]

Unnamed: 0,A,B,C,D,E
SF,10,11,12,13,14
DC,15,16,17,18,19
Chi,20,21,22,23,24


In [75]:
#Can also just show a boolean DataFrame
# The comparison is applied element-wise, giving True/False for every cell.
dframe> 10

Unnamed: 0,A,B,C,D,E
NYC,False,False,False,False,False
LA,False,False,False,False,False
SF,False,True,True,True,True
DC,True,True,True,True,True
Chi,True,True,True,True,True


In [78]:
#Can also use .loc to label-index a row
# .ix is deprecated and was removed in pandas 1.0; .loc is the label-based replacement.
# See http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
dframe.loc['LA']

A    5
B    6
C    7
D    8
E    9
Name: LA, dtype: int32

# Dataframe Math