## Dataframes

In [36]:
from pandas import Series,DataFrame
import pandas as pd
import numpy as np

In [37]:
#Select and Copy some rows from opened webpage
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [39]:
#Create dataframe from data on clipboard 
nfl_df = pd.read_clipboard()
nfl_df

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,898,512,380,6,0.573,1960,NFC East
1,2,Chicago Bears,1386,761,583,42,0.564,1920,NFC North
2,3,Green Bay Packers,1352,743,571,38,0.564,1921,NFC North
3,4,New England Patriots[b],900,500,391,9,0.561,1960,AFC East
4,5,Miami Dolphins,816,452,360,4,0.556,1966,AFC East
5,6,Minnesota Vikings,886,478,397,11,0.546,1961,NFC North
6,7,Baltimore Ravens,368,200,167,1,0.545,1996,AFC North
7,8,New York Giants,1321,692,596,33,0.536,1925,NFC East


In [40]:
#View column headings in dataframe 
nfl_df.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Division'],
      dtype='object')

In [41]:
#Access columns by name
print("Data for column Team:\n",nfl_df.Team,"\n")

#If column name has space use bracket call
print("Data for column 'First NFL Season':\n",nfl_df['First NFL Season'])

Data for column Team:
 0             Dallas Cowboys
1              Chicago Bears
2          Green Bay Packers
3    New England Patriots[b]
4             Miami Dolphins
5          Minnesota Vikings
6           Baltimore Ravens
7            New York Giants
Name: Team, dtype: object 

Data for column 'First NFL Season':
 0    1960
1    1920
2    1921
3    1960
4    1966
5    1961
6    1996
7    1925
Name: First NFL Season, dtype: int64


In [42]:
#Create new dataframe from existing dataframe
nfl_red = DataFrame(nfl_df,columns=['Won','Lost','Tied','Pct','Stadium'])
nfl_red

Unnamed: 0,Won,Lost,Tied,Pct,Stadium
0,512,380,6,,
1,761,583,42,,
2,743,571,38,,
3,500,391,9,,
4,452,360,4,,
5,478,397,11,,
6,200,167,1,,
7,692,596,33,,


Notice how Stadium - which does not exist in the original nfl dataframe - is filled by panda using Nan.

In [43]:
#Get the first few rows. If no rows are passed then default value is 5
nfl_df.head(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,898,512,380,6,0.573,1960,NFC East
1,2,Chicago Bears,1386,761,583,42,0.564,1920,NFC North
2,3,Green Bay Packers,1352,743,571,38,0.564,1921,NFC North


In [44]:
#Get the last few rows. If no rows are passed then default value is 5
nfl_df.tail(4)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
4,5,Miami Dolphins,816,452,360,4,0.556,1966,AFC East
5,6,Minnesota Vikings,886,478,397,11,0.546,1961,NFC North
6,7,Baltimore Ravens,368,200,167,1,0.545,1996,AFC North
7,8,New York Giants,1321,692,596,33,0.536,1925,NFC East


In [45]:
#Retrieve rows in the dataframe via indexing
nfl_df.iloc[3]

Rank                                      4
Team                New England Patriots[b]
GP                                      900
Won                                     500
Lost                                    391
Tied                                      9
Pct.                                  0.561
First NFL Season                       1960
Division                           AFC East
Name: 3, dtype: object

In [46]:
#Fill a default value for NaN column
nfl_red["Stadium"] = "Default Stadium"
nfl_red

Unnamed: 0,Won,Lost,Tied,Pct,Stadium
0,512,380,6,,Default Stadium
1,761,583,42,,Default Stadium
2,743,571,38,,Default Stadium
3,500,391,9,,Default Stadium
4,452,360,4,,Default Stadium
5,478,397,11,,Default Stadium
6,200,167,1,,Default Stadium
7,692,596,33,,Default Stadium


In [47]:
#Add a range of values to a column
nfl_red['Stadium'] = np.arange(8)
nfl_red

Unnamed: 0,Won,Lost,Tied,Pct,Stadium
0,512,380,6,,0
1,761,583,42,,1
2,743,571,38,,2
3,500,391,9,,3
4,452,360,4,,4
5,478,397,11,,5
6,200,167,1,,6
7,692,596,33,,7


In [48]:
#Add a series to a dataframe
#Create a series
stadium_series = Series(["Hrishika Stadium","Neelam Stadium"], index=[4,7])

#Enhance the stadium column
nfl_red["Stadium"] = stadium_series
nfl_red

Unnamed: 0,Won,Lost,Tied,Pct,Stadium
0,512,380,6,,
1,761,583,42,,
2,743,571,38,,
3,500,391,9,,
4,452,360,4,,Hrishika Stadium
5,478,397,11,,
6,200,167,1,,
7,692,596,33,,Neelam Stadium


In [49]:
#delete column
del nfl_red["Stadium"]
nfl_red

Unnamed: 0,Won,Lost,Tied,Pct
0,512,380,6,
1,761,583,42,
2,743,571,38,
3,500,391,9,
4,452,360,4,
5,478,397,11,
6,200,167,1,
7,692,596,33,


In [50]:
#DataFrames can be constructed many ways. Another way is from a dictionary of equal length lists
data = {'City':['SF','LA','NYC'],
        'Population':[837000,3880000,8400000]}

city_frame = DataFrame(data)

city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [51]:
#For full list of ways to create DataFrames from various sources go to teh documentation for pandas:
website = 'http://pandas.pydata.org/pandas-docs/dev/generated/pandas.DataFrame.html'
webbrowser.open(website)

True