In [2]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [3]:
dframe = DataFrame(np.arange(12).reshape(4,3))
# dframe is constructed by casting a 4-row by 3-col numpy array as a pandas DataFrame, pre-filled with values 0-11.
# Here the index defaults to [0,1,2,3], the columns to [0,1,2]

In [4]:
dframe

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11


In [5]:
# Constructing a DataFrame from a Dictionary:
data = {'City':['SF','LA','NYC'],'Population':[837000,3880000,8400000]}
city_frame = DataFrame(data)
# Creates a DataFrame with columns labeled City and Population, indexes of [0,1,2]
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


In [6]:
# Adding a Series to an existing DataFrame:
colors = Series(["Blue","Red"],index=[4,1])
dframe['Color']=colors
# dframe now has a Color column with Blue matched to index 4, Red to 1, and NaN after everything else.
colors

4    Blue
1     Red
dtype: object

In [22]:
#Now we'll learn DataFrames

#Let's get some data to play with. How about the NFL?
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [23]:
#Copy and read to get data
nfl_frame = pd.read_clipboard()

In [37]:
#Show
nfl_frame


Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Levi's Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Levi's Stadium
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Levi's Stadium
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Levi's Stadium
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Levi's Stadium
5,6,New York Giants,684,572,33,0.543,1925,1289,NFC East,Levi's Stadium
6,7,Denver Broncos,465,393,10,0.541,1960,868,AFC West,Levi's Stadium
7,8,Minnesota Vikings,457,387,10,0.541,1961,854,NFC North,Levi's Stadium
8,9,Baltimore Ravens,181,154,1,0.54,1996,336,AFC North,Levi's Stadium
9,10,San Francisco 49ers,522,450,14,0.537,1950,986,NFC West,Levi's Stadium


In [38]:
# We can grab the oclumn names with .columns
nfl_frame.columns

Index(['Rank ', 'Team ', 'Won ', 'Lost ', 'Tied ', 'Pct. ',
       'First NFL Season ', 'Total Games ', 'Division', 'Stadium'],
      dtype='object')

In [39]:
#Lets see some specific data columns
DataFrame(nfl_frame,columns=['Team','First Season','Total Games'])

Unnamed: 0,Team,First Season,Total Games
0,,,
1,,,
2,,,
3,,,
4,,,
5,,,
6,,,
7,,,
8,,,
9,,,


In [40]:
#What happens if we ask for a column that doesn't exist?
DataFrame(nfl_frame,columns=['Team','First Season','Total Games','Stadium'])

Unnamed: 0,Team,First Season,Total Games,Stadium
0,,,,Levi's Stadium
1,,,,Levi's Stadium
2,,,,Levi's Stadium
3,,,,Levi's Stadium
4,,,,Levi's Stadium
5,,,,Levi's Stadium
6,,,,Levi's Stadium
7,,,,Levi's Stadium
8,,,,Levi's Stadium
9,,,,Levi's Stadium


In [41]:
# Call columns
nfl_frame.columns

Index(['Rank ', 'Team ', 'Won ', 'Lost ', 'Tied ', 'Pct. ',
       'First NFL Season ', 'Total Games ', 'Division', 'Stadium'],
      dtype='object')

In [42]:
#We can retrieve individual column
nfl_frame.Team

AttributeError: 'DataFrame' object has no attribute 'Team'

In [31]:
#We can retrieve rows through indexing
nfl_frame.ix[3]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  


Rank                               4
Team                 Miami Dolphins 
Won                              439
Lost                             341
Tied                               4
Pct.                           0.563
First NFL Season                1966
Total Games                     784 
Division                    AFC East
Name: 3, dtype: object

In [33]:
#We can also assign value sto entire columns
nfl_frame['Stadium']="Levi's Stadium" #Careful with the ' here

In [None]:
nfl_frame

In [None]:
# Call columns
nfl_frame.columns

In [None]:
#Adding a Series to a DataFrame
stadiums = Series(["Levi's Stadium","AT&T Stadium"],index=[4,0])

In [None]:
#Now input into the nfl DataFrame
nfl_frame['Stadium']=stadiums

#Show
nfl_frame

In [None]:
#We can also delete columns
del nfl_frame['Stadium']

nfl_frame

In [None]:
#DataFrames can be constructed many ways. Another way is from a dictionary of equal length lists
data = {'City':['SF','LA','NYC'],
        'Population':[837000,3880000,8400000]}

city_frame = DataFrame(data)

#Show
city_frame