### Series

In [2]:
import numpy as np

In [4]:
import pandas as pd

In [6]:
from pandas import Series, DataFrame

In [23]:
obj = Series([3,6,9,2]) #Each value will be indexed automatically if not specified

In [14]:
obj

0    3
1    6
2    9
3    2
dtype: int64

In [16]:
obj.values

array([3, 6, 9, 2], dtype=int64)

In [19]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [61]:
ww2_cas = Series([87,43,30,21,4],index=['USSR','Germany','China','Japan','USA'])

In [62]:
ww2_cas

USSR       87
Germany    43
China      30
Japan      21
USA         4
dtype: int64

In [63]:
ww2_cas['USA']

4

In [68]:
#check for countries having casualtities over 40
ww2_cas[ww2_cas > 40]

USSR       87
Germany    43
dtype: int64

In [69]:
'USSR' in ww2_cas #checking for presence of an index

True

In [33]:
ww2_dict = ww2_cas.to_dict()

In [35]:
ww2_dict

{'USSR': 87, 'Germany': 43, 'China': 30, 'Japan': 21, 'USA': 4}

In [37]:
ww2_series = Series(ww2_dict)

In [39]:
ww2_series

USSR       87
Germany    43
China      30
Japan      21
USA         4
dtype: int64

In [41]:
countries = ['China','Germany','Japan','USA','USSR','Argentina']

In [43]:
obj2 = Series(ww2_dict,index = countries)

In [70]:
obj2

China        30.0
Germany      43.0
Japan        21.0
USA           4.0
USSR         87.0
Argentina     NaN
dtype: float64

In [76]:
pd.notnull(obj2)

China         True
Germany       True
Japan         True
USA           True
USSR          True
Argentina    False
dtype: bool

In [78]:
pd.isnull(obj2)

China        False
Germany      False
Japan        False
USA          False
USSR         False
Argentina     True
dtype: bool

In [83]:
ww2_series + obj2  #Adding 2 series

Argentina      NaN
China         60.0
Germany       86.0
Japan         42.0
USA            8.0
USSR         174.0
dtype: float64

In [85]:
obj2.name = "World War 2 countries"

In [86]:
obj2

China        30.0
Germany      43.0
Japan        21.0
USA           4.0
USSR         87.0
Argentina     NaN
Name: World War 2 countries, dtype: float64

In [88]:
obj2.index.name = "Countries"

In [91]:
obj2

Countries
China        30.0
Germany      43.0
Japan        21.0
USA           4.0
USSR         87.0
Argentina     NaN
Name: World War 2 countries, dtype: float64

### DataFrames
Dataframe is like a spreadsheet. 

In [94]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame  #Can be used directly instead of using pd.Series, pd.DataFrame

In [96]:
import webbrowser
website = 'http://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [98]:
nfl_frame = pd.read_clipboard()

In [100]:
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [101]:
nfl_frame.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Division'],
      dtype='object')

In [106]:
nfl_frame.Rank   #Call columns of dataframe. Note. Cannot be done for columns with more than one word

0    1
1    2
2    3
3    4
4    5
Name: Rank, dtype: int64

In [109]:
nfl_frame['First NFL Season'] #calling columns with more than one word

0    1960
1    1921
2    1960
3    1920
4    1996
Name: First NFL Season, dtype: int64

In [112]:
#calling multiple columns from dataframe
DataFrame(nfl_frame,columns=['Team','Rank','GP'])

Unnamed: 0,Team,Rank,GP
0,Dallas Cowboys,1,914
1,Green Bay Packers,2,1368
2,New England Patriots,3,916
3,Chicago Bears,4,1402
4,Baltimore Ravens,5,384


In [115]:
DataFrame(nfl_frame,columns=['Team','Rank','GP','Stadium']) #Pandas fills NaN for columns that are not there in source DF

Unnamed: 0,Team,Rank,GP,Stadium
0,Dallas Cowboys,1,914,
1,Green Bay Packers,2,1368,
2,New England Patriots,3,916,
3,Chicago Bears,4,1402,
4,Baltimore Ravens,5,384,


In [121]:
nfl_frame.head(3)  #Retrieves first n rows. Default is 5 without n

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East


In [124]:
nfl_frame.tail(1)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [130]:
nfl_frame.iloc[0]  #Retrieving rows from data frame

Rank                             1
Team                Dallas Cowboys
GP                             914
Won                            520
Lost                           388
Tied                             6
Pct.                         0.572
First NFL Season              1960
Division                  NFC East
Name: 0, dtype: object

In [133]:
nfl_frame['Stadium'] = "Levi's Stadium"

In [135]:
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,Levi's Stadium
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,Levi's Stadium
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,Levi's Stadium
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,Levi's Stadium
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,Levi's Stadium


In [137]:
len(nfl_frame)

5

In [155]:
nfl_frame['Stadium'] = np.arange(len(nfl_frame))

In [156]:
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,0
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,1
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,2
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,3
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,4


In [157]:
stadiums = Series(["Levi's Stadium",'AT&T Stadium'],index=[4,0])

In [158]:
stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [159]:
nfl_frame['Stadium'] = stadiums #matching stadium values and updating stadium column

In [160]:
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,AT&T Stadium
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,Levi's Stadium


In [163]:
del nfl_frame['Stadium']  #deleting a column from dataframe

KeyError: 'Stadium'

In [153]:
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [167]:
#constructing dataframe from dictonary
data = {'City':['SF','LA','NYC'],'Population':[100,200,300]}

In [172]:
data

{'City': ['SF', 'LA', 'NYC'], 'Population': [100, 200, 300]}

In [174]:
city_frame = DataFrame(data)

In [175]:
city_frame

Unnamed: 0,City,Population
0,SF,100
1,LA,200
2,NYC,300
