## Intro to Series

In [3]:
import numpy as np

import pandas as pd

from pandas import Series, DataFrame

In [4]:
# Build a Series from a plain list; with no labels supplied pandas
# assigns a default integer index starting at 0.
obj = Series(data=[3, 6, 9, 12])
obj

0     3
1     6
2     9
3    12
dtype: int64

In [5]:
# .values exposes the underlying data as a NumPy array.
obj.values

array([ 3,  6,  9, 12], dtype=int64)

In [6]:
# .index holds the row labels; since none were supplied it is the default RangeIndex.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [10]:
# Casualty counts labelled by country: the strings passed as `index`
# replace the default integer labels.
ww2_cas = Series(
    data=[8700000, 4300000, 3000000, 2100000, 400000],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)

ww2_cas

USSR       8700000
Germany    4300000
China      3000000
Japan      2100000
USA         400000
dtype: int64

In [11]:
# Look up a single value by its index label.
ww2_cas['USA']

400000

In [12]:
# Boolean mask: keep only the countries with more than 4,000,000 casualties.
ww2_cas[ww2_cas > 4000000]

USSR       8700000
Germany    4300000
dtype: int64

In [17]:
# `in` on a Series tests the index labels (like the keys of a dict), not the values.
'USSR' in ww2_cas

True

In [18]:
# A label that is absent from the index simply yields False.
'France' in ww2_cas

False

In [20]:
# Convert the Series to a plain dict mapping index label -> value.
ww2_dict = ww2_cas.to_dict()

ww2_dict

{'USSR': 8700000,
 'Germany': 4300000,
 'China': 3000000,
 'Japan': 2100000,
 'USA': 400000}

In [22]:
# Round trip: a dict passed to the Series constructor becomes a Series
# whose index is the dict's keys.
ww2_series = Series(ww2_dict)

ww2_series

USSR       8700000
Germany    4300000
China      3000000
Japan      2100000
USA         400000
dtype: int64

In [25]:
# Labels in the order we want them; note 'France' has no entry in ww2_dict.
countries = ['China', 'Germany', 'Japan', 'USA', 'USSR', 'France']

In [26]:
# An explicit index selects and reorders the dict entries. A label that is
# missing from the dict ('France') becomes NaN, which also upcasts the
# values from int64 to float64.
obj2 = Series(ww2_dict, index=countries)

obj2

China      3000000.0
Germany    4300000.0
Japan      2100000.0
USA         400000.0
USSR       8700000.0
France           NaN
dtype: float64

In [27]:
# Element-wise mask: True where the value is missing (NaN).
pd.isnull(obj2)

China      False
Germany    False
Japan      False
USA        False
USSR       False
France      True
dtype: bool

In [28]:
# The complement of pd.isnull: True where a value is present.
pd.notnull(obj2)

China       True
Germany     True
Japan       True
USA         True
USSR        True
France     False
dtype: bool

In [29]:
# Display the dict-built Series again (integer values, no 'France' label).
ww2_series

USSR       8700000
Germany    4300000
China      3000000
Japan      2100000
USA         400000
dtype: int64

In [30]:
# Display the re-labelled Series again (float values, NaN for 'France').
obj2

China      3000000.0
Germany    4300000.0
Japan      2100000.0
USA         400000.0
USSR       8700000.0
France           NaN
dtype: float64

In [31]:
# Arithmetic aligns on index labels, not on position. The result is indexed
# by the sorted union of both label sets, and 'France' (no value available
# on either side) comes out as NaN.
ww2_series + obj2

China       6000000.0
France            NaN
Germany     8600000.0
Japan       4200000.0
USA          800000.0
USSR       17400000.0
dtype: float64

In [32]:
# Name the Series itself; the name is shown in the footer of the repr.
obj2.name = "World War 2 Casualties"

obj2

China      3000000.0
Germany    4300000.0
Japan      2100000.0
USA         400000.0
USSR       8700000.0
France           NaN
Name: World War 2 Casualties, dtype: float64

In [35]:
# The index can carry its own name, printed as a header above the labels.
obj2.index.name = 'Countries'

obj2

Countries
China      3000000.0
Germany    4300000.0
Japan      2100000.0
USA         400000.0
USSR       8700000.0
France           NaN
Name: World War 2 Casualties, dtype: float64

## DataFrames

In [36]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [37]:
import webbrowser 
# Open the source page in the system's default browser.
# NOTE: manual step - select the win-loss table on that page and copy it to
# the clipboard; the following cell reads it from there.
website = 'https://en.wikipedia.org/wiki/NFL_win-loss_records'
webbrowser.open(website)

True

In [39]:
# Parse whatever tabular text is currently on the system clipboard into a DataFrame.
# NOTE(review): this relies on a manual copy from the browser, so the cell is
# not reproducible under "Restart Kernel -> Run All"; consider saving the
# table to a file and loading it from there instead.
nfl_frame = pd.read_clipboard()

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [40]:
# The column labels of the frame, returned as an Index object.
nfl_frame.columns

Index(['Rank', 'Team', 'GP', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Division'],
      dtype='object')

In [45]:
# Selecting a single column by name returns it as a Series.
nfl_frame['First NFL Season']

0    1960
1    1921
2    1960
3    1920
4    1996
Name: First NFL Season, dtype: int64

In [46]:
# Build a new frame holding only the listed columns, in the listed order.
DataFrame(nfl_frame, columns=['Team','First NFL Season','GP'])

Unnamed: 0,Team,First NFL Season,GP
0,Dallas Cowboys,1960,914
1,Green Bay Packers,1921,1368
2,New England Patriots,1960,916
3,Chicago Bears,1920,1402
4,Baltimore Ravens,1996,384


In [47]:
# Requesting a column that does not exist ('Stadium') does not fail: the
# column is created and filled with missing values.
DataFrame(nfl_frame, columns=['Team','First NFL Season','GP','Stadium'])

Unnamed: 0,Team,First NFL Season,GP,Stadium
0,Dallas Cowboys,1960,914,
1,Green Bay Packers,1921,1368,
2,New England Patriots,1960,916,
3,Chicago Bears,1920,1402,
4,Baltimore Ravens,1996,384,


In [48]:
# The original frame is untouched by the column selections (no 'Stadium' column here).
nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [50]:
# First three rows of the frame.
nfl_frame.head(3)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East


In [52]:
# Last two rows of the frame.
nfl_frame.tail(2)

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [54]:
# Fetch one row by integer position (0-based). The row comes back as a
# Series whose labels are the frame's column names.
nfl_frame.iloc[3]

Rank                            4
Team                Chicago Bears
GP                          1,402
Won                           769
Lost                          591
Tied                           42
Pct.                        0.563
First NFL Season             1920
Division                NFC North
Name: 3, dtype: object

In [55]:
# Assigning a scalar to a new column name creates the column and
# broadcasts that one value to every row.
nfl_frame['Stadium'] = "Levi's Stadium"

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,Levi's Stadium
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,Levi's Stadium
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,Levi's Stadium
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,Levi's Stadium
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,Levi's Stadium


In [56]:
# Overwrite the column with one integer per row. The array is sized from the
# frame itself rather than a hard-coded 5: an array whose length differs from
# the number of rows raises ValueError, and the row count depends on what was
# pasted from the clipboard.
nfl_frame['Stadium'] = np.arange(len(nfl_frame))

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,0
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,1
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,2
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,3
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,4


In [59]:
# Two stadium names labelled with index values 4 and 0 (in that order).
stadiums = Series(["Levi's Stadium", "AT&T Stadium"], index=[4,0])

stadiums

4    Levi's Stadium
0      AT&T Stadium
dtype: object

In [60]:
# Assigning a Series aligns on the index rather than on position: rows 0 and 4
# receive their values and every other row is left missing.
nfl_frame['Stadium'] = stadiums

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division,Stadium
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East,AT&T Stadium
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North,
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East,
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North,
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North,Levi's Stadium


In [62]:
# `del` removes the column from the frame in place.
del nfl_frame['Stadium']

nfl_frame

Unnamed: 0,Rank,Team,GP,Won,Lost,Tied,Pct.,First NFL Season,Division
0,1,Dallas Cowboys,914,520,388,6,0.572,1960,NFC East
1,2,Green Bay Packers,1368,756,574,38,0.567,1921,NFC North
2,3,New England Patriots,916,512,395,9,0.564,1960,AFC East
3,4,Chicago Bears,1402,769,591,42,0.563,1920,NFC North
4,5,Baltimore Ravens,384,214,169,1,0.559,1996,AFC North


In [63]:
# Column-oriented input: one key per column, each holding a list of equal length.
data = dict(City=['SF', 'LA', 'NYC'], Population=[837000, 3880000, 8400000])
data

{'City': ['SF', 'LA', 'NYC'], 'Population': [837000, 3880000, 8400000]}

In [64]:
# A dict of equal-length lists becomes a DataFrame: each key is a column
# and the rows get a default integer index.
city_frame = DataFrame(data)
city_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,8400000


## Index objects

In [65]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd

In [66]:
# Small labelled Series used to inspect its Index object.
my_ser = Series(data=[1, 2, 3, 4], index=list('ABCD'))

my_ser

A    1
B    2
C    3
D    4
dtype: int64

In [67]:
# Keep a reference to the Series' Index object.
my_index = my_ser.index

my_index

Index(['A', 'B', 'C', 'D'], dtype='object')

In [70]:
# An Index supports positional lookup, like a list.
my_index[3]

'D'

In [73]:
# Slicing an Index returns another Index.
my_index[:2]

Index(['A', 'B'], dtype='object')

## Re-index

In [78]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

from numpy.random import randn

In [79]:
# Starting point for the reindexing examples: four values labelled A-D.
ser1 = Series(
    data=[1, 2, 3, 4],
    index=['A', 'B', 'C', 'D'],
)

ser1

A    1
B    2
C    3
D    4
dtype: int64

In [80]:
# reindex conforms the Series to the given labels and returns a new Series.
# Labels that did not exist before ('E', 'F') are filled with NaN, which
# upcasts the values to float64.
ser2 = ser1.reindex(['A','B','C','D','E','F'])

ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [81]:
# fill_value is used only for labels that are new in this call ('G').
# 'E' and 'F' already exist in ser2 holding NaN, so they stay NaN.
ser2.reindex(['A','B','C','D','E','F','G'], fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [82]:
# Three values on a sparse integer index (0, 5, 10).
ser3 = Series(['USA', "Mexico", "Canada"], index=[0,5,10])

ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [87]:
# The labels 0..14, used as the target index for reindexing.
ranger = range(15)
ranger

range(0, 15)

In [88]:
# method='ffill' forward-fills: every new label takes the value of the
# nearest existing label before it (0-4 -> USA, 5-9 -> Mexico, 10-14 -> Canada).
ser3.reindex(ranger, method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [95]:
# 5x5 frame of standard-normal draws; the row labels skip 'C'.
# NOTE(review): no random seed is set, so the numbers differ on every run.
dframe = DataFrame(
    randn(5, 5),
    index=list('ABDEF'),
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)

dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.87375,-1.332154,0.145049,0.917972,1.877783
B,-0.271489,1.118799,-1.303154,0.195154,-0.935741
D,0.084093,0.844943,-0.124032,0.559018,-0.038295
E,1.551298,-0.68701,0.015369,-1.285049,1.712408
F,0.259172,-0.471137,-0.886486,0.426184,1.649983


In [96]:
# Reindex the rows: the previously absent label 'C' is inserted as a row of NaN.
dframe2 = dframe.reindex(['A','B','C','D','E','F'])
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,0.87375,-1.332154,0.145049,0.917972,1.877783
B,-0.271489,1.118799,-1.303154,0.195154,-0.935741
C,,,,,
D,0.084093,0.844943,-0.124032,0.559018,-0.038295
E,1.551298,-0.68701,0.015369,-1.285049,1.712408
F,0.259172,-0.471137,-0.886486,0.426184,1.649983


In [97]:
# The five existing column names plus one that does not exist yet ('col6').
new_columns = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6']

In [98]:
# Reindex along the columns: 'col6' is added and filled with NaN.
# The result is only displayed here, not assigned back to dframe2.
dframe2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.87375,-1.332154,0.145049,0.917972,1.877783,
B,-0.271489,1.118799,-1.303154,0.195154,-0.935741,
C,,,,,,
D,0.084093,0.844943,-0.124032,0.559018,-0.038295,
E,1.551298,-0.68701,0.015369,-1.285049,1.712408,
F,0.259172,-0.471137,-0.886486,0.426184,1.649983,
