## Series in Pandas

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
obj= Series([3,6,9,12])

In [3]:
obj

0     3
1     6
2     9
3    12
dtype: int64

In [4]:
obj.values

array([ 3,  6,  9, 12])

In [6]:
obj.index

RangeIndex(start=0, stop=4, step=1)

In [14]:
# Build a Series whose index labels are country names instead of 0..n-1
ww2_cas = Series(
    data=[8700000, 4200000, 3000000, 2100000, 4000000],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)

In [15]:
ww2_cas

USSR       8700000
Germany    4200000
China      3000000
Japan      2100000
USA        4000000
dtype: int64

In [16]:
ww2_cas['USA']

4000000

In [17]:
# countries with more than 4 million casualties (boolean-mask selection)
ww2_cas.loc[ww2_cas.gt(4000000)]

USSR       8700000
Germany    4200000
dtype: int64

In [18]:
'USSR' in ww2_cas

True

In [19]:
# Convert the Series into a plain dict mapping index label -> value.
# NOTE: despite the "worldw1" name, this holds the WW2 figures from ww2_cas.
worldw1_dict=ww2_cas.to_dict()

In [20]:
worldw1_dict

{'China': 3000000,
 'Germany': 4200000,
 'Japan': 2100000,
 'USA': 4000000,
 'USSR': 8700000}

In [21]:
worldw1_dict['China']

3000000

In [22]:
#from dictionary to series
dict_to_series=Series(worldw1_dict)

In [23]:
dict_to_series

China      3000000
Germany    4200000
Japan      2100000
USA        4000000
USSR       8700000
dtype: int64

In [27]:
#indexing can be added from a list
countries=['USSR','Germany','China','Japan','USA','Argentina']

In [150]:
obj2=Series(worldw1_dict,index=countries)

In [151]:
obj2

USSR         8700000.0
Germany      4200000.0
China        3000000.0
Japan        2100000.0
USA          4000000.0
Argentina          NaN
dtype: float64

In [30]:
# flag missing (NaN) entries; only 'Argentina' has no value in the dict
obj2.isnull()

USSR         False
Germany      False
China        False
Japan        False
USA          False
Argentina     True
dtype: bool

In [31]:
#adding series, where values are added by index
ww2_cas

USSR       8700000
Germany    4200000
China      3000000
Japan      2100000
USA        4000000
dtype: int64

In [32]:
ww2_cas+obj2

Argentina           NaN
China         6000000.0
Germany       8400000.0
Japan         4200000.0
USA           8000000.0
USSR         17400000.0
dtype: float64

In [33]:
#naming Series
obj2.name='World war 2'

In [35]:
obj2

USSR         8700000.0
Germany      4200000.0
China        3000000.0
Japan        2100000.0
USA          4000000.0
Argentina          NaN
Name: World war 2, dtype: float64

In [38]:
#labeling index column
obj2.index.name='Countries'

In [39]:
obj2

Countries
USSR         8700000.0
Germany      4200000.0
China        3000000.0
Japan        2100000.0
USA          4000000.0
Argentina          NaN
Name: World war 2, dtype: float64

## DataFrames

In [41]:
import webbrowser
website='https://en.wikipedia.org/wiki/NFL_win%E2%80%93loss_records'

In [42]:
webbrowser.open(website) #reading data from NFL record table

True

In [50]:
# Build the top of the NFL win-loss table inline so this notebook survives
# Restart Kernel -> Run All. Interactively the same frame can be obtained by
# copying the table from the page opened above and calling pd.read_clipboard(),
# but that depends on whatever happens to be in the OS clipboard at run time.
nfl_frame = pd.DataFrame(
    {
        'Rank': [1, 2, 3, 4, 5],
        'Team': ['Dallas Cowboys', 'Green Bay Packers', 'Chicago Bears',
                 'Miami Dolphins', 'New England Patriots'],
        'Won': [493, 730, 744, 439, 476],
        'Lost': [367, 553, 568, 341, 383],
        'Tied': [6, 37, 42, 4, 9],
        'Pct.': [0.573, 0.567, 0.565, 0.563, 0.554],
        'First NFL Season': [1960, 1921, 1920, 1966, 1960],
        'Total Games': [866, 1320, 1354, 784, 868],
        'Division': ['NFC East', 'NFC North', 'NFC North', 'AFC East', 'AFC East'],
    },
    # explicit column order so the layout does not depend on dict ordering
    columns=['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.',
             'First NFL Season', 'Total Games', 'Division'],
)

In [51]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [52]:
# names of columns
nfl_frame.columns

Index(['Rank', 'Team', 'Won', 'Lost', 'Tied', 'Pct.', 'First NFL Season',
       'Total Games', 'Division'],
      dtype='object')

In [53]:
# get one of the columns by name
nfl_frame.Rank

0    1
1    2
2    3
3    4
4    5
Name: Rank, dtype: int64

In [54]:
nfl_frame.Won

0    493
1    730
2    744
3    439
4    476
Name: Won, dtype: int64

In [69]:
# population standard deviation (ddof=0) of the wins column
nfl_frame['Won'].std(ddof=0)

132.36102145269203

In [56]:
#if more than one word in the column name
nfl_frame['First NFL Season']

0    1960
1    1921
2    1920
3    1966
4    1960
Name: First NFL Season, dtype: int64

In [57]:
# select several columns at once by passing a list of column names
nfl_frame[['Team', 'First NFL Season', 'Total Games']]

Unnamed: 0,Team,First NFL Season,Total Games
0,Dallas Cowboys,1960,866
1,Green Bay Packers,1921,1320
2,Chicago Bears,1920,1354
3,Miami Dolphins,1966,784
4,New England Patriots,1960,868


In [58]:
# a requested column that is not in the frame ('Stadium') comes back filled with NaN
nfl_frame.reindex(columns=['Team', 'First NFL Season', 'Total Games', 'Stadium'])

Unnamed: 0,Team,First NFL Season,Total Games,Stadium
0,Dallas Cowboys,1960,866,
1,Green Bay Packers,1921,1320,
2,Chicago Bears,1920,1354,
3,Miami Dolphins,1966,784,
4,New England Patriots,1960,868,


In [60]:
nfl_frame.head()

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [62]:
# retrieve one row by its index label
# use .loc (label-based) or .iloc (position-based); the older .ix indexer
# has been removed from pandas
nfl_frame.loc[3]

Rank                             4
Team                Miami Dolphins
Won                            439
Lost                           341
Tied                             4
Pct.                         0.563
First NFL Season              1966
Total Games                    784
Division                  AFC East
Name: 3, dtype: object

In [63]:
#add column values
nfl_frame['Stadium']="Levi's Stadium"

In [64]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,Levi's Stadium
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,Levi's Stadium
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,Levi's Stadium
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,Levi's Stadium
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Levi's Stadium


In [65]:
#assigning multiple values 
nfl_frame['Stadium']=np.arange(5)

In [66]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,0
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,1
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,2
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,3
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,4


In [74]:
#adding Series to a df
#Assigning stadium by index number
Stadiums= Series(["Levis's Stad","ATNT Stad"],index=[4,0])

In [75]:
Stadiums

4    Levis's Stad
0       ATNT Stad
dtype: object

In [76]:
nfl_frame['Stadium']=Stadiums

In [77]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division,Stadium
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East,ATNT Stad
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North,
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North,
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East,
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East,Levis's Stad


In [78]:
# delete a column in place; this mutates nfl_frame, so re-running the cell
# after the column is gone raises KeyError
del nfl_frame['Stadium']

In [79]:
nfl_frame

Unnamed: 0,Rank,Team,Won,Lost,Tied,Pct.,First NFL Season,Total Games,Division
0,1,Dallas Cowboys,493,367,6,0.573,1960,866,NFC East
1,2,Green Bay Packers,730,553,37,0.567,1921,1320,NFC North
2,3,Chicago Bears,744,568,42,0.565,1920,1354,NFC North
3,4,Miami Dolphins,439,341,4,0.563,1966,784,AFC East
4,5,New England Patriots,476,383,9,0.554,1960,868,AFC East


In [80]:
# df from dictionary
data={'City':['SF','LA','NYC'], 'Population':[837000,3880000,840000]}

In [81]:
data

{'City': ['SF', 'LA', 'NYC'], 'Population': [837000, 3880000, 840000]}

In [152]:
cit_frame =DataFrame(data)
cit_frame

Unnamed: 0,City,Population
0,SF,837000
1,LA,3880000
2,NYC,840000


## Index Objects

In [84]:
my_ser = Series([1,2,3,4], index=['A','B','C','D'])

In [85]:
my_ser

A    1
B    2
C    3
D    4
dtype: int64

In [87]:
my_index=my_ser.index

In [88]:
my_index

Index(['A', 'B', 'C', 'D'], dtype='object')

## Reindexing

In [89]:
from numpy.random import randn

In [90]:
ser1 = Series([1,2,3,4], index=['A','B','C','D'])

In [91]:
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [92]:
ser2=ser1.reindex(['A','B','C','D','E','F'])

In [93]:
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [94]:
#values can be filled 
ser2=ser1.reindex(['A','B','C','D','E','F','G'], fill_value=0)

In [95]:
ser2

A    1
B    2
C    3
D    4
E    0
F    0
G    0
dtype: int64

In [96]:
ser3=Series(['USA','Mexico','Canda'], index=[0,5,10])

In [97]:
ser3

0        USA
5     Mexico
10     Canda
dtype: object

In [98]:
#to add index from a function
ranger=range(15)


In [99]:
# method='ffill' (forward fill) carries the last existing value forward into
# the new index labels that follow it,
# so in ser3 'USA' (index 0) fills labels 1 through 4
# other fill methods are available (e.g. 'bfill'); see the reindex docs
ser3.reindex(ranger,method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10     Canda
11     Canda
12     Canda
13     Canda
14     Canda
dtype: object

In [112]:
# 5x5 frame of standard-normal draws with explicit row labels (note there is
# no 'C' row) and generated column names col1..col5
dframe = DataFrame(
    randn(5, 5),
    index=list('ABDEF'),
    columns=['col%d' % n for n in range(1, 6)],
)

In [113]:
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.486388,0.150216,0.220442,1.225403,0.124122
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734
D,1.524301,0.043304,-0.984024,0.362366,0.230583
E,1.251231,-0.022627,-0.929868,0.729364,0.914717
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762


In [114]:
#reindex dframe
dframe2=dframe.reindex(['A','B','C','D','E','F'])

In [115]:
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,0.486388,0.150216,0.220442,1.225403,0.124122
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734
C,,,,,
D,1.524301,0.043304,-0.984024,0.362366,0.230583
E,1.251231,-0.022627,-0.929868,0.729364,0.914717
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762


In [118]:
#reindex columns
new_columns=['col'+str(i) for i in range(1,7)]
dframe2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.486388,0.150216,0.220442,1.225403,0.124122,
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734,
C,,,,,,
D,1.524301,0.043304,-0.984024,0.362366,0.230583,
E,1.251231,-0.022627,-0.929868,0.729364,0.914717,
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762,


In [119]:
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.486388,0.150216,0.220442,1.225403,0.124122
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734
D,1.524301,0.043304,-0.984024,0.362366,0.230583
E,1.251231,-0.022627,-0.929868,0.729364,0.914717
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762


In [124]:
# Reindex rows and columns in one call. 'C' and 'col6' do not exist in dframe;
# current pandas raises KeyError when .loc is given a list containing missing
# labels, whereas reindex inserts NaN-filled rows/columns for them.
dframe.reindex(index=['A','B','C','D','E','F'], columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.486388,0.150216,0.220442,1.225403,0.124122,
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734,
C,,,,,,
D,1.524301,0.043304,-0.984024,0.362366,0.230583,
E,1.251231,-0.022627,-0.929868,0.729364,0.914717,
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762,


In [138]:
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.486388,0.150216,0.220442,1.225403,0.124122
B,-0.858166,-0.614529,-0.525209,-1.944325,0.600734
D,1.524301,0.043304,-0.984024,0.362366,0.230583
E,1.251231,-0.022627,-0.929868,0.729364,0.914717
F,-0.076463,0.980683,0.096278,-0.181306,-0.189762


## Drop Entry

In [140]:
ser1 = Series(np.arange(3),index=['a','b','c'])

In [141]:
ser1

a    0
b    1
c    2
dtype: int64

In [142]:
ser1.drop('b')

a    0
c    2
dtype: int64

## Drop Entry DF

In [144]:
dframe1=DataFrame(np.arange(9).reshape(3,3), index=['SF','LA','NY'], columns=['pop','size','year'])

In [145]:
dframe1

Unnamed: 0,pop,size,year
SF,0,1,2
LA,3,4,5
NY,6,7,8


In [147]:
dframe1.drop('LA')

Unnamed: 0,pop,size,year
SF,0,1,2
NY,6,7,8


In [148]:
# to drop a column, we use axis=1, axis=0 for rows
dframe1.drop('year', axis=1)

Unnamed: 0,pop,size
SF,0,1
LA,3,4
NY,6,7
