## Pandas series

In [1]:
import numpy as np
import pandas as pd

In [2]:
list1=[1,2,3,4]
pd.Series(list1)

0    1
1    2
2    3
3    4
dtype: int64

In [3]:
pd.Series(list1, ['a','b','c','d'])

a    1
b    2
c    3
d    4
dtype: int64

In [4]:
pd.Series(np.array([1,2]), [1,2])

1    1
2    2
dtype: int32

In [5]:
pd.Series(np.array(['a',22,print]))

0                            a
1                           22
2    <built-in function print>
dtype: object

## Accessing elements

In [6]:
ser1= pd.Series(['a','b','c','d'])
ser1

0    a
1    b
2    c
3    d
dtype: object

In [7]:
ser1[1]

'b'

In [8]:
ser1[1:4]

1    b
2    c
3    d
dtype: object

In [10]:
ser1.values

array(['a', 'b', 'c', 'd'], dtype=object)

In [14]:
ser2= pd.Series([1,2,3,4],['a','b','d','c'])
ser2

a    1
b    2
d    3
c    4
dtype: int64

In [15]:
ser2['b']

2

In [16]:
ser2['b':'c']

b    2
d    3
c    4
dtype: int64

In [17]:
ser2.values

array([1, 2, 3, 4], dtype=int64)

In [18]:
ser2.index

Index(['a', 'b', 'd', 'c'], dtype='object')

In [19]:
ser2

a    1
b    2
d    3
c    4
dtype: int64

In [20]:
ser2.drop('d')

a    1
b    2
c    4
dtype: int64

In [21]:
ser2

a    1
b    2
d    3
c    4
dtype: int64

In [22]:
ser2.drop('d', inplace=True)

In [23]:
ser2

a    1
b    2
c    4
dtype: int64

In [24]:
ser2['d']=5
ser2

a    1
b    2
c    4
d    5
dtype: int64

## DataFrame
collection of series object

In [25]:
data= np.random.rand(5,3)
data

array([[0.07683761, 0.76043916, 0.7662019 ],
       [0.48456736, 0.60239516, 0.62791337],
       [0.50286288, 0.2401668 , 0.04085073],
       [0.38212   , 0.15265363, 0.62824394],
       [0.18990576, 0.15685678, 0.06674203]])

In [26]:
df= pd.DataFrame(data)
df

Unnamed: 0,0,1,2
0,0.076838,0.760439,0.766202
1,0.484567,0.602395,0.627913
2,0.502863,0.240167,0.040851
3,0.38212,0.152654,0.628244
4,0.189906,0.156857,0.066742


In [27]:
type(df[0])

pandas.core.series.Series

In [28]:
df2= pd.DataFrame(data, index=['a','b','c','d','e'], columns=['col1','col2','col3'])
df2

Unnamed: 0,col1,col2,col3
a,0.076838,0.760439,0.766202
b,0.484567,0.602395,0.627913
c,0.502863,0.240167,0.040851
d,0.38212,0.152654,0.628244
e,0.189906,0.156857,0.066742


In [29]:
print(df2.columns)

Index(['col1', 'col2', 'col3'], dtype='object')


In [31]:
# using dictionaries
df_new= pd.DataFrame({'col1':[1,2,3], 'col2':[4,5,6]})
df_new

Unnamed: 0,col1,col2
0,1,4
1,2,5
2,3,6


## Accessing and selection

In [33]:
df2

Unnamed: 0,col1,col2,col3
a,0.076838,0.760439,0.766202
b,0.484567,0.602395,0.627913
c,0.502863,0.240167,0.040851
d,0.38212,0.152654,0.628244
e,0.189906,0.156857,0.066742


In [32]:
df2.col1 # risky

a    0.076838
b    0.484567
c    0.502863
d    0.382120
e    0.189906
Name: col1, dtype: float64

In [35]:
df2['col1']

a    0.076838
b    0.484567
c    0.502863
d    0.382120
e    0.189906
Name: col1, dtype: float64

In [36]:
df2[['col2', 'col3']]

Unnamed: 0,col2,col3
a,0.760439,0.766202
b,0.602395,0.627913
c,0.240167,0.040851
d,0.152654,0.628244
e,0.156857,0.066742


## loc[]

In [37]:
# rows
df2.loc['d']

col1    0.382120
col2    0.152654
col3    0.628244
Name: d, dtype: float64

In [38]:
type(df2.loc['d'])

pandas.core.series.Series

In [39]:
# multiple rows
df2.loc[['a','d']]

Unnamed: 0,col1,col2,col3
a,0.076838,0.760439,0.766202
d,0.38212,0.152654,0.628244


In [40]:
# select row and column
df2.loc['a','col1']

0.07683760915219073

In [41]:
# multiple
df2.loc[['a','d'], ['col1','col2']]

Unnamed: 0,col1,col2
a,0.076838,0.760439
d,0.38212,0.152654


## iloc[]

In [43]:
#rows
df2.iloc[0]

col1    0.076838
col2    0.760439
col3    0.766202
Name: a, dtype: float64

In [44]:
df2.iloc[0, 2]

0.766201899022708

In [45]:
df2.iloc[[0,1,3],[1]]

Unnamed: 0,col2
a,0.760439
b,0.602395
d,0.152654


## Based on conditions

In [46]:
df2

Unnamed: 0,col1,col2,col3
a,0.076838,0.760439,0.766202
b,0.484567,0.602395,0.627913
c,0.502863,0.240167,0.040851
d,0.38212,0.152654,0.628244
e,0.189906,0.156857,0.066742


In [47]:
df2>0.6

Unnamed: 0,col1,col2,col3
a,False,True,True
b,False,True,True
c,False,False,False
d,False,False,True
e,False,False,False


In [50]:
df2[df2>0.6]

Unnamed: 0,col1,col2,col3
a,,0.760439,0.766202
b,,0.602395,0.627913
c,,,
d,,,0.628244
e,,,


In [51]:
df2[(df2['col3']>0.6) & (df2['col1']>0.4)]

Unnamed: 0,col1,col2,col3
b,0.484567,0.602395,0.627913


In [67]:
df2[df2['col3']>0.6]['col3']

a    0.766202
b    0.627913
Name: col3, dtype: float64

In [56]:
df2

Unnamed: 0,col1,col2,col3
a,0.076838,0.760439,0.766202
b,0.484567,0.602395,0.627913
c,0.502863,0.240167,0.040851
d,0.38212,0.152654,0.628244
e,0.189906,0.156857,0.066742


In [60]:
df2.drop('col1', axis=1, inplace=True)

In [61]:
df2

Unnamed: 0,col2,col3
a,0.760439,0.766202
b,0.602395,0.627913
c,0.240167,0.040851
d,0.152654,0.628244
e,0.156857,0.066742


In [63]:
df2.drop('d', inplace=True)

In [64]:
df2

Unnamed: 0,col2,col3
a,0.760439,0.766202
b,0.602395,0.627913
c,0.240167,0.040851
e,0.156857,0.066742


In [79]:
# adding row to dataframe
df2.append(pd.Series([10,20], index=['col2','col3'], name='f'))

Unnamed: 0,col2,col3
a,0.760439,0.766202
b,0.602395,0.627913
c,0.240167,0.040851
e,0.156857,0.066742
f,10.0,20.0


In [81]:
# just made a new dataframe, don't worry about code
df3=df2.reset_index(drop=True)
df3

Unnamed: 0,col2,col3
0,0.760439,0.766202
1,0.602395,0.627913
2,0.240167,0.040851
3,0.156857,0.066742


In [84]:
# adding a row [10,20] in middle of df3
df3=df3.append(pd.Series([10,20], index=['col2','col3'], name=0.5))
df3

Unnamed: 0,col2,col3
0.0,0.760439,0.766202
1.0,0.602395,0.627913
2.0,0.240167,0.040851
3.0,0.156857,0.066742
0.5,10.0,20.0


In [85]:
df3.sort_index().reset_index(drop=True)

Unnamed: 0,col2,col3
0,0.760439,0.766202
1,10.0,20.0
2,0.602395,0.627913
3,0.240167,0.040851
4,0.156857,0.066742


In [99]:
# adding in middle of series
ser1= pd.Series([1,2,3,4])
ser1

0    1
1    2
2    3
3    4
dtype: int64

In [100]:
ser1[0.5]=1.5
ser1

0.0    1.0
1.0    2.0
2.0    3.0
3.0    4.0
0.5    1.5
dtype: float64

In [101]:
ser1=ser1.sort_index()

In [102]:
ser1.reset_index(drop=True)

0    1.0
1    1.5
2    2.0
3    3.0
4    4.0
dtype: float64

In [103]:
# making series from dictionary
pd.Series({'col2':10, 'col3':20})

col2    10
col3    20
dtype: int64