## Session 11 (pandas part 1)

### Pandas Intro
### Series & DataFrame

In [1]:
import pandas as pd

### Pandas Series

In [7]:
# Build a Series from a plain list; no index is supplied, so pandas
# labels the elements 0..4 automatically.
s1 = pd.Series(data=[5, 3, 1, 2, 7])
s1

0    5
1    3
2    1
3    2
4    7
dtype: int64

In [8]:
# Select the element whose index is 2; with s1's default 0..4 index this is the third value.
s1[2]

1

In [9]:
# Slice with integer bounds: the stop bound is excluded, so this returns the elements at index 1 and 2.
s1[1:3]

1    3
2    1
dtype: int64

In [10]:
# The underlying data as a NumPy array. `to_numpy()` is the accessor the
# pandas documentation recommends over the older `.values` attribute; for
# this integer Series both return the same array.
s1.to_numpy()

array([5, 3, 1, 2, 7])

In [11]:
# The index (labels) of s1; none was given at construction, so pandas generated one.
s1.index

RangeIndex(start=0, stop=5, step=1)

In [22]:
# The index of a pandas Series can be defined explicitly, whereas
# NumPy array indexes are always implicit (0, 1, 2, ...).

In [16]:
# Same values as before, but with an explicit index of string labels 'a'..'e'.
s2 = pd.Series(data=[5, 3, 1, 2, 7], index=list('abcde'))
s2

a    5
b    3
c    1
d    2
e    7
dtype: int64

In [19]:
# Look up a value by its string label from the explicit index.
s2['b']

3

In [21]:
# An explicit index does not have to be contiguous or start at zero:
# here the integer labels are 1, 5, 6, 8 and 10.
s3 = pd.Series(
    data=[5, 3, 1, 2, 7],
    index=[1, 5, 6, 8, 10],
)
s3

1     5
5     3
6     1
8     2
10    7
dtype: int64

In [23]:
# A Series can also be constructed from a dict.

In [25]:
# Build a Series from a dict: the keys become the index labels and the
# dict values become the data, in insertion order.
s4 = pd.Series(dict(ali=5, saeed=1, sara=3, reza=11))
s4

ali       5
saeed     1
sara      3
reza     11
dtype: int64

In [30]:
# A Series built from a dict is indexed by the dict keys, so values are looked up by key.
s4['sara']

3

In [31]:
# The data of s4 as a NumPy array (labels are dropped). `to_numpy()` is the
# accessor the pandas documentation recommends over the older `.values`.
s4.to_numpy()

array([ 5,  1,  3, 11])

In [32]:
# The dict keys became the index labels of s4.
s4.index

Index(['ali', 'saeed', 'sara', 'reza'], dtype='object')

In [33]:
# The NumPy array has no labels, so it is indexed by position: [2] is the
# third value. `to_numpy()` is used instead of `.values`, as the pandas
# documentation recommends.
s4.to_numpy()[2]

3

In [34]:
# The Index object can be indexed by position too: position 2 holds the label 'sara'.
s4.index[2]

'sara'

### Pandas DataFrames

In [35]:
# Age of each person, keyed by name; this becomes one column of the DataFrame below.
ages = pd.Series(dict(ali=11, saeed=21, sara=5, reza=12))
ages

ali      11
saeed    21
sara      5
reza     12
dtype: int64

In [36]:
# Birthplace of each person, keyed by the same names as `ages`.
# String data is stored with the generic `object` dtype.
birthplaces = pd.Series(
    data={
        'ali': 'tehran',
        'saeed': 'bojnord',
        'sara': 'mashhad',
        'reza': 'ramsar',
    }
)
birthplaces

ali       tehran
saeed    bojnord
sara     mashhad
reza      ramsar
dtype: object

In [37]:
# Combine the two Series into a table: each dict key names a column and
# the rows are labelled by the Series index (the person names).
df = pd.DataFrame(
    data={
        'age': ages,
        'birthplace': birthplaces,
    }
)
df

Unnamed: 0,age,birthplace
ali,11,tehran
saeed,21,bojnord
sara,5,mashhad
reza,12,ramsar


In [38]:
# Selecting a column by name returns it as a Series that keeps the row labels.
df['age']

ali      11
saeed    21
sara      5
reza     12
Name: age, dtype: int64

In [40]:
# To get a row, select it by its index label with .loc;
# the row comes back as a Series keyed by column name.
df.loc['ali']

age               11
birthplace    tehran
Name: ali, dtype: object

In [41]:
# Select a row by integer position with .iloc; position 2 is the third row ('sara').
df.iloc[2]

age                 5
birthplace    mashhad
Name: sara, dtype: object

In [44]:
# Different ways to create a DataFrame:

In [43]:
# Way 1: a dict of Series. Keys become column names and the Series index
# becomes the row labels.
df1 = pd.DataFrame(
    data={
        'age': ages,
        'birthplace': birthplaces,
    }
)
df1

Unnamed: 0,age,birthplace
ali,11,tehran
saeed,21,bojnord
sara,5,mashhad
reza,12,ramsar


In [46]:
# Way 2: a list of dicts, one dict per row. The dict keys become the
# column names; no index is given, so rows are numbered 0..3.
df2 = pd.DataFrame(
    data=[
        dict(age=11, birthplace='tehran'),
        dict(age=21, birthplace='bojnord'),
        dict(age=5, birthplace='mashhad'),
        dict(age=12, birthplace='ramsar'),
    ]
)
df2

Unnamed: 0,age,birthplace
0,11,tehran
1,21,bojnord
2,5,mashhad
3,12,ramsar


In [47]:
# Way 3: a list of row dicts plus an explicit index, so the rows are
# labelled by name instead of 0..3.
df3 = pd.DataFrame(
    data=[
        dict(age=11, birthplace='tehran'),
        dict(age=21, birthplace='bojnord'),
        dict(age=5, birthplace='mashhad'),
        dict(age=12, birthplace='ramsar'),
    ],
    index=['ali', 'saeed', 'sara', 'reza'],
)
df3

Unnamed: 0,age,birthplace
ali,11,tehran
saeed,21,bojnord
sara,5,mashhad
reza,12,ramsar


In [49]:
# Way 4: a list of lists, one inner list per row. Neither row nor column
# labels are given, so both default to integers (0, 1, ...).
df4 = pd.DataFrame(
    data=[
        [11, 'tehran'],
        [21, 'bojnord'],
        [5, 'mashhad'],
        [12, 'ramsar'],
    ]
)
df4

Unnamed: 0,0,1
0,11,tehran
1,21,bojnord
2,5,mashhad
3,12,ramsar


In [51]:
# Way 5: a list of lists with both row labels (index) and column names
# supplied explicitly.
df5 = pd.DataFrame(
    data=[
        [11, 'tehran'],
        [21, 'bojnord'],
        [5, 'mashhad'],
        [12, 'ramsar'],
    ],
    index=['ali', 'saeed', 'sara', 'reza'],
    columns=['age', 'birthplace'],
)
df5

Unnamed: 0,age,birthplace
ali,11,tehran
saeed,21,bojnord
sara,5,mashhad
reza,12,ramsar


In [54]:
# Way 6: keep the names as an ordinary column. Only column names are
# given, so the rows get the default integer index 0..3.
df6 = pd.DataFrame(
    data=[
        ['ali', 11, 'tehran'],
        ['saeed', 21, 'bojnord'],
        ['sara', 5, 'mashhad'],
        ['reza', 12, 'ramsar'],
    ],
    columns=['name', 'age', 'birthplace'],
)
df6

Unnamed: 0,name,age,birthplace
0,ali,11,tehran
1,saeed,21,bojnord
2,sara,5,mashhad
3,reza,12,ramsar


In [56]:
# Promote the 'name' column to the row index.
# Note: `set_index` returns a new DataFrame and leaves `df6` as it was.
# It also accepts `inplace=True` to change the original DataFrame instead,
# but assigning the result to a new name keeps this cell safe to re-run.
df7 = df6.set_index(keys='name')
df7

Unnamed: 0_level_0,age,birthplace
name,Unnamed: 1_level_1,Unnamed: 2_level_1
ali,11,tehran
saeed,21,bojnord
sara,5,mashhad
reza,12,ramsar


In [57]:
# Some more examples:

In [58]:
# A dict of equal-length lists: column 'a' holds the first five multiples
# of 2 and column 'b' the first five multiples of 3 (both starting at 0).
pd.DataFrame(
    {
        'a': list(range(0, 10, 2)),
        'b': list(range(0, 15, 3)),
    }
)

Unnamed: 0,a,b
0,0,0
1,2,3
2,4,6
3,6,9
4,8,12


In [60]:
# Row dicts do not need the same keys: the columns are the union of all
# keys, and a key missing from a row shows up as NaN in that row.
# NOTE: this rebinds `df`, which earlier held the age/birthplace table.
df = pd.DataFrame(
    data=[
        dict(a=1, b=2),
        dict(b=3, c=1),
    ]
)
df

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,1.0


In [61]:
# Column 'b' is present in both input dicts, so it has no missing values and stays int64.
df['b']

0    2
1    3
Name: b, dtype: int64

In [62]:
# Column 'c' is missing from the first dict, so row 0 is NaN and the column is stored as float64.
df['c']

0    NaN
1    1.0
Name: c, dtype: float64