# Pandas

## Pandas Series

In [264]:
import numpy as np
import pandas as pd

In [265]:
pd.Series([1,2,3,4])

0    1
1    2
2    3
3    4
dtype: int64

In [266]:
series = pd.Series([1,2,3,4])
type(series)

pandas.core.series.Series

In [267]:
series.axes

[RangeIndex(start=0, stop=4, step=1)]

In [268]:
series.dtype

dtype('int64')

In [269]:
series.empty

False

In [270]:
series.ndim

1

In [271]:
series.size

4

In [272]:
series.values

array([1, 2, 3, 4], dtype=int64)

In [273]:
series[:4]

0    1
1    2
2    3
3    4
dtype: int64

In [274]:
series.head(2)

0    1
1    2
dtype: int64

In [275]:
series.tail(2)

2    3
3    4
dtype: int64

In [276]:
a=np.array([1,2,3,55,77,888])
a

array([  1,   2,   3,  55,  77, 888])

In [277]:
series = pd.Series(a)
series

0      1
1      2
2      3
3     55
4     77
5    888
dtype: int32

In [278]:
series.index

RangeIndex(start=0, stop=6, step=1)

In [279]:
pd.Series([1,5,0.8,34], index=[1,3,5,7])

1     1.0
3     5.0
5     0.8
7    34.0
dtype: float64

In [280]:
series = pd.Series([1,2,3,66,88,9,0.3], index=['a','b','c','d','e','f','g'])

In [281]:
series['g']

0.3

In [282]:
dictionary = {'reg':10, 'log':11, 'cart':12}
dictionary

{'reg': 10, 'log': 11, 'cart': 12}

In [283]:
series =pd.Series(dictionary)

In [284]:
series['reg']

10

In [285]:
series['log':'cart']

log     11
cart    12
dtype: int64

In [286]:
pd.concat([series,series])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [287]:
series.append(series)

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

In [288]:
series

reg     10
log     11
cart    12
dtype: int64

## Index Operations

In [289]:
series = pd.Series([1,2,3,4,5,66,77])
series

0     1
1     2
2     3
3     4
4     5
5    66
6    77
dtype: int64

In [290]:
series.keys

<bound method Series.keys of 0     1
1     2
2     3
3     4
4     5
5    66
6    77
dtype: int64>

In [291]:
list(series.items())

[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 66), (6, 77)]

In [292]:
series.values

array([ 1,  2,  3,  4,  5, 66, 77], dtype=int64)

In [293]:
5 in series

True

In [294]:
series[[4,5]]

4     5
5    66
dtype: int64

In [295]:
series =pd.Series(dictionary)
series

reg     10
log     11
cart    12
dtype: int64

In [296]:
series['reg':'cart']

reg     10
log     11
cart    12
dtype: int64

In [297]:
series[(series>10) & (series<12)]

log    11
dtype: int64

In [298]:
data = pd.Series(['a','b','c'], index = [1,3,5])
data

1    a
3    b
5    c
dtype: object

In [299]:
data[1] # data[0] will gives error

'a'

In [300]:
data[0:3]

1    a
3    b
5    c
dtype: object

In [301]:
data.loc[5]

'c'

In [302]:
data.loc[0:4] # label based indexing location

1    a
3    b
dtype: object

In [303]:
data.iloc[0] # positional indexing, iloc , catch by resetting index

'a'

## Pandas Dataframe

In [304]:
l = [1,4,577,343]
l

[1, 4, 577, 343]

In [305]:
data = pd.DataFrame(l, columns=['values'])
type(data)

pandas.core.frame.DataFrame

In [306]:
data.axes

[RangeIndex(start=0, stop=4, step=1), Index(['values'], dtype='object')]

In [307]:
data.shape

(4, 1)

In [308]:
data.ndim

2

In [309]:
data.values

array([[  1],
       [  4],
       [577],
       [343]], dtype=int64)

In [310]:
data.head(2)

Unnamed: 0,values
0,1
1,4


In [311]:
data.tail(2)

Unnamed: 0,values
2,577
3,343


In [312]:
a=np.array([1,2,3,4,566])
type(a)

numpy.ndarray

In [313]:
pd.DataFrame(a,columns=['values'])

Unnamed: 0,values
0,1
1,2
2,3
3,4
4,566


In [314]:
v = np.arange(1,10).reshape(3,3)
v

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [315]:
data=pd.DataFrame(v,columns=['v1','v2','v3'])
data

Unnamed: 0,v1,v2,v3
0,1,2,3
1,4,5,6
2,7,8,9


In [316]:
data.columns=('value1','value2','value3')
data

Unnamed: 0,value1,value2,value3
0,1,2,3
1,4,5,6
2,7,8,9


In [317]:
data=pd.DataFrame(v,columns=['v1','v2','v3'], index=['a','b','c'])
data

Unnamed: 0,v1,v2,v3
a,1,2,3
b,4,5,6
c,7,8,9


In [318]:
pd.DataFrame(pd.Series([1,2,3,4]),columns=['variable'])

Unnamed: 0,variable
0,1
1,2
2,3
3,4


In [319]:
a = pd.Series([1,2,3,4])
b = pd.Series([1,2,3,4])

In [320]:
pd.DataFrame({'variable1':a,
              'variable2':b})

Unnamed: 0,variable1,variable2
0,1,1
1,2,2
2,3,3
3,4,4


In [321]:
dict_ = {
    'reg':{'RMSE':101,
           'MSE':111,
           'SSE':121},
    'log':{'RMSE':102,
           'MSE':112,
           'SSE':122},
    'cart':{'RMSE':103,
           'MSE':113,
           'SSE':123},
}

In [322]:
pd.DataFrame(dict_)

Unnamed: 0,reg,log,cart
RMSE,101,102,103
MSE,111,112,113
SSE,121,122,123


In [323]:
s1 = np.random.randint(10,size=5)
s2 = np.random.randint(10,size=5)
s3 = np.random.randint(10,size=5)
df = pd.DataFrame({'var1':s1,'var2':s2,'var3':s3})
df

Unnamed: 0,var1,var2,var3
0,5,6,7
1,2,0,2
2,2,1,0
3,7,7,7
4,3,1,6


In [324]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,5,6,7


In [325]:
df.index=['a','b','c','d','e']

In [326]:
df['c':'e']

Unnamed: 0,var1,var2,var3
c,2,1,0
d,7,7,7
e,3,1,6


In [327]:
df.drop('a', axis=0, inplace=True)

In [328]:
df

Unnamed: 0,var1,var2,var3
b,2,0,2
c,2,1,0
d,7,7,7
e,3,1,6


In [329]:
l = ['b','c']

In [330]:
l

['b', 'c']

In [331]:
df.drop(l,axis=0)

Unnamed: 0,var1,var2,var3
d,7,7,7
e,3,1,6


In [332]:
'var1' in df

True

In [333]:
l = ['var1','var2','var7']

In [334]:
for i in l:
    print(i in df)

True
True
False


In [335]:
df['var1'] is df['var2']

False

In [336]:
df['var1'] # dictionary type choice

b    2
c    2
d    7
e    3
Name: var1, dtype: int32

In [337]:
df.var1 # attribute type choice

b    2
c    2
d    7
e    3
Name: var1, dtype: int32

In [338]:
df[['var1','var2']]

Unnamed: 0,var1,var2
b,2,0
c,2,1
d,7,7
e,3,1


In [339]:
l = ['var1','var2']
df[l]

Unnamed: 0,var1,var2
b,2,0
c,2,1
d,7,7
e,3,1


In [340]:
df['added_column'] = df['var1'] / df['var2']
df

Unnamed: 0,var1,var2,var3,added_column
b,2,0,2,inf
c,2,1,0,2.0
d,7,7,7,1.0
e,3,1,6,3.0


In [341]:
df.drop('c',axis=0, inplace=True)

In [342]:
df

Unnamed: 0,var1,var2,var3,added_column
b,2,0,2,inf
d,7,7,7,1.0
e,3,1,6,3.0


In [343]:
df.drop('var1', axis=1, inplace=True)

In [344]:
df

Unnamed: 0,var2,var3,added_column
b,0,2,inf
d,7,7,1.0
e,1,6,3.0


## Reach to observations and variables

In [345]:
df

Unnamed: 0,var2,var3,added_column
b,0,2,inf
d,7,7,1.0
e,1,6,3.0


In [346]:
df.iloc[:3]

Unnamed: 0,var2,var3,added_column
b,0,2,inf
d,7,7,1.0
e,1,6,3.0


In [347]:
df.iloc[:2, :2]

Unnamed: 0,var2,var3
b,0,2
d,7,7


In [348]:
df['var3']

b    2
d    7
e    6
Name: var3, dtype: int32

In [349]:
df.loc[:'e', 'var3']

b    2
d    7
e    6
Name: var3, dtype: int32

In [350]:
df[df.var3 > 4]['var2']

d    7
e    1
Name: var2, dtype: int32

In [351]:
df[(df.var3 > 3 & (df.var3 < 2))]['var2'] # conditional index operations

b    0
d    7
e    1
Name: var2, dtype: int32

In [352]:
df.loc[df.var2 > 3, ['added_column']]

Unnamed: 0,added_column
d,1.0


## Merge - Join Operations

In [353]:
s1 = np.random.randint(10,size=5)
s2 = np.random.randint(10,size=5)
s3 = np.random.randint(10,size=5)
df = pd.DataFrame({'var1':s1,'var2':s2,'var3':s3})
df

Unnamed: 0,var1,var2,var3
0,3,8,8
1,9,1,9
2,3,6,8
3,2,4,2
4,6,9,8


In [354]:
df2 = df+99
df2

Unnamed: 0,var1,var2,var3
0,102,107,107
1,108,100,108
2,102,105,107
3,101,103,101
4,105,108,107


In [355]:
pd.concat([df,df2], axis=0)

Unnamed: 0,var1,var2,var3
0,3,8,8
1,9,1,9
2,3,6,8
3,2,4,2
4,6,9,8
0,102,107,107
1,108,100,108
2,102,105,107
3,101,103,101
4,105,108,107


In [356]:
pd.concat([df,df2], axis=0, ignore_index=True)

Unnamed: 0,var1,var2,var3
0,3,8,8
1,9,1,9
2,3,6,8
3,2,4,2
4,6,9,8
5,102,107,107
6,108,100,108
7,102,105,107
8,101,103,101
9,105,108,107


In [357]:
df2.columns= ['var1','var2','v3']
df2

Unnamed: 0,var1,var2,v3
0,102,107,107
1,108,100,108
2,102,105,107
3,101,103,101
4,105,108,107


In [358]:
pd.concat([df,df2], axis=0, ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,v3,var1,var2,var3
0,,3,8,8.0
1,,9,1,9.0
2,,3,6,8.0
3,,2,4,2.0
4,,6,9,8.0
5,107.0,102,107,
6,108.0,108,100,
7,107.0,102,105,
8,101.0,101,103,
9,107.0,105,108,


In [359]:
pd.concat([df,df2], axis=0, join='inner')

Unnamed: 0,var1,var2
0,3,8
1,9,1
2,3,6
3,2,4
4,6,9
0,102,107
1,108,100
2,102,105
3,101,103
4,105,108


In [360]:
pd.concat([df,df2], join_axes=[df.columns])

  """Entry point for launching an IPython kernel.


Unnamed: 0,var1,var2,var3
0,3,8,8.0
1,9,1,9.0
2,3,6,8.0
3,2,4,2.0
4,6,9,8.0
0,102,107,
1,108,100,
2,102,105,
3,101,103,
4,105,108,


In [361]:
df1 = pd.DataFrame({'calisanlar': ['Ali', 'Veli', 'Ayse', 'Fatma'],
                    'grup': ['Muhasebe', 'Muhendislik', 'Muhendislik', 'İK']})

df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,İK


In [362]:
df2 = pd.DataFrame({'calisanlar': ['Ayse', 'Ali', 'Veli', 'Fatma'],
                    'ilk_giris': [2010, 2009, 2014, 2019]})

df2

Unnamed: 0,calisanlar,ilk_giris
0,Ayse,2010
1,Ali,2009
2,Veli,2014
3,Fatma,2019


In [363]:
pd.merge(df1, df2)

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,İK,2019


In [364]:
pd.merge(df1, df2, on = 'calisanlar')

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,İK,2019


In [365]:
df3 = pd.merge(df1,df2)
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,İK,2019


In [366]:
df4 = pd.DataFrame({'grup': ['Muhasebe', 'Muhendislik', 'İK'],
                    'mudur': ['Caner', 'Mustafa', 'Berkcan']})

df4

Unnamed: 0,grup,mudur
0,Muhasebe,Caner
1,Muhendislik,Mustafa
2,İK,Berkcan


In [367]:
pd.merge(df3,df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2009,Caner
1,Veli,Muhendislik,2014,Mustafa
2,Ayse,Muhendislik,2010,Mustafa
3,Fatma,İK,2019,Berkcan


In [368]:
df5 = pd.DataFrame({'grup': ['Muhasebe', 'Muhasebe',
                              'Muhendislik', 'Muhendislik', 'İK', 'İK'],
                    'yetenekler': ['matematik', 'excel', 'kodlama', 'linux',
                               'excel', 'yonetim']})

df5

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muhendislik,kodlama
3,Muhendislik,linux
4,İK,excel
5,İK,yonetim


In [369]:
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,İK,2019


In [370]:
pd.merge(df3,df5)

Unnamed: 0,calisanlar,grup,ilk_giris,yetenekler
0,Ali,Muhasebe,2009,matematik
1,Ali,Muhasebe,2009,excel
2,Veli,Muhendislik,2014,kodlama
3,Veli,Muhendislik,2014,linux
4,Ayse,Muhendislik,2010,kodlama
5,Ayse,Muhendislik,2010,linux
6,Fatma,İK,2019,excel
7,Fatma,İK,2019,yonetim


In [371]:
df5 = pd.DataFrame({'grup': ['Muhasebe', 'Muhasebe',
                              'Muhendislik', 'Muhendislik', 'İK', 'İK'],
                    'yetenekler': ['matematik', 'excel', 'kodlama', 'linux',
                               'excel', 'yonetim']})

df5

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muhendislik,kodlama
3,Muhendislik,linux
4,İK,excel
5,İK,yonetim


In [372]:
df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,İK


In [373]:
pd.merge(df1,df5)

Unnamed: 0,calisanlar,grup,yetenekler
0,Ali,Muhasebe,matematik
1,Ali,Muhasebe,excel
2,Veli,Muhendislik,kodlama
3,Veli,Muhendislik,linux
4,Ayse,Muhendislik,kodlama
5,Ayse,Muhendislik,linux
6,Fatma,İK,excel
7,Fatma,İK,yonetim


In [374]:
df3 = pd.DataFrame({'name': ['Ali', 'Veli', 'Ayse', 'Fatma'],
                    'maas': [70000, 80000, 120000, 90000]})

df3

Unnamed: 0,name,maas
0,Ali,70000
1,Veli,80000
2,Ayse,120000
3,Fatma,90000


In [375]:
df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,İK


In [376]:
pd.merge(df1,df3, left_on='calisanlar', right_on='name').drop('calisanlar', axis=1)

Unnamed: 0,grup,name,maas
0,Muhasebe,Ali,70000
1,Muhendislik,Veli,80000
2,Muhendislik,Ayse,120000
3,İK,Fatma,90000


In [377]:
df3a = df3.set_index('name')
df3a

Unnamed: 0_level_0,maas
name,Unnamed: 1_level_1
Ali,70000
Veli,80000
Ayse,120000
Fatma,90000


In [378]:
df3a.shape

(4, 1)

In [380]:
df2a = df2.set_index('calisanlar')
df2a

Unnamed: 0_level_0,ilk_giris
calisanlar,Unnamed: 1_level_1
Ayse,2010
Ali,2009
Veli,2014
Fatma,2019


In [381]:
pd.merge(df3a, df2a, left_index=True, right_index=True) # intersection df3a<>df2a (there is none)

Unnamed: 0,maas,ilk_giris
Ali,70000,2009
Veli,80000,2014
Ayse,120000,2010
Fatma,90000,2019


In [382]:
df3a.join(df2a)

Unnamed: 0_level_0,maas,ilk_giris
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Ali,70000,2009
Veli,80000,2014
Ayse,120000,2010
Fatma,90000,2019


In [383]:
dfa = pd.DataFrame({'calisanlar': ['Ali', 'Veli', 'Ayse', 'Fatma'],
                    'siralama': [1, 2, 3, 4]})

dfa

Unnamed: 0,calisanlar,siralama
0,Ali,1
1,Veli,2
2,Ayse,3
3,Fatma,4


In [384]:
dfb = pd.DataFrame({'calisanlar': ['Ali', 'Veli', 'Ayse', 'Fatma'],
                    'siralama': [3, 1, 4, 2]})

dfb

Unnamed: 0,calisanlar,siralama
0,Ali,3
1,Veli,1
2,Ayse,4
3,Fatma,2


In [385]:
pd.merge(dfa, dfb, on = 'calisanlar')

Unnamed: 0,calisanlar,siralama_x,siralama_y
0,Ali,1,3
1,Veli,2,1
2,Ayse,3,4
3,Fatma,4,2


In [386]:
pd.merge(dfa, dfb, on = 'calisanlar', suffixes=['Salary','Experience'])

Unnamed: 0,calisanlar,siralamaSalary,siralamaExperience
0,Ali,1,3
1,Veli,2,1
2,Ayse,3,4
3,Fatma,4,2
