# Pandas Serisi Olusturmak

In [2]:
import pandas as pd
import numpy as np

In [3]:
pd.Series([99,112,70,-3,11])  # Degerleri index ile beraber tutar internal 

0     99
1    112
2     70
3     -3
4     11
dtype: int64

In [4]:
seri = pd.Series([99,112,70,-3,11])
type(seri)

pandas.core.series.Series

In [6]:
seri.axes # index bilgisine ulas, index 5'e kadar 5 dahil degil

[RangeIndex(start=0, stop=5, step=1)]

In [7]:
seri.dtype

dtype('int64')

In [9]:
seri.ndim # Boyut

1

In [10]:
seri.values # index olmadan sadece veriler ulasmak

array([ 99, 112,  70,  -3,  11])

In [12]:
seri.head()

0     99
1    112
2     70
3     -3
4     11
dtype: int64

In [14]:
seri.tail(3)

2    70
3    -3
4    11
dtype: int64

## index isimlendirmesi

In [18]:
seri = pd.Series([99,112,70,-3,11])
seri

0     99
1    112
2     70
3     -3
4     11
dtype: int64

seri = pd.Series([99,112,70,-3,11], index = [1,3,5,7,9])
seri

In [24]:
seri = pd.Series([99,112,70,-3,11], index = ['a', 3, 'c', 7,9])
seri

a     99
3    112
c     70
7     -3
9     11
dtype: int64

In [25]:
seri['a']

99

In [27]:
seri['a':'c']

a     99
3    112
c     70
dtype: int64

## Sozluk uzerinde Liste Olusturmak

In [28]:
sozluk = {'reg':10, 'log':11, 'cart':12}

In [29]:
seri = pd.Series(sozluk)
seri

reg     10
log     11
cart    12
dtype: int64

In [31]:
seri['log':]

log     11
cart    12
dtype: int64

## Iki seriyi birlestirmek (concat)

In [39]:
seri

reg     10
log     11
cart    12
dtype: int64

In [42]:
seri2 = pd.Series({'reg':1999, 'log':77, 'cart':555})
seri2

reg     1999
log       77
cart     555
dtype: int64

In [43]:
pd.concat([seri, seri2])

reg       10
log       11
cart      12
reg     1999
log       77
cart     555
dtype: int64

In [44]:
pd.concat([seri, seri2], axis = 1)

Unnamed: 0,0,1
reg,10,1999
log,11,77
cart,12,555


In [45]:
#?pd.concat ====> Concatenate

# Eleman Birlestirme

In [49]:
a = np.array([1,2,33,444,75])
seri = pd.Series(a)
seri         

0      1
1      2
2     33
3    444
4     75
dtype: int64

In [51]:
seri[0]

1

In [52]:
seri[0:3]

0     1
1     2
2    33
dtype: int64

In [54]:
seri = pd.Series([1221, 200, 150, 99],
               index = ['reg', 'log', 'cart', 'rf'])

In [56]:
seri

reg     1221
log      200
cart     150
rf        99
dtype: int64

In [58]:
seri.index

Index(['reg', 'log', 'cart', 'rf'], dtype='object')

In [59]:
seri.axes

[Index(['reg', 'log', 'cart', 'rf'], dtype='object')]

In [60]:
seri.keys

<bound method Series.keys of reg     1221
log      200
cart     150
rf        99
dtype: int64>

In [61]:
list(seri.items())

[('reg', 1221), ('log', 200), ('cart', 150), ('rf', 99)]

In [66]:
seri2 = pd.Series([999, 222, 777, 888],
               index = ['reg', 'log', 'cart', 'rf'])

In [67]:
s1 = pd.concat([seri, seri2], axis = 1)
s1

Unnamed: 0,0,1
reg,1221,999
log,200,222
cart,150,777
rf,99,888


In [68]:
list(s1.items())

[(0,
  reg     1221
  log      200
  cart     150
  rf        99
  Name: 0, dtype: int64),
 (1,
  reg     999
  log     222
  cart    777
  rf      888
  Name: 1, dtype: int64)]

In [69]:
s1[0]

reg     1221
log      200
cart     150
rf        99
Name: 0, dtype: int64

In [73]:
s1[0][0] # s1[0,0] ====> hata verir !!!!!

1221

## Eleman Sorgulama

In [78]:
'reg' in seri

True

In [79]:
'a' in seri

False

In [80]:
seri['reg']

1221

In [81]:
seri[['reg', 'rf']]

reg    1221
rf       99
dtype: int64

In [82]:
## Eleman degistirme

In [83]:
seri['reg']

1221

In [85]:
seri['reg'] = 'Abdullah'
seri['reg']

'Abdullah'

# Pandas DataFrame Olusturma

In [86]:
l = [1,2,39,67,90]
l

[1, 2, 39, 67, 90]

In [89]:
pd.DataFrame(l, columns = ['degisken_ismi'])

Unnamed: 0,degisken_ismi
0,1
1,2
2,39
3,67
4,90


In [90]:
m = np.arange(1,10).reshape(3,3)
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [92]:
pd.DataFrame(m, columns = ['var1', 'var2', 'var3'])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


## df Isimlendirme

In [93]:
df = pd.DataFrame(m, columns = ['var1', 'var2', 'var3'])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [94]:
df.columns

Index(['var1', 'var2', 'var3'], dtype='object')

## Kolon Adi Degistirme

In [95]:
df.columns = ('deg1', 'deg2', 'deg3')
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [96]:
type(df)

pandas.core.frame.DataFrame

In [97]:
df.axes  # Satir ve Sutun bilgisine ulasilir

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [98]:
df.ndim

2

In [99]:
df.shape

(3, 3)

In [100]:
df.size # eleman sayisi

9

In [101]:
df.values #!!!!! Numpy arraya cevirip degerleri verdi

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [116]:
a = np.array([1,2,3,4,5])
b = pd.DataFrame(a, columns = ['deg1'])
b

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


In [119]:
b['deg1'][0]

1

# Eleman Islemleri

In [129]:
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [133]:
sozluk = {'var1':s1, 'var2':s2, 'var3':3}
sozluk

{'var1': array([6, 8, 0, 1, 2]), 'var2': array([5, 7, 3, 4, 7]), 'var3': 3}

In [138]:
df = pd.DataFrame(sozluk)
df

Unnamed: 0,var1,var2,var3
0,6,5,3
1,8,7,3
2,0,3,3
3,1,4,3
4,2,7,3


In [167]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,6,5,3


# Index Degistirmek

In [168]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [169]:
df.index = ['a', 'b', 'c', 'd', 'e']
df

Unnamed: 0,var1,var2,var3
a,6,5,3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [171]:
df['c':'e']

Unnamed: 0,var1,var2,var3
c,0,3,3
d,1,4,3
e,2,7,3


## Silme Islemi

df.drop(0, axis = 0 , inplace = True)

In [173]:
df.drop('a', axis = 0) # Dikkat kalici silmez !!!!!

Unnamed: 0,var1,var2,var3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [174]:
df # ana DataFrame ayni duruyor

Unnamed: 0,var1,var2,var3
a,6,5,3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [175]:
df.drop('a', axis = 0, inplace = True) 
df

Unnamed: 0,var1,var2,var3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [176]:
# Fancy ile Silme

In [177]:
l = ['c', 'd']
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,8,7,3
e,2,7,3


## in (DataFrame icinde mi ? Sorgulama)

In [178]:
'var1' in df

True

In [181]:
l = ['var1', 'xxx', 'var3']

In [182]:
for i in l:
    print(i in df)

True
False
True


## Yeni Sutun Eklemek ve Silmek (New Column added and deleted)

In [183]:
df

Unnamed: 0,var1,var2,var3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [185]:
df['var4'] = df['var1'] * df['var2']
df

Unnamed: 0,var1,var2,var3,var4
b,8,7,3,56
c,0,3,3,0
d,1,4,3,4
e,2,7,3,14


In [186]:
df.drop('var4', axis = 1)

Unnamed: 0,var1,var2,var3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [188]:
df.drop('var4', axis = 1, inplace = True)
df

Unnamed: 0,var1,var2,var3
b,8,7,3
c,0,3,3
d,1,4,3
e,2,7,3


In [189]:
l = ['var1', 'var2']
df.drop(l, axis = 1)

Unnamed: 0,var3
b,3
c,3
d,3
e,3


# Gozlem ve Degisken Secimi : loc & iloc

In [192]:
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m, columns = ['v1', 'v2', 'v3'])
df                

Unnamed: 0,v1,v2,v3
0,17,11,23
1,26,16,29
2,26,15,27
3,8,18,6
4,29,2,23
5,20,27,2
6,16,2,9
7,12,27,4
8,17,5,24
9,29,19,11


## loc : Tanimlandigi sekli ile secim yapar.

In [201]:
df.loc[0:3] # 3. Indeksi de alirrrrr

Unnamed: 0,v1,v2,v3
0,17,11,23
1,26,16,29
2,26,15,27
3,8,18,6


## iloc : alisik oldugumuz indeksleme mantigi ile secim yapar.

In [203]:
df.iloc[0:3]  # 3. indeks dahil degildir.

Unnamed: 0,v1,v2,v3
0,17,11,23
1,26,16,29
2,26,15,27


In [204]:
df.iloc[0,0]

17

In [206]:
# df.loc[0,0]   ======> Hata verir

In [207]:
df.iloc[:3,:2]

Unnamed: 0,v1,v2
0,17,11
1,26,16
2,26,15


In [210]:
# df.loc[:3,:2]    ======> Hata verir

In [211]:
df.loc[0:3, 'v3']

0    23
1    29
2    27
3     6
Name: v3, dtype: int64

In [213]:
#df.iloc[0:3, 'v3'] =======> Hata verir

In [216]:
df.iloc[0:3]['v3']

0    23
1    29
2    27
Name: v3, dtype: int64

# Kosullu Eleman Islmeleri

In [218]:
df

Unnamed: 0,v1,v2,v3
0,17,11,23
1,26,16,29
2,26,15,27
3,8,18,6
4,29,2,23
5,20,27,2
6,16,2,9
7,12,27,4
8,17,5,24
9,29,19,11


In [219]:
df['v1']

0    17
1    26
2    26
3     8
4    29
5    20
6    16
7    12
8    17
9    29
Name: v1, dtype: int64

In [223]:
df['v1'][0:3]

0    17
1    26
2    26
Name: v1, dtype: int64

In [224]:
df[0:3]

Unnamed: 0,v1,v2,v3
0,17,11,23
1,26,16,29
2,26,15,27


In [225]:
df[0:3]['v1']

0    17
1    26
2    26
Name: v1, dtype: int64

In [226]:
df[0:3][['v1', 'v2']] # Fancy islemi yapilir , degilse hata verir

Unnamed: 0,v1,v2
0,17,11
1,26,16
2,26,15


In [227]:
df.v1

0    17
1    26
2    26
3     8
4    29
5    20
6    16
7    12
8    17
9    29
Name: v1, dtype: int64

In [230]:
df[df.v1 > 20] # =====> Bu DataFrame in tamamidir, kosulu saglayanlardir

Unnamed: 0,v1,v2,v3
1,26,16,29
2,26,15,27
4,29,2,23
9,29,19,11


In [232]:
df[df.v1 > 20]['v1']  # Bu DataFrame in kosulu saglayanlarin v1 stunudur sadece

1    26
2    26
4    29
9    29
Name: v1, dtype: int64

In [233]:
df[(df.v1 > 20) & (df.v3 < 25)]

Unnamed: 0,v1,v2,v3
4,29,2,23
9,29,19,11


In [236]:
df[(df.v1 > 20) & (df.v3 < 25)][['v2', 'v3']]

Unnamed: 0,v2,v3
4,2,23
9,19,11


In [248]:
m = np.random.randint(1,30, size  = (5,3))
df1 = pd.DataFrame(m, columns = ['c1', 'c2', 'c3'])
df1

Unnamed: 0,c1,c2,c3
0,12,26,18
1,19,10,18
2,27,7,2
3,21,25,17
4,5,13,19


In [249]:
df2 = df1 + 99
df2

Unnamed: 0,c1,c2,c3
0,111,125,117
1,118,109,117
2,126,106,101
3,120,124,116
4,104,112,118


# pd.concat([df1,df2], ignore_index = False, join = 'outer')

In [251]:
pd.concat([df1,df2])

Unnamed: 0,c1,c2,c3
0,12,26,18
1,19,10,18
2,27,7,2
3,21,25,17
4,5,13,19
0,111,125,117
1,118,109,117
2,126,106,101
3,120,124,116
4,104,112,118


In [252]:
?pd.concat

[0;31mSignature:[0m
[0mpd[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mobjs[0m[0;34m:[0m [0;34m'Iterable[NDFrame] | Mapping[Hashable, NDFrame]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mjoin[0m[0;34m=[0m[0;34m'outer'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mignore_index[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeys[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlevels[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnames[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverify_integrity[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msort[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcopy[0m[0;34m:[0m [0;34m'b

## ignore_index = True

In [253]:
pd.concat([df1,df2], ignore_index=False)

Unnamed: 0,c1,c2,c3
0,12,26,18
1,19,10,18
2,27,7,2
3,21,25,17
4,5,13,19
0,111,125,117
1,118,109,117
2,126,106,101
3,120,124,116
4,104,112,118


In [254]:
pd.concat([df1,df2], ignore_index=True)

Unnamed: 0,c1,c2,c3
0,12,26,18
1,19,10,18
2,27,7,2
3,21,25,17
4,5,13,19
5,111,125,117
6,118,109,117
7,126,106,101
8,120,124,116
9,104,112,118


In [256]:
df1.columns

Index(['c1', 'c2', 'c3'], dtype='object')

In [257]:
df2.columns

Index(['c1', 'c2', 'c3'], dtype='object')

In [259]:
df2.columns = ['c1', 'c2', 'deg3']
df2

Unnamed: 0,c1,c2,deg3
0,111,125,117
1,118,109,117
2,126,106,101
3,120,124,116
4,104,112,118


In [260]:
pd.concat([df1,df2])   #========> NaN hatasi olustu

Unnamed: 0,c1,c2,c3,deg3
0,12,26,18.0,
1,19,10,18.0,
2,27,7,2.0,
3,21,25,17.0,
4,5,13,19.0,
0,111,125,,117.0
1,118,109,,117.0
2,126,106,,101.0
3,120,124,,116.0
4,104,112,,118.0


## join = 'inner' , 'outer'

In [266]:
pd.concat([df1,df2], join = 'inner')

Unnamed: 0,c1,c2
0,12,26
1,19,10
2,27,7
3,21,25
4,5,13
0,111,125
1,118,109
2,126,106
3,120,124
4,104,112


In [267]:
pd.concat([df1,df2], join = 'outer')

Unnamed: 0,c1,c2,c3,deg3
0,12,26,18.0,
1,19,10,18.0,
2,27,7,2.0,
3,21,25,17.0,
4,5,13,19.0,
0,111,125,,117.0
1,118,109,,117.0
2,126,106,,101.0
3,120,124,,116.0
4,104,112,,118.0


In [265]:
pd.concat([df1,df2], join = 'inner', ignore_index=True)

Unnamed: 0,c1,c2
0,12,26
1,19,10
2,27,7
3,21,25
4,5,13
5,111,125
6,118,109
7,126,106
8,120,124
9,104,112


# Ileri Birlestirme Islmeleri

In [11]:
df3 = pd.DataFrame({'calisanlar' : ['Ali', 'Veli', 'Ayse', 'Fatma'],
                   'grup' : ['Muhasebe', 'Muh', 'Muh', 'IK']})
df3

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muh
2,Ayse,Muh
3,Fatma,IK


In [12]:
df4 = pd.DataFrame({'calisanlar' : ['Ali', 'Veli', 'Ayse', 'Fatma'],
                   'ilk_giris' : [2010, 2009, 2014, 2019]})
df4

Unnamed: 0,calisanlar,ilk_giris
0,Ali,2010
1,Veli,2009
2,Ayse,2014
3,Fatma,2019


In [13]:
pd.concat([df3,df4], axis = 1, )

Unnamed: 0,calisanlar,grup,calisanlar.1,ilk_giris
0,Ali,Muhasebe,Ali,2010
1,Veli,Muh,Veli,2009
2,Ayse,Muh,Ayse,2014
3,Fatma,IK,Fatma,2019


In [275]:
?pd.concat

[0;31mSignature:[0m
[0mpd[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mobjs[0m[0;34m:[0m [0;34m'Iterable[NDFrame] | Mapping[Hashable, NDFrame]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mjoin[0m[0;34m=[0m[0;34m'outer'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mignore_index[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeys[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlevels[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnames[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverify_integrity[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msort[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcopy[0m[0;34m:[0m [0;34m'b

## pd._**merge()**_

In [34]:
df_m = pd.merge(df3,df4)
df_m

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muh,2009
2,Ayse,Muh,2014
3,Fatma,IK,2019


In [14]:
pd.merge(df3, df4, on = 'calisanlar')

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muh,2009
2,Ayse,Muh,2014
3,Fatma,IK,2019


In [18]:
?pd.merge

[0;31mSignature:[0m
[0mpd[0m[0;34m.[0m[0mmerge[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mleft[0m[0;34m:[0m [0;34m'DataFrame | Series'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mright[0m[0;34m:[0m [0;34m'DataFrame | Series'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mhow[0m[0;34m:[0m [0;34m'str'[0m [0;34m=[0m [0;34m'inner'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mon[0m[0;34m:[0m [0;34m'IndexLabel | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mleft_on[0m[0;34m:[0m [0;34m'IndexLabel | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mright_on[0m[0;34m:[0m [0;34m'IndexLabel | None'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mleft_index[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mright_index[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m

In [22]:
pd.merge(df3, df4, how = 'right')

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muh,2009
2,Ayse,Muh,2014
3,Fatma,IK,2019


## coktan teke (many to one)

In [46]:
df5 = pd.DataFrame({'grup': ['Muhasebe', 'Muh', 'IK'],
                    'mudur': ['Caner', 'Mustafa', 'Berkcan']})
df5

Unnamed: 0,grup,mudur
0,Muhasebe,Caner
1,Muh,Mustafa
2,IK,Berkcan


In [47]:
df_m

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2010
1,Veli,Muh,2009
2,Ayse,Muh,2014
3,Fatma,IK,2019


In [49]:
pd.merge(df_m, df5)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2010,Caner
1,Veli,Muh,2009,Mustafa
2,Ayse,Muh,2014,Mustafa
3,Fatma,IK,2019,Berkcan


## Coktan Coka (many to many)

In [53]:
df6 = pd.DataFrame({'grup': ['Muhasebe', 'Muhasebe',
                              'Muh', 'Muh', 'IK', 'IK'],
                    'yetenekler': ['matematik', 'excel', 'kodlama', 'linux',
                               'excel', 'yonetim']})
df6

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muh,kodlama
3,Muh,linux
4,IK,excel
5,IK,yonetim


In [50]:
df3

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muh
2,Ayse,Muh
3,Fatma,IK


In [52]:
pd.merge(df3, df6)

Unnamed: 0,calisanlar,grup,yetenekler
0,Ali,Muhasebe,matematik
1,Ali,Muhasebe,excel
2,Veli,Muh,kodlama
3,Veli,Muh,linux
4,Ayse,Muh,kodlama
5,Ayse,Muh,linux
6,Fatma,IK,excel
7,Fatma,IK,yonetim


# Toplulastirma ve Gruplama (Aggregation & Grouping)

Basit toplulastirma fonksiyonlari

* count()         ------------------------ _**describe**_()
* first()         ------------------------ _**dropna**_()
* last()
* mean()
* median()
* min()
* max()
* std()
* var()
* sum()

In [55]:
import seaborn as sns

In [59]:
df = sns.load_dataset('planets')
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [60]:
?sns.load_dataset

[0;31mSignature:[0m [0msns[0m[0;34m.[0m[0mload_dataset[0m[0;34m([0m[0mname[0m[0;34m,[0m [0mcache[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m [0mdata_home[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0;34m**[0m[0mkws[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Load an example dataset from the online repository (requires internet).

This function provides quick access to a small number of example datasets
that are useful for documenting seaborn or generating reproducible examples
for bug reports. It is not necessary for normal usage.

Note that some of the datasets have a small amount of preprocessing applied
to define a proper ordering for categorical variables.

Use :func:`get_dataset_names` to see a list of available datasets.

Parameters
----------
name : str
    Name of the dataset (``{name}.csv`` on
    https://github.com/mwaskom/seaborn-data).
cache : boolean, optional
    If True, try to load from the local cache first, and save to the cache
 

In [61]:
df.shape # 1035 satir , 6 sutun var [1035 rows X 6 columns]

(1035, 6)

In [65]:
df.mean()

  df.mean()


number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [67]:
df['mass'].mean()

2.6381605847953233

In [69]:
df['mass'].count()

513

In [70]:
df['mass'].min()

0.0036

In [71]:
df['mass'].max()

25.0

In [72]:
df['mass'].sum()

1353.37638

In [81]:
df['mass'].std()

3.8186166509616046

In [82]:
df['mass'].var()

14.58183312700122

In [83]:
df.describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [84]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [85]:
df.dropna().describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


In [95]:
df.count()-df.dropna().count()

method            537
number            537
orbital_period    494
mass               15
distance          310
year              537
dtype: int64

# Gruplama Islemleri

In [99]:
df = pd.DataFrame({'gruplar' : ['A', 'B', 'C', 'A', 'B', 'C'],
                   'veri' : [10,11,52,23,43,55]}, columns = ['gruplar', 'veri'])
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,55


In [103]:
df1 = pd.DataFrame({'gruplar' : ['A', 'B', 'C', 'A', 'B', 'C'],
                   'veri' : [10,11,52,23,43,55]})
df1

Unnamed: 0,gruplar,veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,55


In [104]:
df.groupby('gruplar')  #=====> e ne yapayim diye soruyor

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7fbf84b1b520>

In [105]:
df.groupby('gruplar').mean()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,16.5
B,27.0
C,53.5


In [106]:
df.groupby('gruplar').sum()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,33
B,54
C,107


In [107]:
df = sns.load_dataset('planets')
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [110]:
df.groupby('method')['orbital_period'].mean() # Neye gore gruplanacak(method) ve Degisken secilir (orbital_period)

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [111]:
df.groupby('method')['orbital_period'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0


In [113]:
dfg = sns.load_dataset('iris')
dfg.head()           

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [119]:
dfg.groupby('species')[['sepal_length', 'sepal_width']].mean()

Unnamed: 0_level_0,sepal_length,sepal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1
setosa,5.006,3.428
versicolor,5.936,2.77
virginica,6.588,2.974


In [120]:
dfg.groupby('species')[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.428,1.462,0.246
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


In [122]:
dfg.groupby('species')[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']].std()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,0.35249,0.379064,0.173664,0.105386
versicolor,0.516171,0.313798,0.469911,0.197753
virginica,0.63588,0.322497,0.551895,0.27465


In [123]:
dfg.groupby('species').describe()

Unnamed: 0_level_0,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length,sepal_length,sepal_width,sepal_width,...,petal_length,petal_length,petal_width,petal_width,petal_width,petal_width,petal_width,petal_width,petal_width,petal_width
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
species,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
setosa,50.0,5.006,0.35249,4.3,4.8,5.0,5.2,5.8,50.0,3.428,...,1.575,1.9,50.0,0.246,0.105386,0.1,0.2,0.2,0.3,0.6
versicolor,50.0,5.936,0.516171,4.9,5.6,5.9,6.3,7.0,50.0,2.77,...,4.6,5.1,50.0,1.326,0.197753,1.0,1.2,1.3,1.5,1.8
virginica,50.0,6.588,0.63588,4.9,6.225,6.5,6.9,7.9,50.0,2.974,...,5.875,6.9,50.0,2.026,0.27465,1.4,1.8,2.0,2.3,2.5


# Ileri Toplulastirma islemleri (Aggregate, Filter, Transform, Apply)

In [125]:
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


## Aggregate

In [127]:
df.groupby('method')['orbital_period'].aggregate([min, 'max', np.median]) # ===> Pandas fonklari '' ve direkt calisir

Unnamed: 0_level_0,min,max,median
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Astrometry,246.36,1016.0,631.18
Eclipse Timing Variations,1916.25,10220.0,4343.5
Imaging,4639.15,730000.0,27500.0
Microlensing,1825.0,5100.0,3300.0
Orbital Brightness Modulation,0.240104,1.544929,0.342887
Pulsar Timing,0.090706,36525.0,66.5419
Pulsation Timing Variations,1170.0,1170.0,1170.0
Radial Velocity,0.73654,17337.5,360.2
Transit,0.355,331.60059,5.714932
Transit Timing Variations,22.3395,160.0,57.011


In [135]:
df.groupby('method')[['orbital_period', 'mass']].aggregate(['mean', 'median', 'sum' ])

Unnamed: 0_level_0,orbital_period,orbital_period,orbital_period,mass,mass,mass
Unnamed: 0_level_1,mean,median,sum,mean,median,sum
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Astrometry,631.18,631.18,1262.36,,,0.0
Eclipse Timing Variations,4751.644444,4343.5,42764.8,5.125,5.125,10.25
Imaging,118247.7375,27500.0,1418973.0,,,0.0
Microlensing,3153.571429,3300.0,22075.0,,,0.0
Orbital Brightness Modulation,0.709307,0.342887,2.12792,,,0.0
Pulsar Timing,7343.021201,66.5419,36715.11,,,0.0
Pulsation Timing Variations,1170.0,1170.0,1170.0,,,0.0
Radial Velocity,823.35468,360.2,455315.1,2.630699,1.26,1341.65638
Transit,21.102073,5.714932,8377.523,1.47,1.47,1.47
Transit Timing Variations,79.7835,57.011,239.3505,,,0.0


In [139]:
df.groupby('method')[['orbital_period', 'mass']].aggregate(['describe', min])

Unnamed: 0_level_0,orbital_period,orbital_period,orbital_period,orbital_period,orbital_period,orbital_period,orbital_period,orbital_period,orbital_period,mass,mass,mass,mass,mass,mass,mass,mass,mass
Unnamed: 0_level_1,describe,describe,describe,describe,describe,describe,describe,describe,min,describe,describe,describe,describe,describe,describe,describe,describe,min
Unnamed: 0_level_2,count,mean,std,min,25%,50%,75%,max,orbital_period,count,mean,std,min,25%,50%,75%,max,mass
method,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0,246.36,0.0,,,,,,,,
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0,1916.25,2.0,5.125,1.308148,4.2,4.6625,5.125,5.5875,6.05,4.2
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0,4639.15,0.0,,,,,,,,
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0,1825.0,0.0,,,,,,,,
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929,0.240104,0.0,,,,,,,,
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0,0.090706,0.0,,,,,,,,
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0,1170.0,0.0,,,,,,,,
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5,0.73654,510.0,2.630699,3.825883,0.0036,0.22525,1.26,3.0,25.0,0.0036
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059,0.355,1.0,1.47,,1.47,1.47,1.47,1.47,1.47,1.47
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0,22.3395,0.0,,,,,,,,


In [142]:
df.groupby('method').aggregate({'orbital_period' :[min, max] ,
                                'distance' : ['std', np.median]} )

Unnamed: 0_level_0,orbital_period,orbital_period,distance,distance
Unnamed: 0_level_1,min,max,std,median
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Astrometry,246.36,1016.0,4.094148,17.875
Eclipse Timing Variations,1916.25,10220.0,213.203907,315.36
Imaging,4639.15,730000.0,53.736817,40.395
Microlensing,1825.0,5100.0,2076.611556,3840.0
Orbital Brightness Modulation,0.240104,1.544929,0.0,1180.0
Pulsar Timing,0.090706,36525.0,,1200.0
Pulsation Timing Variations,1170.0,1170.0,,
Radial Velocity,0.73654,17337.5,45.559381,40.445
Transit,0.355,331.60059,913.87699,341.0
Transit Timing Variations,22.3395,160.0,915.819487,855.0
