# Pandas Serisi Oluşturmak

In [1]:
import pandas as pd

In [3]:
pd.Series([1,2,3,4,5])
# The left-most column of the printed output holds the index labels.

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
# Keep the five-element integer Series under a name so later cells can inspect it.
seri = pd.Series(data=[1, 2, 3, 4, 5])

In [5]:
type(seri)

pandas.core.series.Series

In [6]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [7]:
seri.dtype

dtype('int64')

In [8]:
seri.size

5

In [9]:
seri.ndim

1

In [10]:
seri.shape

(5,)

In [12]:
seri.values
# .values exposes only the data, as a NumPy array (the index is not included).

array([1, 2, 3, 4, 5])

In [14]:
seri.head()
# Called without an argument, head() returns the first 5 elements.

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [19]:
# Label the six values ourselves instead of relying on the default 0..n-1 index.
s = pd.Series(
    data=[99, 22, 33, 44, 55, 66],
    index=["a", "b", "c", "d", "e", "f"],
)

In [20]:
s["a"]

99

In [21]:
s["a":"c"]

a    99
b    22
c    33
dtype: int64

In [22]:
# Creating a Series (not a list) from a dictionary

In [23]:
# Same Series a dict would give: the keys act as index labels, the values as data.
sozluk = pd.Series([10, 11, 12], index=["reg", "log", "cart"])

In [24]:
sozluk

reg     10
log     11
cart    12
dtype: int64

In [25]:
type(sozluk)

pandas.core.series.Series

In [26]:
# Creating a new Series by concatenating two Series

In [30]:
pd.concat([seri,s])

0     1
1     2
2     3
3     4
4     5
a    99
b    22
c    33
d    44
e    55
f    66
dtype: int64

# Eleman İşlemleri

In [31]:
import numpy as np
# The first five multiples of 11 (11, 22, ..., 55) as a 1-D integer array.
a = np.arange(11, 56, 11)

In [32]:
seri = pd.Series(a)
seri

0    11
1    22
2    33
3    44
4    55
dtype: int64

In [33]:
seri[0]

11

In [36]:
seri[0:3]

0    11
1    22
2    33
dtype: int64

In [37]:
seri[::2]

0    11
2    33
4    55
dtype: int64

In [41]:
# Rebuild `seri` with string labels so that label-based access can be shown next.
seri = pd.Series(
    {"reg": 12, "loj": 14, "a": 15, "b": 16, "c": 17, "d": 18}
)

In [43]:
seri

reg    12
loj    14
a      15
b      16
c      17
d      18
dtype: int64

In [44]:
seri.index

Index(['reg', 'loj', 'a', 'b', 'c', 'd'], dtype='object')

In [45]:
# keys is a method, so it has to be called; without the parentheses the cell
# only echoes the bound-method repr instead of the index labels.
seri.keys()

Index(['reg', 'loj', 'a', 'b', 'c', 'd'], dtype='object')

In [46]:
list(seri.items())

[('reg', 12), ('loj', 14), ('a', 15), ('b', 16), ('c', 17), ('d', 18)]

In [47]:
seri.values

array([12, 14, 15, 16, 17, 18])

In [50]:
"reg" in seri
# `in` on a Series tests the index labels (not the values) and returns True/False.

True

In [51]:
seri["reg"]

12

In [52]:
#fancy eleman

In [53]:
seri[["a","reg"]]

a      15
reg    12
dtype: int64

In [56]:
seri["reg"] = 130
# Assigning through a label overwrites that element's value.

In [57]:
seri

reg    130
loj     14
a       15
b       16
c       17
d       18
dtype: int64

In [60]:
seri["reg":"d"]

reg    130
loj     14
a       15
b       16
c       17
d       18
dtype: int64

# Pandas DataFrame Oluşturmak

In [61]:
# A DataFrame resembles an Excel sheet: a table of rows and named columns.
# NumPy arrays have a single fixed dtype, so they handle a mix of categorical and continuous variables poorly.
# In data analysis we therefore need DataFrames.
# They can be thought of as the datasets we hand to machine-learning models.

In [62]:
import pandas as pd
liste = [12,24,34,56]

In [65]:
pd.DataFrame(liste , columns = ["degisken_isimleri"])
# The `columns` argument gives the variable (column) its name.

Unnamed: 0,degisken_isimleri
0,12
1,24
2,34
3,56


In [68]:
import numpy as np
# Integers 1..9 laid out row by row as a 3x3 matrix.
m = np.reshape(np.arange(1, 10), (3, 3))

In [69]:
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [70]:
pd.DataFrame(m, columns = ["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [71]:
#df isimlendirme

In [73]:
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [75]:
df.columns = ("deg1","deg2","deg3")

In [76]:
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [77]:
type(df)

pandas.core.frame.DataFrame

In [78]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [79]:
df.shape

(3, 3)

In [80]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [81]:
df.ndim

2

In [82]:
type(df.values)

numpy.ndarray

In [83]:
df.size

9

In [84]:
# One-dimensional array holding the integers 1 through 5.
a = np.arange(1, 6)

In [86]:
pd.DataFrame(a)

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


# Eleman İşlemleri

In [87]:
import numpy as np
import pandas as pd
# Three successive draws of five random integers each, taken from 0..9.
s1, s2, s3 = [np.random.randint(10, size=5) for _ in range(3)]

In [92]:
sozluk = {"var1":s1, "var2":s2, "var3":s3}
# Collect the arrays in a dict (column name -> values); a later cell turns it into a DataFrame.

In [89]:
sozluk

{'var1': array([1, 5, 9, 3, 9]),
 'var2': array([1, 7, 9, 2, 6]),
 'var3': array([8, 7, 8, 1, 8])}

In [115]:
df = pd.DataFrame(sozluk)

In [91]:
df

Unnamed: 0,var1,var2,var3
0,1,1,8
1,5,7,7
2,9,9,8
3,3,2,1
4,9,6,8


In [93]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,1,1,8


In [96]:
df.index = ["a","b","c","d","e"]

In [97]:
df[0:2]

Unnamed: 0,var1,var2,var3
a,1,1,8
b,5,7,7


In [99]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,9,9,8
d,3,2,1
e,9,6,8


In [100]:
#silme 

In [106]:
df.drop("b" , axis = 0)

Unnamed: 0,var1,var2,var3
c,9,9,8
d,3,2,1
e,9,6,8


In [108]:
df.drop("b" , axis = 0, inplace = True)
# inplace=True makes the deletion permanent: df itself is modified instead of a copy being returned.

In [109]:
df

Unnamed: 0,var1,var2,var3
c,9,9,8
d,3,2,1
e,9,6,8


In [110]:
#fancy

In [111]:
l = ["c","e"]

In [112]:
df.drop(l,axis = 0)

Unnamed: 0,var1,var2,var3
d,3,2,1


In [113]:
df

Unnamed: 0,var1,var2,var3
c,9,9,8
d,3,2,1
e,9,6,8


In [114]:
#degiskenler icin

In [116]:
df

Unnamed: 0,var1,var2,var3
0,1,1,8
1,5,7,7
2,9,9,8
3,3,2,1
4,9,6,8


In [117]:
"var1" in df

True

In [118]:
l = ["var1","var3","var6"]

In [119]:
# `name in df` looks the name up among the column labels, so this prints one
# True/False per candidate name.
for i in l:
    print(i in df.columns)

True
True
False


In [120]:
df

Unnamed: 0,var1,var2,var3
0,1,1,8
1,5,7,7
2,9,9,8
3,3,2,1
4,9,6,8


In [123]:
df["var4"] = df["var1"]*df["var2"]

In [124]:
df

Unnamed: 0,var1,var2,var3,var4
0,1,1,8,1
1,5,7,7,35
2,9,9,8,81
3,3,2,1,6
4,9,6,8,54


In [125]:
#degisken silmek

In [127]:
df.drop("var4", axis = 1)

Unnamed: 0,var1,var2,var3
0,1,1,8
1,5,7,7
2,9,9,8
3,3,2,1
4,9,6,8


# Gözlem ve Değişken Seçimi
## loc & iloc

In [129]:
import numpy as np
import pandas as pd
# 10x3 table of random integers drawn from 1..29, wrapped in a DataFrame with
# named columns and displayed as the cell result.
m = np.random.randint(low=1, high=30, size=(10, 3))
df = pd.DataFrame(data=m, columns=["var1", "var2", "var3"])
df

Unnamed: 0,var1,var2,var3
0,13,1,13
1,27,20,18
2,22,11,4
3,3,22,11
4,27,4,2
5,15,3,15
6,5,16,23
7,10,21,16
8,3,14,11
9,9,16,21


In [132]:
# loc: selects by the index labels exactly as they are defined.
# It stays faithful to the labels, so the last label of a slice is included.

In [133]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,13,1,13
1,27,20,18
2,22,11,4
3,3,22,11


In [136]:
# iloc: selects by integer position with the usual slicing rules (the end position is excluded).

In [135]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,13,1,13
1,27,20,18
2,22,11,4


In [137]:
df.iloc[0,0]

13

In [138]:
df.iloc[:3,:2]

Unnamed: 0,var1,var2
0,13,1
1,27,20
2,22,11


In [140]:
df.loc[0:3,"var3"]

0    13
1    18
2     4
3    11
Name: var3, dtype: int64

In [143]:
# iloc is strictly positional, so passing the column label "var3" raises ValueError.
# The error is caught and printed so that this demonstration does not stop a
# "Restart & Run All"; the message shown is the same as the uncaught one.
# To select by labels (index, variable) use loc instead: df.loc[0:3, "var3"].
try:
    df.iloc[0:3, "var3"]
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: Location based indexing can only have [integer, integer slice (START point is INCLUDED, END point is EXCLUDED), listlike of integers, boolean array] types

# Koşullu Eleman İşlemleri

In [1]:
import numpy as np
import pandas as pd
# Fresh 10x3 frame of random integers (1..29) for the conditional-selection examples.
m = np.random.randint(1, 30, (10, 3))
df = pd.DataFrame(m, columns=["var1", "var2", "var3"][:])
df

Unnamed: 0,var1,var2,var3
0,26,14,27
1,12,12,5
2,20,14,19
3,7,23,10
4,16,28,13
5,10,13,24
6,26,17,6
7,2,9,8
8,7,4,4
9,20,15,7


In [2]:
df["var1"]

0    26
1    12
2    20
3     7
4    16
5    10
6    26
7     2
8     7
9    20
Name: var1, dtype: int64

In [3]:
df["var1"][0:2]

0    26
1    12
Name: var1, dtype: int64

In [4]:
df[0:2]["var3"]

0    27
1     5
Name: var3, dtype: int64

In [5]:
df[0:2][["var1","var2"]]

Unnamed: 0,var1,var2
0,26,14
1,12,12


In [8]:
df[df.var1 > 15]["var1"]

0    26
2    20
4    16
6    26
9    20
Name: var1, dtype: int64

In [11]:
df[(df.var1 > 15) & (df.var3 < 3)]

Unnamed: 0,var1,var2,var3


In [12]:
df.loc[(df.var1 > 15), ["var1","var2"]]

Unnamed: 0,var1,var2
0,26,14
2,20,14
4,16,28
6,26,17
9,20,15


# Birleştirme (Join) İşlemleri

In [15]:
import numpy as np
import pandas as pd
# First frame for the concatenation examples: 5 rows x 3 columns of random integers (1..29).
m = np.random.randint(low=1, high=30, size=(5, 3))
df1 = pd.DataFrame(
    data=m,
    columns=["var1", "var2", "var3"],
)
df1

Unnamed: 0,var1,var2,var3
0,27,2,14
1,10,12,8
2,28,7,3
3,3,3,22
4,28,22,23


In [16]:
df2 = df1 + 99

In [17]:
df2

Unnamed: 0,var1,var2,var3
0,126,101,113
1,109,111,107
2,127,106,102
3,102,102,121
4,127,121,122


In [18]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3
0,27,2,14
1,10,12,8
2,28,7,3
3,3,3,22
4,28,22,23
0,126,101,113
1,109,111,107
2,127,106,102
3,102,102,121
4,127,121,122


In [22]:
pd.concat([df1,df2],ignore_index = True )
# ignore_index=True drops the original row labels and renumbers the result 0..n-1.

Unnamed: 0,var1,var2,var3
0,27,2,14
1,10,12,8
2,28,7,3
3,3,3,22
4,28,22,23
5,126,101,113
6,109,111,107
7,127,106,102
8,102,102,121
9,127,121,122


In [24]:
df1.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [25]:
df2.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [27]:
df2.columns = ["var1","var2","deg3"]

In [28]:
df2

Unnamed: 0,var1,var2,deg3
0,126,101,113
1,109,111,107
2,127,106,102
3,102,102,121
4,127,121,122


In [29]:
df1

Unnamed: 0,var1,var2,var3
0,27,2,14
1,10,12,8
2,28,7,3
3,3,3,22
4,28,22,23


In [31]:
# var3 exists only in df1 and deg3 only in df2, so the union of the columns is
# kept and the cells a frame cannot supply are filled with NaN.
# This is not an error: older pandas releases only warned that the implicit
# sorting of the columns would be dropped in a future version.
# sort=True asks for the alphabetical column order explicitly, so the result is
# the same on every pandas version.
pd.concat([df1, df2], sort=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,deg3,var1,var2,var3
0,,27,2,14.0
1,,10,12,8.0
2,,28,7,3.0
3,,3,3,22.0
4,,28,22,23.0
0,113.0,126,101,
1,107.0,109,111,
2,102.0,127,106,
3,121.0,102,102,
4,122.0,127,121,


In [33]:
pd.concat([df1,df2], join = "inner")
# join="inner" keeps only the columns that both frames have in common.

Unnamed: 0,var1,var2
0,27,2
1,10,12
2,28,7
3,3,3
4,28,22
0,126,101
1,109,111
2,127,106
3,102,102
4,127,121


In [34]:
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0.
# Reindexing the concatenated result to df1's columns gives the same table:
# var1/var2/var3 are kept, deg3 is dropped and df2's missing var3 becomes NaN.
pd.concat([df1, df2]).reindex(columns=df1.columns)

  """Entry point for launching an IPython kernel.


Unnamed: 0,var1,var2,var3
0,27,2,14.0
1,10,12,8.0
2,28,7,3.0
3,3,3,22.0
4,28,22,23.0
0,126,101,
1,109,111,
2,127,106,
3,102,102,
4,127,121,


# İleri Birleştirme İşlemleri

In [35]:
import pandas as pd

In [36]:
#birebir birleştirme

In [39]:
# Employees paired with the department each one belongs to (one record per row).
df1 = pd.DataFrame(
    [
        ("Ali", "Yönetim"),
        ("Ahmet", "Muhasebe"),
        ("Mehmet", "IK"),
        ("Ayşe", "Muhasebe"),
        ("Fatma", "IK"),
    ],
    columns=["calisanlar", "grup"],
)
df1

Unnamed: 0,calisanlar,grup
0,Ali,Yönetim
1,Ahmet,Muhasebe
2,Mehmet,IK
3,Ayşe,Muhasebe
4,Fatma,IK


In [40]:
# The same employees with the year each one joined.
df2 = pd.DataFrame.from_dict(
    {
        "calisanlar": ["Ali", "Ahmet", "Mehmet", "Ayşe", "Fatma"],
        "ilk_giris": [2010, 2009, 2012, 2014, 2007],
    }
)
df2

Unnamed: 0,calisanlar,ilk_giris
0,Ali,2010
1,Ahmet,2009
2,Mehmet,2012
3,Ayşe,2014
4,Fatma,2007


In [42]:
pd.merge(df1,df2)
# One-to-one merge: with no key given, the frames are joined on the column they share (calisanlar).

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Yönetim,2010
1,Ahmet,Muhasebe,2009
2,Mehmet,IK,2012
3,Ayşe,Muhasebe,2014
4,Fatma,IK,2007


In [46]:
pd.merge(df1,df2, on = 'calisanlar')

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Yönetim,2010
1,Ahmet,Muhasebe,2009
2,Mehmet,IK,2012
3,Ayşe,Muhasebe,2014
4,Fatma,IK,2007


In [47]:
#çoktan teke

In [48]:
df3 = pd.merge(df1,df2)

In [49]:
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Yönetim,2010
1,Ahmet,Muhasebe,2009
2,Mehmet,IK,2012
3,Ayşe,Muhasebe,2014
4,Fatma,IK,2007


In [53]:
# One manager per department; used for the many-to-one merge below.
df4 = pd.DataFrame(
    [("Caner", "Yönetim"), ("Ahmet", "Muhasebe"), ("Emre", "IK")],
    columns=["mudur", "grup"],
)
df4

Unnamed: 0,mudur,grup
0,Caner,Yönetim
1,Ahmet,Muhasebe
2,Emre,IK


In [55]:
pd.merge(df3,df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Yönetim,2010,Caner
1,Ahmet,Muhasebe,2009,Ahmet
2,Ayşe,Muhasebe,2014,Ahmet
3,Mehmet,IK,2012,Emre
4,Fatma,IK,2007,Emre


In [56]:
#çoktan çoka

In [58]:
# Every department appears more than once because it lists several skills.
df5 = pd.DataFrame.from_dict(
    {
        "grup": ["Yönetim", "Muhasebe", "IK", "Yönetim", "Muhasebe", "IK"],
        "yetenek": ["Excel", "Matematik", "Kodlama", "Linux", "Excel", "Matematik"],
    }
)
df5

Unnamed: 0,grup,yetenek
0,Yönetim,Excel
1,Muhasebe,Matematik
2,IK,Kodlama
3,Yönetim,Linux
4,Muhasebe,Excel
5,IK,Matematik


In [60]:
pd.merge(df1,df5)
# Many-to-many merge: every employee is repeated once for each skill listed for their group.

Unnamed: 0,calisanlar,grup,yetenek
0,Ali,Yönetim,Excel
1,Ali,Yönetim,Linux
2,Ahmet,Muhasebe,Matematik
3,Ahmet,Muhasebe,Excel
4,Ayşe,Muhasebe,Matematik
5,Ayşe,Muhasebe,Excel
6,Mehmet,IK,Kodlama
7,Mehmet,IK,Matematik
8,Fatma,IK,Kodlama
9,Fatma,IK,Matematik


# Toplulaştırma ve Gruplama (Aggregation & Grouping)

Basit Toplulaştırma Fonksiyonları:
* count()
* first()
* last()
* mean()
* median()
* min()
* max()
* std()
* var()
* sum()

In [61]:
import seaborn as sns
import pandas as pd
import numpy as np

In [62]:
df = sns.load_dataset("planets")
df

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [63]:
df.shape

(1035, 6)

In [65]:
# Average of every numeric column. numeric_only=True is needed on pandas >= 2.0,
# where the text column `method` would otherwise make mean() raise TypeError;
# on older versions it yields the same result as the bare call.
df.mean(numeric_only=True)

number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [66]:
df["mass"].mean()

2.6381605847953216

In [68]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


# Gruplama İşlemleri

In [71]:
# Small example: six observations spread over the groups A, B and C.
df = pd.DataFrame(
    [("A", 10), ("B", 22), ("C", 33), ("A", 44), ("B", 55), ("C", 66)],
    columns=["gruplar", "veri"],
)
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,22
2,C,33
3,A,44
4,B,55
5,C,66


In [74]:
df.groupby("gruplar")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f9d5d2be590>

In [75]:
df.groupby("gruplar").mean()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,27.0
B,38.5
C,49.5


In [76]:
df.groupby("gruplar").sum()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,54
B,77
C,99


In [77]:
df = sns.load_dataset("planets")
df

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [79]:
df.groupby('method')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f9d5d2abc50>

In [80]:
df.groupby("method")["orbital_period"].mean()

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [81]:
# groupby needs a categorical variable whose values define the groups

# İleri Toplulaştırma İşlemleri (Aggregate, filter, transform, apply)

In [83]:
import pandas as pd
# Two numeric variables measured for the groups A, B and C (two rows per group).
df = pd.DataFrame(
    data={
        "gruplar": ["A", "B", "C", "A", "B", "C"],
        "degisken1": [10, 23, 33, 22, 11, 99],
        "degisken2": [100, 233, 455, 234, 123, 324],
    },
    columns=["gruplar", "degisken1", "degisken2"],
)
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,233
2,C,33,455
3,A,22,234
4,B,11,123
5,C,99,324


In [84]:
#aggregate

In [86]:
df.groupby("gruplar").mean()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16.0,167.0
B,17.0,178.0
C,66.0,389.5


In [89]:
# Several aggregations at once, each referenced by name. "median" works as a
# string just like "min" and "max", so np.median is not needed here (recent
# pandas versions emit a FutureWarning when the NumPy callable is passed).
df.groupby("gruplar").aggregate(["min", "median", "max"])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16,22,100,167.0,234
B,11,17,23,123,178.0,233
C,33,66,99,324,389.5,455


In [91]:
# A dict applies a different aggregation per column: min of degisken1, max of degisken2.
# The former `columns=[...]` keyword (misspelled "degiksen1") is not an
# aggregate() option, so it has been removed.
df.groupby("gruplar").aggregate({"degisken1": "min", "degisken2": "max"})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,234
B,11,233
C,33,455


In [92]:
#filter

In [93]:
import pandas as pd
# Same example frame, rebuilt row by row for the filter demonstration.
df = pd.DataFrame(
    [
        ("A", 10, 100),
        ("B", 23, 233),
        ("C", 33, 455),
        ("A", 22, 234),
        ("B", 11, 123),
        ("C", 99, 324),
    ],
    columns=["gruplar", "degisken1", "degisken2"],
)
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,233
2,C,33,455
3,A,22,234
4,B,11,123
5,C,99,324


In [94]:
def filter_func(x):
    """Tell whether a group should be kept by ``groupby(...).filter``.

    Parameters
    ----------
    x : DataFrame
        The rows of one group; must contain the column ``degisken1``.

    Returns
    -------
    bool
        True when the sample standard deviation of ``degisken1`` exceeds 9.
    """
    spread = x["degisken1"].std()
    return spread > 9

In [96]:
df.groupby("gruplar").std()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,8.485281,94.752309
B,8.485281,77.781746
C,46.669048,92.630988


In [95]:
df.groupby("gruplar").filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
2,C,33,455
5,C,99,324


In [97]:
#transform

In [98]:
import pandas as pd
# Same example frame again, this time for the transform demonstration.
df = pd.DataFrame.from_dict(
    {
        "gruplar": ["A", "B", "C", "A", "B", "C"],
        "degisken1": [10, 23, 33, 22, 11, 99],
        "degisken2": [100, 233, 455, 234, 123, 324],
    }
)[["gruplar", "degisken1", "degisken2"]]
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,233
2,C,33,455
3,A,22,234
4,B,11,123
5,C,99,324


In [99]:
#degiskenleri dönüstürmek istiyoruz

In [100]:
df["degisken1"]*9

0     90
1    207
2    297
3    198
4     99
5    891
Name: degisken1, dtype: int64

In [104]:
df_a = df.iloc[:,1:3]

In [108]:
df_a.transform(lambda x: (x-x.mean()) / x.std())
# transform runs the function we supply on each column; here every column is standardised as (value - mean) / std.

Unnamed: 0,degisken1,degisken2
0,-0.687871,-1.101053
1,-0.299074,-0.089959
2,0.0,1.597731
3,-0.328982,-0.082357
4,-0.657963,-0.926202
5,1.97389,0.601841


In [109]:
#apply

In [111]:
import pandas as pd
# NOTE(review): `gruplar` is supplied but not selected, so the resulting frame
# holds only the two numeric variables - presumably intended for the apply
# example; confirm, since no apply call follows in this section.
df = pd.DataFrame(
    {
        "gruplar": ["A", "B", "C", "A", "B", "C"],
        "degisken1": [10, 23, 33, 22, 11, 99],
        "degisken2": [100, 233, 455, 234, 123, 324],
    }
)[["degisken1", "degisken2"]]
df

Unnamed: 0,degisken1,degisken2
0,10,100
1,23,233
2,33,455
3,22,234
4,11,123
5,99,324


# Pivot Tablolar

In [1]:
import seaborn as sns
import pandas as pd
titanic = sns.load_dataset("titanic")
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [2]:
titanic.groupby("sex")["survived"].mean()

sex
female    0.742038
male      0.188908
Name: survived, dtype: float64

In [3]:
titanic.groupby("sex")[["survived"]].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [6]:
titanic.groupby(["sex","class"])[["survived"]].aggregate("mean").unstack()
# A pivot table built by hand: group by sex and class, average `survived`, then unstack class into columns.

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [7]:
#pivot ile pivot table

In [8]:
titanic.pivot_table("survived", index = "sex", columns = "class")

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [9]:
titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [10]:
# Bin the ages into two bands, (0, 18] and (18, 90], for use as an extra pivot key.
age = pd.cut(titanic["age"], bins=[0, 18, 90])
age.head()

0    (18, 90]
1    (18, 90]
2    (18, 90]
3    (18, 90]
4    (18, 90]
Name: age, dtype: category
Categories (2, interval[int64]): [(0, 18] < (18, 90]]

In [11]:
# Mean survival by sex and age band (rows) against passenger class (columns).
titanic.pivot_table(values="survived", index=["sex", age], columns="class")

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663
