# Pandas Serisi Oluşturmak

In [2]:
import pandas as pd

In [6]:
pd.Series([17,602,3,4,5])

0     17
1    602
2      3
3      4
4      5
dtype: int64

In [10]:
seri = pd.Series([17,602,3,4,5])

In [12]:
type(seri)

pandas.core.series.Series

In [14]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [16]:
seri.dtype

dtype('int64')

In [18]:
seri.size

5

In [20]:
seri.ndim

1

In [22]:
seri.values

array([ 17, 602,   3,   4,   5], dtype=int64)

In [26]:
seri.head(3)

0     17
1    602
2      3
dtype: int64

In [28]:
pd.Series([5,4,3,2,1])

0    5
1    4
2    3
3    2
4    1
dtype: int64

In [30]:
pd.Series([5,4,3,2,1], index = [1,3,5,7,9])

1    5
3    4
5    3
7    2
9    1
dtype: int64

In [54]:
seri = pd.Series([5,4,3,2,1], index = ["a", "b", "c", "d", "e"])
seri

a    5
b    4
c    3
d    2
e    1
dtype: int64

In [56]:
seri["a"]

5

In [58]:
seri["a": "c"]

a    5
b    4
c    3
dtype: int64

In [60]:
sozluk = pd.Series({"reg": 10, "log": 11, "cart": 12})

In [62]:
sozluk

reg     10
log     11
cart    12
dtype: int64

In [64]:
sozluk = {"reg": 10, "log": 11, "cart": 12}

In [66]:
seri = pd.Series(sozluk)
seri

reg     10
log     11
cart    12
dtype: int64

In [68]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

# Seri Eleman İşlemleri

In [71]:
import numpy as np
import pandas as pd
a = np.array([1,2,33,44,75])
seri = pd.Series(a)
seri

0     1
1     2
2    33
3    44
4    75
dtype: int32

In [73]:
seri[0]

1

In [75]:
seri[0:3]

0     1
1     2
2    33
dtype: int32

In [79]:
seri = pd.Series([121,200,150,99], index = ["reg", "loj", "cart", "rf"])
seri

reg     121
loj     200
cart    150
rf       99
dtype: int64

In [81]:
seri.index

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [85]:
# keys is a method and has to be called; a bare `seri.keys` only displays the bound-method repr.
seri.keys()

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [87]:
list(seri.items())

[('reg', 121), ('loj', 200), ('cart', 150), ('rf', 99)]

In [91]:
seri.values

array([121, 200, 150,  99], dtype=int64)

In [93]:
"reg" in seri

True

In [95]:
"a" in seri

False

In [97]:
seri["reg"]

121

In [99]:
seri[["rf", "reg"]]

rf      99
reg    121
dtype: int64

In [101]:
seri["reg"] = 155

In [103]:
seri

reg     155
loj     200
cart    150
rf       99
dtype: int64

In [105]:
seri["reg" : "loj"]

reg    155
loj    200
dtype: int64

# Pandas DataFrame Oluşturma

In [108]:
import pandas as pd

In [110]:
l = [1,2,39,67,90]

In [112]:
l

[1, 2, 39, 67, 90]

In [114]:
pd.DataFrame(l, columns = ["degiken_ismi"])

Unnamed: 0,degiken_ismi
0,1
1,2
2,39
3,67
4,90


In [116]:
import numpy as np
m = np.arange(1,10).reshape((3,3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [120]:
pd.DataFrame(m,columns = ["var1", "var2", "var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [122]:
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [128]:
df.columns = ("deg1", "deg2", "deg3")

In [130]:
df

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [132]:
type(df)

pandas.core.frame.DataFrame

In [134]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [136]:
df.shape

(3, 3)

In [138]:
df.ndim

2

In [140]:
df.size

9

In [146]:
type(df.values)

numpy.ndarray

In [148]:
df.tail()

Unnamed: 0,deg1,deg2,deg3
0,1,2,3
1,4,5,6
2,7,8,9


In [150]:
a = np.array([1,2,3,4,5])

In [152]:
pd.DataFrame(a, columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


# DataFrame Eleman İşlemleri

In [73]:
import numpy as np
import pandas as pd
# Three independent samples of five random integers from [0, 10),
# drawn from the global NumPy RNG in the order s1, s2, s3.
s1, s2, s3 = (np.random.randint(0, 10, size=5) for _ in range(3))

In [75]:
sozluk = {"var1" : s1, "var2" : s2, "var3": s3}

In [77]:
sozluk

{'var1': array([4, 7, 0, 4, 1]),
 'var2': array([6, 0, 3, 1, 4]),
 'var3': array([7, 9, 0, 8, 1])}

In [79]:
df = pd.DataFrame(sozluk)

In [81]:
df

Unnamed: 0,var1,var2,var3
0,4,6,7
1,7,0,9
2,0,3,0
3,4,1,8
4,1,4,1


In [83]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,4,6,7


In [85]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [87]:
df.index = ["a", "b", "c", "d", "e"]

In [89]:
df

Unnamed: 0,var1,var2,var3
a,4,6,7
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [91]:
df["c":"e"]

Unnamed: 0,var1,var2,var3
c,0,3,0
d,4,1,8
e,1,4,1


In [93]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [95]:
df

Unnamed: 0,var1,var2,var3
a,4,6,7
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [97]:
df.drop("a", axis = 0, inplace = True)

In [99]:
df

Unnamed: 0,var1,var2,var3
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [101]:
l = ["c", "e"]

In [103]:
df.drop(l,axis = 0)

Unnamed: 0,var1,var2,var3
b,7,0,9
d,4,1,8


In [107]:
df

Unnamed: 0,var1,var2,var3
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [109]:
"var1" in df

True

In [111]:
l = ["var1", "var4", "var2"]

In [113]:
for i in l:
    print(i in df)

True
False
True


In [117]:
df["var1"]

b    7
c    0
d    4
e    1
Name: var1, dtype: int32

In [119]:
df["var4"] = df["var1"] / df["var2"]

In [123]:
df

Unnamed: 0,var1,var2,var3,var4
b,7,0,9,inf
c,0,3,0,0.0
d,4,1,8,4.0
e,1,4,1,0.25


In [125]:
df.drop("var4", axis = 1)

Unnamed: 0,var1,var2,var3
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [127]:
df

Unnamed: 0,var1,var2,var3,var4
b,7,0,9,inf
c,0,3,0,0.0
d,4,1,8,4.0
e,1,4,1,0.25


In [129]:
df.drop("var4", axis = 1, inplace = True)

In [131]:
df

Unnamed: 0,var1,var2,var3
b,7,0,9
c,0,3,0
d,4,1,8
e,1,4,1


In [135]:
l = ["var1", "var2"]

In [137]:
df.drop(l, axis = 1)

Unnamed: 0,var3
b,9
c,0
d,8
e,1


# Gözlem ve Değişken Seçimi: loc & iloc

In [143]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])
df

Unnamed: 0,var1,var2,var3
0,8,19,18
1,24,26,16
2,24,9,2
3,7,7,12
4,11,29,25
5,20,5,16
6,25,8,4
7,18,23,19
8,1,21,26
9,13,20,5


In [147]:
 # ?np.random.randint

In [149]:
# loc: indeks etiketlerine göre (tanımlandığı şekliyle) seçim yapar; dilimlemede bitiş etiketi de sonuca dahildir.

In [151]:
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,8,19,18
1,24,26,16
2,24,9,2
3,7,7,12


In [153]:
df

Unnamed: 0,var1,var2,var3
0,8,19,18
1,24,26,16
2,24,9,2
3,7,7,12
4,11,29,25
5,20,5,16
6,25,8,4
7,18,23,19
8,1,21,26
9,13,20,5


In [155]:
# iloc: alışık olduğumuz konum (pozisyon) tabanlı indeksleme mantığı ile seçim yapar; dilimlemede bitiş konumu sonuca dahil değildir.

In [157]:
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,8,19,18
1,24,26,16
2,24,9,2


In [159]:
df.iloc[0,0]

8

In [161]:
df.iloc[:3,:2]

Unnamed: 0,var1,var2
0,8,19
1,24,26
2,24,9


In [163]:
df.loc[0:3, "var3"]

0    18
1    16
2     2
3    12
Name: var3, dtype: int32

In [165]:
df.iloc[0:3]["var3"]

0    18
1    16
2     2
Name: var3, dtype: int32

# Koşullu Eleman İşlemleri

In [168]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (10,3))
df = pd.DataFrame(m, columns = ["var1", "var2", "var3"])
df

Unnamed: 0,var1,var2,var3
0,25,2,8
1,22,4,2
2,7,2,19
3,13,3,10
4,8,26,15
5,28,28,4
6,26,12,26
7,20,16,6
8,1,4,16
9,16,24,11


In [170]:
df["var1"]

0    25
1    22
2     7
3    13
4     8
5    28
6    26
7    20
8     1
9    16
Name: var1, dtype: int32

In [172]:
df[0:2][["var1", "var2"]]

Unnamed: 0,var1,var2
0,25,2
1,22,4


In [174]:
df

Unnamed: 0,var1,var2,var3
0,25,2,8
1,22,4,2
2,7,2,19
3,13,3,10
4,8,26,15
5,28,28,4
6,26,12,26
7,20,16,6
8,1,4,16
9,16,24,11


In [176]:
df.var1

0    25
1    22
2     7
3    13
4     8
5    28
6    26
7    20
8     1
9    16
Name: var1, dtype: int32

In [182]:
df[df.var1 > 15]

Unnamed: 0,var1,var2,var3
0,25,2,8
1,22,4,2
5,28,28,4
6,26,12,26
7,20,16,6
9,16,24,11


In [184]:
# Pick the rows and the column in a single .loc call instead of chained indexing.
df.loc[df.var1 > 15, "var1"]

0    25
1    22
5    28
6    26
7    20
9    16
Name: var1, dtype: int32

In [186]:
df[(df.var1 > 15) & (df.var3 < 5)]

Unnamed: 0,var1,var2,var3
1,22,4,2
5,28,28,4


In [190]:
df.loc[(df.var1 > 15), ["var1", "var2"]]

Unnamed: 0,var1,var2
0,25,2
1,22,4
5,28,28
6,26,12
7,20,16
9,16,24


In [194]:
df[(df.var1 > 15)][["var1", "var2"]]

Unnamed: 0,var1,var2
0,25,2
1,22,4
5,28,28
6,26,12
7,20,16
9,16,24


# Birleştirme (Join) İşlemleri

In [5]:
import numpy as np
import pandas as pd
# 5x3 matrix of random integers drawn from [1, 30), wrapped in a DataFrame with named columns.
m = np.random.randint(low=1, high=30, size=(5, 3))
df1 = pd.DataFrame(data=m, columns=["var1", "var2", "var3"])
df1

Unnamed: 0,var1,var2,var3
0,7,6,12
1,26,9,20
2,8,26,2
3,16,19,20
4,1,14,1


In [7]:
df2 = df1 + 90

In [9]:
df2

Unnamed: 0,var1,var2,var3
0,97,96,102
1,116,99,110
2,98,116,92
3,106,109,110
4,91,104,91


In [11]:
pd.concat([df1, df2])

Unnamed: 0,var1,var2,var3
0,7,6,12
1,26,9,20
2,8,26,2
3,16,19,20
4,1,14,1
0,97,96,102
1,116,99,110
2,98,116,92
3,106,109,110
4,91,104,91


In [13]:
pd.concat([df1, df2], ignore_index = True)

Unnamed: 0,var1,var2,var3
0,7,6,12
1,26,9,20
2,8,26,2
3,16,19,20
4,1,14,1
5,97,96,102
6,116,99,110
7,98,116,92
8,106,109,110
9,91,104,91


In [15]:
df1.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [17]:
df2.columns = ["var1", "var2", "deg3"]

In [19]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3,deg3
0,7,6,12.0,
1,26,9,20.0,
2,8,26,2.0,
3,16,19,20.0,
4,1,14,1.0,
0,97,96,,102.0
1,116,99,,110.0
2,98,116,,92.0
3,106,109,,110.0
4,91,104,,91.0


In [21]:
pd.concat([df1, df2], join = "inner")

Unnamed: 0,var1,var2
0,7,6
1,26,9
2,8,26
3,16,19
4,1,14
0,97,96
1,116,99
2,98,116
3,106,109
4,91,104


# İleri Birleştirme İşlemleri

In [46]:
import pandas as pd

In [48]:
# Group labels must match the ones used in df4/df5 exactly ('Muhendislik', ASCII 'IK');
# a misspelled label finds no partner and the row is silently dropped by the inner merges below.
df1 = pd.DataFrame({'calisanlar': ['Ali', 'Veli', 'Ayse', 'Fatma'],
                   'grup': ['Muhasebe', 'Muhendislik', 'Muhendislik', 'IK']})
df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,IK


In [50]:
df2 = pd.DataFrame({'calisanlar': ['Ayse', 'Ali', 'Veli', 'Fatma'],
                   'ilk_giris': [2010, 2009, 2014, 2019]})
df2

Unnamed: 0,calisanlar,ilk_giris
0,Ayse,2010
1,Ali,2009
2,Veli,2014
3,Fatma,2019


In [52]:
pd.merge(df1, df2)

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,IK,2019


In [54]:
df3 = pd.merge(df1, df2, on = "calisanlar")
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,IK,2019


In [56]:
df4 = pd.DataFrame({'grup': ['Muhasebe', 'Muhendislik', 'IK'],
                   'mudur': ['Caner', 'Mustafa', 'Berkcan']})
df4

Unnamed: 0,grup,mudur
0,Muhasebe,Caner
1,Muhendislik,Mustafa
2,IK,Berkcan


In [58]:
pd.merge(df3, df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2009,Caner
1,Veli,Muhendislik,2014,Mustafa
2,Ayse,Muhendislik,2010,Mustafa
3,Fatma,IK,2019,Berkcan


In [64]:
df5 = pd.DataFrame({'grup' : ['Muhasebe', 'Muhasebe', 'Muhendislik', 'Muhendislik', 'IK', 'IK'],
                   'yetenekler': ['matematik', 'excel', 'kodlama', 'linux',
                                  'excel', 'yonetim']})
df5

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muhendislik,kodlama
3,Muhendislik,linux
4,IK,excel
5,IK,yonetim


In [66]:
pd.merge(df1, df5)

Unnamed: 0,calisanlar,grup,yetenekler
0,Ali,Muhasebe,matematik
1,Ali,Muhasebe,excel
2,Veli,Muhendislik,kodlama
3,Veli,Muhendislik,linux
4,Ayse,Muhendislik,kodlama
5,Ayse,Muhendislik,linux
6,Fatma,IK,excel
7,Fatma,IK,yonetim

# Toplulaştırma ve Gruplama (Aggregation & Grouping)

In [1]:
# pip.main is an internal entry point, not a supported API (pip itself prints a warning when it is
# called in-process); the %pip magic installs into the environment of the running kernel instead.
%pip install -q seaborn

Note: you may need to restart the kernel to use updated packages.

In [3]:
import seaborn as sns

In [5]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [7]:
df.shape

(1035, 6)

In [9]:
df["mass"].mean()

2.6381605847953216

In [11]:
df["year"].count()

1035

In [13]:
df["year"].var()

15.781287434941506

In [15]:
df.describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [17]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [19]:
df.dropna().describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


# Gruplama İşlemleri

In [24]:
import pandas as pd
# Toy data: six observations, each tagged with a group label A/B/C.
# Dict insertion order already fixes the column order, so no explicit `columns=` is needed.
df = pd.DataFrame(
    {
        "gruplar": ["A", "B", "C"] * 2,
        "veri": [10, 11, 52, 23, 43, 55],
    }
)
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,55


In [26]:
df.groupby("gruplar")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001E523819D60>

In [28]:
df.groupby("gruplar").mean()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,16.5
B,27.0
C,53.5


In [30]:
df.groupby("gruplar").sum()

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,33
B,54
C,107


In [32]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [36]:
df.groupby("method")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001E523D4E510>

In [40]:
df.groupby("method")["orbital_period"].mean()

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [44]:
df.groupby("method")["mass"].mean()

method
Astrometry                            NaN
Eclipse Timing Variations        5.125000
Imaging                               NaN
Microlensing                          NaN
Orbital Brightness Modulation         NaN
Pulsar Timing                         NaN
Pulsation Timing Variations           NaN
Radial Velocity                  2.630699
Transit                          1.470000
Transit Timing Variations             NaN
Name: mass, dtype: float64

# İleri Toplulaştırma İşlemleri (Aggregate, filter, transform, apply)

In [7]:
import pandas as pd
import numpy as np
# Two numeric variables observed for groups A, B and C (two rows per group).
# The column order follows the dict's insertion order.
df = pd.DataFrame(
    {
        "gruplar": list("ABCABC"),
        "degisken1": [10, 23, 33, 22, 11, 99],
        "degisken2": [100, 253, 333, 262, 111, 969],
    }
)
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [9]:
df.groupby("gruplar").mean()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16.0,181.0
B,17.0,182.0
C,66.0,651.0


In [19]:
# Use the string alias "median": recent pandas versions emit a FutureWarning when a NumPy
# callable such as np.median is passed to a groupby aggregation. The resulting table is the same.
df.groupby("gruplar").aggregate(["min", "median", "max"])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16.0,22,100,181.0,262
B,11,17.0,23,111,182.0,253
C,33,66.0,99,333,651.0,969


In [21]:
df.groupby("gruplar").aggregate({"degisken1": "min", "degisken2": "max"})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,262
B,11,253
C,33,969


In [23]:
import pandas as pd
# Same layout as before (group label plus two numeric variables), with new degisken2 values
# for the filter/transform/apply examples.
df = pd.DataFrame(
    dict(
        gruplar=["A", "B", "C", "A", "B", "C"],
        degisken1=[10, 23, 33, 22, 11, 99],
        degisken2=[100, 200, 300, 56, 321, 52],
    )
)
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,200
2,C,33,300
3,A,22,56
4,B,11,321
5,C,99,52


In [25]:
def filter_func(x, column="degisken1", threshold=9):
    """Tell whether a group's values are spread out more than ``threshold``.

    Meant to be passed to ``DataFrameGroupBy.filter``: groups for which this
    returns True are kept, the others are dropped. Extra keyword arguments
    given to ``filter`` are forwarded here, e.g.
    ``df.groupby("gruplar").filter(filter_func, threshold=5)``.

    Parameters
    ----------
    x : pandas.DataFrame
        The rows belonging to one group.
    column : str, default "degisken1"
        Column whose sample standard deviation is tested.
    threshold : float, default 9
        The group passes only if the standard deviation is strictly greater
        than this value.

    Returns
    -------
    bool
        True when ``x[column].std() > threshold``. A NaN standard deviation
        (e.g. a single-row group) compares as False.
    """
    return bool(x[column].std() > threshold)

In [29]:
df.groupby("gruplar").filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
2,C,33,300
5,C,99,52


In [31]:
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,200
2,C,33,300
3,A,22,56
4,B,11,321
5,C,99,52


In [35]:
df["degisken1"]*9

0     90
1    207
2    297
3    198
4     99
5    891
Name: degisken1, dtype: int64

In [43]:
df.iloc[:,1:2]

Unnamed: 0,degisken1
0,10
1,23
2,33
3,22
4,11
5,99


In [47]:
df_a = df.iloc[:,1:3]

In [51]:
df_a.transform(lambda x: (x-x.mean()) / x.std())

Unnamed: 0,degisken1,degisken2
0,-0.687871,-0.594154
1,-0.299074,0.236831
2,0.0,1.067816
3,-0.328982,-0.959788
4,-0.657963,1.242323
5,1.97389,-0.993027


In [59]:
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,200
2,C,33,300
3,A,22,56
4,B,11,321
5,C,99,52


In [61]:
df.apply(np.sum)

gruplar      ABCABC
degisken1       198
degisken2      1029
dtype: object

In [67]:
# Applying np.mean to a whole group collapses degisken1 and degisken2 into one number per group;
# select the numeric columns and average each of them separately instead.
df.groupby("gruplar")[["degisken1", "degisken2"]].apply(lambda grup: grup.mean())

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16.0,78.0
B,17.0,260.5
C,66.0,176.0


# Pivot Tablolar

In [70]:
import pandas as pd
import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [74]:
titanic.groupby("sex")[["survived"]].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [78]:
# "class" is a categorical column; pass `observed` explicitly because relying on the groupby
# default for categorical keys is deprecated. observed=False keeps the result shown below.
titanic.groupby(["sex", "class"], observed=False)[["survived"]].aggregate("mean").unstack()

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [80]:
# Same survival-rate table via pivot_table; `observed` is passed explicitly because the implicit
# default for categorical keys ("class") is deprecated. observed=False keeps the result shown below.
titanic.pivot_table("survived", index="sex", columns="class", observed=False)

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [82]:
titanic.age.head()

0    22.0
1    38.0
2    26.0
3    35.0
4    35.0
Name: age, dtype: float64

In [86]:
age = pd.cut(titanic["age"], [0, 18, 90])
age.head(10)

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64, right]): [(0, 18] < (18, 90]]

In [92]:
# Rows: sex x age bin (the categorical Series built with pd.cut), columns: passenger class.
# Keyword arguments make the roles explicit; `observed` is passed because the implicit default
# for categorical keys is deprecated. observed=False keeps the result shown below.
titanic.pivot_table("survived", index=["sex", age], columns="class", observed=False)

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663


# Dış Kaynaklı Veri Okumak

In [103]:
import pandas as pd

In [137]:
pd.read_csv("duz_metin.txt")

Unnamed: 0,hfhgjhkjhj
