# Pandas Serisi Oluşturmak

In [3]:
import pandas as pd

In [8]:
pd.Series([10,88,3,4,5])

0    10
1    88
2     3
3     4
4     5
dtype: int64

In [5]:
seri = pd.Series([10,88,3,4,5])

In [6]:
type(seri)

pandas.core.series.Series

In [9]:
seri.axes

[RangeIndex(start=0, stop=5, step=1)]

In [10]:
seri.dtype

dtype('int64')

In [11]:
seri.size

5

In [12]:
seri.ndim

1

In [13]:
seri.values

array([10, 88,  3,  4,  5], dtype=int64)

In [14]:
seri.head(3)
#baştan setlere bakar.

0    10
1    88
2     3
dtype: int64

In [15]:
seri.tail(3)
#sondan setlere bakar.

2    3
3    4
4    5
dtype: int64

In [None]:
 # index isimlendirmeleri

In [17]:
pd.Series([15,245,331242,92134,1125])

0        15
1       245
2    331242
3     92134
4      1125
dtype: int64

In [20]:
pd.Series([15,245,331242,92134,1125], index = [10,11,12,13,14])
# indexleri isimlendirdik, sadece numerik değil string ifadelerle de isimlendirilebilir.

10        15
11       245
12    331242
13     92134
14      1125
dtype: int64

In [26]:
seri = pd.Series([15,245,331242,92134,1125], index = ["a","b","c","d","e"])
pd.Series([15,245,331242,92134,1125], index = ["a","b","c","d","e"])

a        15
b       245
c    331242
d     92134
e      1125
dtype: int64

In [25]:
seri["a":"c"]

a        15
b       245
c    331242
dtype: int64

In [None]:
#sözlük üzerinden liste oluşturmak

In [27]:
sozluk = {"reg":10, "log":11,"cart":12}

In [28]:
seri = pd.Series(sozluk)

In [29]:
seri

reg     10
log     11
cart    12
dtype: int64

In [None]:
# iki seriyi birleştirerek seri oluşturma

In [30]:
pd.concat([seri,seri])

reg     10
log     11
cart    12
reg     10
log     11
cart    12
dtype: int64

# Eleman işlemleri

In [4]:
import numpy as np

In [32]:
a = np.array([412,1231,5346,6242,121])

In [35]:
seri = pd.Series(a)
seri

0     412
1    1231
2    5346
3    6242
4     121
dtype: int32

In [36]:
seri[0]

412

In [37]:
seri[0:2]

0     412
1    1231
dtype: int32

In [40]:
seri = pd.Series([218213,124141,213127,7921235], 
                 index = ["reg","loj","cart","rf"])
seri

reg      218213
loj      124141
cart     213127
rf      7921235
dtype: int64

In [41]:
seri.index

Index(['reg', 'loj', 'cart', 'rf'], dtype='object')

In [42]:
seri.keys

<bound method Series.keys of reg      218213
loj      124141
cart     213127
rf      7921235
dtype: int64>

In [43]:
list(seri.items())

[('reg', 218213), ('loj', 124141), ('cart', 213127), ('rf', 7921235)]

In [44]:
seri.values

array([ 218213,  124141,  213127, 7921235], dtype=int64)

In [None]:
#eleman sorgulama

In [45]:
"reg" in seri

True

In [46]:
seri["reg"]

218213

In [47]:
seri[["rf","reg"]]

rf     7921235
reg     218213
dtype: int64

In [48]:
seri["rf"] = 120
seri

reg     218213
loj     124141
cart    213127
rf         120
dtype: int64

# Pandas ile DataFrame Oluşturma

In [49]:
l = [1,2,3,4,5]
l

[1, 2, 3, 4, 5]

In [51]:
pd.DataFrame(l, columns = ["degisken_ismi"])

Unnamed: 0,degisken_ismi
0,1
1,2
2,3
3,4
4,5


In [54]:
m = np.arange(1,10).reshape((3,3))
m

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
pd.DataFrame(m, columns = ["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [None]:
#df isimlendirme

In [58]:
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df.head()

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


In [59]:
df.columns 

Index(['var1', 'var2', 'var3'], dtype='object')

In [60]:
df.columns = ("deg1","deg2","deg3")
df.columns

Index(['deg1', 'deg2', 'deg3'], dtype='object')

In [61]:
type(df)

pandas.core.frame.DataFrame

In [62]:
df.axes

[RangeIndex(start=0, stop=3, step=1),
 Index(['deg1', 'deg2', 'deg3'], dtype='object')]

In [63]:
df.shape

(3, 3)

In [64]:
df.ndim

2

In [65]:
df.size

9

In [66]:
df.values

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [67]:
type(df.values)

numpy.ndarray

In [68]:
df.head(1)

Unnamed: 0,deg1,deg2,deg3
0,1,2,3


In [69]:
df.tail(2)

Unnamed: 0,deg1,deg2,deg3
1,4,5,6
2,7,8,9


In [70]:
a = np.array([1,2,3,4,5])

In [71]:
pd.DataFrame(a,columns = ["deg1"])

Unnamed: 0,deg1
0,1
1,2
2,3
3,4
4,5


# Eleman İşlemleri

In [5]:
s1 = np.random.randint(10, size = 5)
s2 = np.random.randint(10, size = 5)
s3 = np.random.randint(10, size = 5)

In [6]:
sozluk = {"var1": s1, "var2": s2, "var3":s3}

In [7]:
sozluk

{'var1': array([3, 1, 5, 8, 5]),
 'var2': array([5, 9, 0, 8, 1]),
 'var3': array([5, 8, 9, 7, 0])}

In [10]:
import pandas as pd
df = pd.DataFrame(sozluk)
df

Unnamed: 0,var1,var2,var3
0,3,5,5
1,1,9,8
2,5,0,9
3,8,8,7
4,5,1,0


In [77]:
df[0:1]

Unnamed: 0,var1,var2,var3
0,2,0,2


In [78]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [11]:
df.index = ["a","b","c","d","e"] 
df

Unnamed: 0,var1,var2,var3
a,3,5,5
b,1,9,8
c,5,0,9
d,8,8,7
e,5,1,0


In [85]:
#silme

In [96]:
df.drop("a", axis = 0)

Unnamed: 0,var1,var2,var3
b,1,4,8
c,1,6,6
d,5,4,2
e,7,4,0


In [86]:
df
# veri setinin ana yapısı üzerinde değişiklik olmaz

Unnamed: 0,var1,var2,var3
a,2,0,2
b,2,9,9
c,7,7,1
d,9,2,4
e,1,2,4


In [12]:
df.drop("a", axis = 0, inplace = True)
df

Unnamed: 0,var1,var2,var3
b,1,9,8
c,5,0,9
d,8,8,7
e,5,1,0


In [None]:
# inplace komutu kalıcı olarak siler

In [98]:
#fancy

In [13]:
l = ["c","e"]

In [14]:
df.drop(l, axis = 0)

Unnamed: 0,var1,var2,var3
b,1,9,8
d,8,8,7


In [101]:
# degiskenler için

In [15]:
"var1" in df

True

In [16]:
l = ["var1","var4","var2"]

In [17]:
for i in l:
    print(i in df)

True
False
True


In [105]:
df

Unnamed: 0,var1,var2,var3
b,1,4,8
c,1,6,6
d,5,4,2
e,7,4,0


In [18]:
df["var4"] = df["var1"] * df["var2"]
df

Unnamed: 0,var1,var2,var3,var4
b,1,9,8,9
c,5,0,9,0
d,8,8,7,64
e,5,1,0,5


In [110]:
#degisken silme 

In [19]:
df.drop("var4", axis = 1)

Unnamed: 0,var1,var2,var3
b,1,9,8
c,5,0,9
d,8,8,7
e,5,1,0


In [20]:
df.drop("var4", axis = 1, inplace = True)
df

Unnamed: 0,var1,var2,var3
b,1,9,8
c,5,0,9
d,8,8,7
e,5,1,0


In [21]:
l = ["var1","var2"]

In [22]:
df.drop(l, axis = 1)

Unnamed: 0,var3
b,8
c,9
d,7
e,0


# Birleştirme(Join) İşlemleri

In [61]:
import numpy as np
import pandas as pd
m = np.random.randint(1,30, size = (5,3))
df1 = pd.DataFrame(m, columns = ["var1","var2","var3"])
df1

Unnamed: 0,var1,var2,var3
0,25,1,21
1,29,26,26
2,15,23,1
3,28,7,2
4,14,10,5


In [46]:
df2 = df1 + 99

In [47]:
df2

Unnamed: 0,var1,var2,var3
0,108,118,122
1,124,103,108
2,111,115,111
3,128,116,109
4,117,100,107


In [48]:
pd.concat([df1,df2])

Unnamed: 0,var1,var2,var3
0,9,19,23
1,25,4,9
2,12,16,12
3,29,17,10
4,18,1,8
0,108,118,122
1,124,103,108
2,111,115,111
3,128,116,109
4,117,100,107


In [29]:
?pd.concat

[1;31mSignature:[0m
[0mpd[0m[1;33m.[0m[0mconcat[0m[1;33m([0m[1;33m
[0m    [0mobjs[0m[1;33m:[0m [1;34m'Iterable[NDFrame] | Mapping[Hashable, NDFrame]'[0m[1;33m,[0m[1;33m
[0m    [0maxis[0m[1;33m:[0m [1;34m'Axis'[0m [1;33m=[0m [1;36m0[0m[1;33m,[0m[1;33m
[0m    [0mjoin[0m[1;33m:[0m [1;34m'str'[0m [1;33m=[0m [1;34m'outer'[0m[1;33m,[0m[1;33m
[0m    [0mignore_index[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mkeys[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mlevels[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mnames[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m[1;33m
[0m    [0mverify_integrity[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0msort[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mFalse[0m[1;33m,[0m[1;33m
[0m    [0mcopy[0m[1;33m:[0m [1;34m'bool'[0m [1;33m=[0m [1;32mTrue[0m[1;33m,[

In [49]:
pd.concat([df1,df2], ignore_index = True)

Unnamed: 0,var1,var2,var3
0,9,19,23
1,25,4,9
2,12,16,12
3,29,17,10
4,18,1,8
5,108,118,122
6,124,103,108
7,111,115,111
8,128,116,109
9,117,100,107


In [36]:
df1.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [37]:
df2.columns

Index(['var1', 'var2', 'var3'], dtype='object')

In [62]:
df2.columns = ["var1","var2","deg1"]

In [56]:
pd.concat([df1, df2], join = "inner", ignore_index = True)

Unnamed: 0,var1,var2
0,17,7
1,8,14
2,25,28
3,23,26
4,9,11
5,108,118
6,124,103
7,111,115
8,128,116
9,117,100


# İleri Birleştirme İşlemleri

In [None]:
# birebir birleştirme

In [67]:
df1 = pd.DataFrame({'calisanlar':['Ali','Veli','Ayse','Fatma'],
                    'grup':['Muhasebe','Muhendislik','Muhendislik','IK']})
df1

Unnamed: 0,calisanlar,grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,IK


In [66]:
df2 = pd.DataFrame({'calisanlar':['Ayse','Ali','Veli','Fatma'],
                    'ilk_giris':[2010,2009,2014,2019]})
df2

Unnamed: 0,calisanlar,ilk_giris
0,Ayse,2010
1,Ali,2009
2,Veli,2014
3,Fatma,2019


In [68]:
pd.merge(df1,df2)

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,IK,2019


In [69]:
pd.merge(df1,df2, on = "calisanlar")
# merge komutu nereden birleştireceğini kendisi anlar fakat
# biz on komutu ile nerde birleştirileceğini belirleyebiliyoruz.

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,IK,2019


In [70]:
# coktan teke

In [71]:
df3 = pd.merge(df1,df2)
df3

Unnamed: 0,calisanlar,grup,ilk_giris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,IK,2019


In [74]:
df4 = pd.DataFrame({'grup': ['Muhasebe','Muhendislik','IK'],
                   'mudur': ['Caner','Mustafa','Berkcan']})
df4

Unnamed: 0,grup,mudur
0,Muhasebe,Caner
1,Muhendislik,Mustafa
2,IK,Berkcan


In [75]:
pd.merge(df3,df4)

Unnamed: 0,calisanlar,grup,ilk_giris,mudur
0,Ali,Muhasebe,2009,Caner
1,Veli,Muhendislik,2014,Mustafa
2,Ayse,Muhendislik,2010,Mustafa
3,Fatma,IK,2019,Berkcan


In [76]:
df5 = pd.DataFrame({'grup': ['Muhasebe', 'Muhasebe',
                             'Muhendislik','Muhendislik','IK','IK'],
                    'yetenekler': ['matematik','excel','kodlama',
                                   'linux','excel','yonetim']})
df5

Unnamed: 0,grup,yetenekler
0,Muhasebe,matematik
1,Muhasebe,excel
2,Muhendislik,kodlama
3,Muhendislik,linux
4,IK,excel
5,IK,yonetim


In [82]:
pd.merge(df1,df5)

Unnamed: 0,calisanlar,grup,yetenekler
0,Ali,Muhasebe,matematik
1,Ali,Muhasebe,excel
2,Veli,Muhendislik,kodlama
3,Veli,Muhendislik,linux
4,Ayse,Muhendislik,kodlama
5,Ayse,Muhendislik,linux
6,Fatma,IK,excel
7,Fatma,IK,yonetim


# Toplulaşma ve Gruplama(Aggregation & Grouping)

##### Basit toplulaştırma fonksiyonaları:

* count()
* first()
* last()
* mean()
* median()
* min()
* max()
* std()
* var()
* sum()

In [6]:
import seaborn as sns

In [36]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [10]:
df.shape

(1035, 6)

In [37]:
df.mean()

  df.mean()


number               1.785507
orbital_period    2002.917596
mass                 2.638161
distance           264.069282
year              2009.070531
dtype: float64

In [38]:
df["mass"].mean()
#ortamala

2.6381605847953216

In [15]:
df["mass"].count()

513

In [16]:
df["mass"].min()

0.0036

In [17]:
df["mass"].max()

25.0

In [18]:
df["mass"].sum()

1353.37638

In [19]:
df["mass"].std()

3.8186166509616046

In [21]:
df["mass"].var()

14.58183312700122

In [23]:
df.describe()
#toplu hali

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [30]:
df.describe().T
#transpozu

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [27]:
df.dropna().describe().T
#eksik değerler olsa dahi betimsel değerlere bakılılabilir.

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,498.0,1.73494,1.17572,1.0,1.0,1.0,2.0,6.0
orbital_period,498.0,835.778671,1469.128259,1.3283,38.27225,357.0,999.6,17337.5
mass,498.0,2.50932,3.636274,0.0036,0.2125,1.245,2.8675,25.0
distance,498.0,52.068213,46.596041,1.35,24.4975,39.94,59.3325,354.0
year,498.0,2007.37751,4.167284,1989.0,2005.0,2009.0,2011.0,2014.0


# Gruplama İşlemleri

In [32]:
import pandas as pd
df = pd.DataFrame({'gruplar': ['A','B','C','A','B','C'],
                  'veri': [10,11,52,23,43,55]}, columns = ['gruplar','veri'])
df

Unnamed: 0,gruplar,veri
0,A,10
1,B,11
2,C,52
3,A,23
4,B,43
5,C,55


In [34]:
df.groupby("gruplar").mean()
# gruplama işlemi

Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
A,16.5
B,27.0
C,53.5


In [39]:
df = sns.load_dataset("planets")
df.head()

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.3,7.1,77.4,2006
1,Radial Velocity,1,874.774,2.21,56.95,2008
2,Radial Velocity,1,763.0,2.6,19.84,2011
3,Radial Velocity,1,326.03,19.4,110.62,2007
4,Radial Velocity,1,516.22,10.5,119.47,2009


In [41]:
df.groupby("method")["orbital_period"].mean()
#gruplama yapılıyor ardından neye göre gruplama yapılacağı belirleniyor.

method
Astrometry                          631.180000
Eclipse Timing Variations          4751.644444
Imaging                          118247.737500
Microlensing                       3153.571429
Orbital Brightness Modulation         0.709307
Pulsar Timing                      7343.021201
Pulsation Timing Variations        1170.000000
Radial Velocity                     823.354680
Transit                              21.102073
Transit Timing Variations            79.783500
Name: orbital_period, dtype: float64

In [42]:
df.groupby("method")["orbital_period"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Astrometry,2.0,631.18,544.217663,246.36,438.77,631.18,823.59,1016.0
Eclipse Timing Variations,9.0,4751.644444,2499.130945,1916.25,2900.0,4343.5,5767.0,10220.0
Imaging,12.0,118247.7375,213978.177277,4639.15,8343.9,27500.0,94250.0,730000.0
Microlensing,7.0,3153.571429,1113.166333,1825.0,2375.0,3300.0,3550.0,5100.0
Orbital Brightness Modulation,3.0,0.709307,0.725493,0.240104,0.291496,0.342887,0.943908,1.544929
Pulsar Timing,5.0,7343.021201,16313.265573,0.090706,25.262,66.5419,98.2114,36525.0
Pulsation Timing Variations,1.0,1170.0,,1170.0,1170.0,1170.0,1170.0,1170.0
Radial Velocity,553.0,823.35468,1454.92621,0.73654,38.021,360.2,982.0,17337.5
Transit,397.0,21.102073,46.185893,0.355,3.16063,5.714932,16.1457,331.60059
Transit Timing Variations,3.0,79.7835,71.599884,22.3395,39.67525,57.011,108.5055,160.0


# İleri Toplulaştırma İşlemleri(Agggregatei, filter, transform, apply)

In [44]:
df = pd.DataFrame({'gruplar': ['A','B','C','A','B','C'],
                  'degisken1': [10,11,52,23,43,55],
                  'degisken2': [100,121,525,2123,433,515]},
                  columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,11,121
2,C,52,525
3,A,23,2123
4,B,43,433
5,C,55,515


In [45]:
df.groupby("gruplar").mean()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,16.5,1111.5
B,27.0,277.0
C,53.5,520.0


In [50]:
import numpy as np
df.groupby("gruplar").aggregate([min, np.median, max])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16.5,23,100,1111.5,2123
B,11,27.0,43,121,277.0,433
C,52,53.5,55,515,520.0,525


In [53]:
df.groupby("gruplar").aggregate({"degisken1": min,"degisken2": max})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,2123
B,11,433
C,52,525


In [54]:
# filter

In [55]:
df = pd.DataFrame({'gruplar': ['A','B','C','A','B','C'],
                  'degisken1': [10,11,52,23,43,55],
                  'degisken2': [100,121,525,2123,433,515]},
                  columns = ['gruplar','degisken1','degisken2'])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,11,121
2,C,52,525
3,A,23,2123
4,B,43,433
5,C,55,515


In [67]:
def filter_func(x):
    return x["degisken1"].std() > 10
# kendi tanımladığımız fonksiyon

In [62]:
df.groupby("gruplar").std()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,9.192388,1430.477018
B,22.627417,220.617316
C,2.12132,7.071068


In [66]:
df.groupby("gruplar").filter(filter_func)
# kendi kodladığımız fonksiyonu kullanıyoruz

Unnamed: 0,gruplar,degisken1,degisken2
1,B,11,121
4,B,43,433


In [68]:
# transform  

In [69]:
df["degisken1"]*10

0    100
1    110
2    520
3    230
4    430
5    550
Name: degisken1, dtype: int64

In [72]:
df_a = df.iloc[:,1:3]

In [75]:
df_a.transform(lambda x: x-x.mean() / x.std())

Unnamed: 0,degisken1,degisken2
0,8.405244,99.154682
1,9.405244,120.154682
2,50.405244,524.154682
3,21.405244,2122.154682
4,41.405244,432.154682
5,53.405244,514.154682


In [78]:
# apply
# dataframelerde gezebilen toplulaştırma işlemi

In [84]:
df = pd.DataFrame({'degisken1': [10,11,52,23,43,55],
                   'degisken2': [100,121,525,2123,433,515]},
                   columns = ['degisken1','degisken2'])
df

Unnamed: 0,degisken1,degisken2
0,10,100
1,11,121
2,52,525
3,23,2123
4,43,433
5,55,515


In [79]:
df.apply(np.sum)

degisken1     194
degisken2    3817
dtype: int64

In [85]:
df.apply(np.mean)

degisken1     32.333333
degisken2    636.166667
dtype: float64

# Pivot Tablolar

In [88]:
import pandas as pd
import seaborn as sns
titanic = sns.load_dataset('titanic')
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [93]:
titanic.groupby("sex")[["survived"]].mean()
# iki tane köşeli parantez atarsak daha düzenli olur

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [96]:
titanic.groupby(["sex","class"])[["survived"]].aggregate("mean")

Unnamed: 0_level_0,Unnamed: 1_level_0,survived
sex,class,Unnamed: 2_level_1
female,First,0.968085
female,Second,0.921053
female,Third,0.5
male,First,0.368852
male,Second,0.157407
male,Third,0.135447


In [97]:
titanic.groupby(["sex","class"])[["survived"]].aggregate("mean").unstack()

Unnamed: 0_level_0,survived,survived,survived
class,First,Second,Third
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [98]:
# pivot ile table

In [99]:
titanic.pivot_table("survived", index = "sex", columns = "class")

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [101]:
age = pd.cut(titanic["age"], [0,18,90])
age.head(10)

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64, right]): [(0, 18] < (18, 90]]

In [102]:
titanic.pivot_table("survived", ["sex",age],  "class")

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663


# Dış Kaynaklı Veri Okumak

In [106]:
#csv okuma
pd.read_csv("reading_data/ornekcsv.csv", sep = ";") 

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [110]:
# txt okuma
pd.read_csv("reading_data/duz_metin.txt") 

Unnamed: 0,1 2
0,2 2
1,3 2
2,4 2
3,5 2
4,6 2
5,7 2
6,8 2
7,9 2
8,10 2


In [113]:
pd.read_excel("reading_data/ornekx.xlsx")

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [114]:
df = pd.read_excel("reading_data/ornekx.xlsx")

In [115]:
type(df)

pandas.core.frame.DataFrame

In [116]:
df.head()

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0


In [119]:
df.columns = ("A","B","C")
df

Unnamed: 0,A,B,C
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [122]:
# sıfırdan txt okuma
data = pd.read_csv("data.txt")

In [123]:
data.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
