In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Build a 3x3 frame of standard-normal samples with labelled rows and columns.
df = pd.DataFrame(
    data=randn(3, 3),
    index=list("ABC"),
    columns=[f"Column{i}" for i in range(1, 4)],
)
df

Unnamed: 0,Column1,Column2,Column3
A,-0.935676,-2.359897,0.205068
B,-0.101575,2.068765,0.494035
C,0.146043,0.917366,0.532742


In [3]:
# Select a single column by name; the result is a Series named after the column.
df["Column1"]

A   -0.935676
B   -0.101575
C    0.146043
Name: Column1, dtype: float64

# Dataframe'e Sütun Ekleme

In [4]:
# Add a fourth column of fresh random samples, aligned to the same row labels.
df["Column4"] = pd.Series(data=randn(3), index=list("ABC"))
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,-0.935676,-2.359897,0.205068,-2.502912
B,-0.101575,2.068765,0.494035,-0.634689
C,0.146043,0.917366,0.532742,-2.23226


In [5]:
# Derived column: row-wise total of the first three columns.
df["Column5"] = df["Column1"].add(df["Column2"]).add(df["Column3"])
df

Unnamed: 0,Column1,Column2,Column3,Column4,Column5
A,-0.935676,-2.359897,0.205068,-2.502912,-3.090506
B,-0.101575,2.068765,0.494035,-0.634689,2.461224
C,0.146043,0.917366,0.532742,-2.23226,1.596151


# Dataframeden Sütun Silme

In [6]:
# Remove the derived column again. ``drop`` returns a new frame, so rebind ``df``
# to keep the change instead of mutating the frame with ``inplace=True``.
df = df.drop(columns="Column5")
df

Unnamed: 0,Column1,Column2,Column3,Column4
A,-0.935676,-2.359897,0.205068,-2.502912
B,-0.101575,2.068765,0.494035,-0.634689
C,0.146043,0.917366,0.532742,-2.23226


# Loc ve ILoc 

In [7]:
df.loc["A"] # label-based lookup: returns the row whose index label is "A"

Column1   -0.935676
Column2   -2.359897
Column3    0.205068
Column4   -2.502912
Name: A, dtype: float64

In [8]:
df.iloc[0] # position-based lookup: position 0 is the row labelled "A"

Column1   -0.935676
Column2   -2.359897
Column3    0.205068
Column4   -2.502912
Name: A, dtype: float64

In [9]:
df.loc["B","Column2"] # fetch a single value by row label and column label

2.068764669571866

In [10]:
# Lists of labels select a sub-frame: rows A and B, columns Column1 and Column2.
df.loc[["A","B"],["Column1","Column2"]]

Unnamed: 0,Column1,Column2
A,-0.935676,-2.359897
B,-0.101575,2.068765


# Dataframe Filtreleme

In [11]:
# Element-wise mask: True wherever the value is greater than -1.
boolDf = df.gt(-1)
boolDf

Unnamed: 0,Column1,Column2,Column3,Column4
A,True,False,True,False
B,True,True,True,True
C,True,True,True,False


In [12]:
# Keep the values where the mask is True; every other cell becomes NaN.
df.where(boolDf)

Unnamed: 0,Column1,Column2,Column3,Column4
A,-0.935676,,0.205068,
B,-0.101575,2.068765,0.494035,-0.634689
C,0.146043,0.917366,0.532742,


In [13]:
# Distinct values found in Column2, returned as a NumPy array.
df["Column2"].unique()

array([-2.35989721,  2.06876467,  0.91736635])

In [14]:
def square_func(x):
    """Return ``x`` raised to the second power."""
    return x ** 2
# Run square_func on every value of Column2; the row labels are preserved.
df["Column2"].apply(square_func)

A    5.569115
B    4.279787
C    0.841561
Name: Column2, dtype: float64

# Sütun Silme

In [15]:
# Small example frame: a running number, a repeated amount and a name per row.
df = pd.DataFrame(
    {
        "Column1": list(range(1, 7)),
        "Column2": [100, 100, 200, 300, 300, 100],
        "Column3": ["Mustafa", "Kamil", "Emre", "Ayşe", "Murat", "Zeynep"],
    }
)
df

Unnamed: 0,Column1,Column2,Column3
0,1,100,Mustafa
1,2,100,Kamil
2,3,200,Emre
3,4,300,Ayşe
4,5,300,Murat
5,6,100,Zeynep


In [16]:
# ``drop`` removes the column and returns a new frame; rebinding ``df`` keeps the
# change without mutating the original frame through ``inplace=True``.
df = df.drop(columns="Column1")
df

Unnamed: 0,Column2,Column3
0,100,Mustafa
1,100,Kamil
2,200,Emre
3,300,Ayşe
4,300,Murat
5,100,Zeynep


# Dataframelerin multi index gruplanması

In [17]:
# Outer level: each group repeated three times. Inner level: the three index
# labels cycled once per group.
outerIndex = ["Group1"] * 3 + ["Group2"] * 3 + ["Group3"] * 3
innerIndex = ["Index1", "Index2", "Index3"] * 3
list(zip(outerIndex, innerIndex))

[('Group1', 'Index1'),
 ('Group1', 'Index2'),
 ('Group1', 'Index3'),
 ('Group2', 'Index1'),
 ('Group2', 'Index2'),
 ('Group2', 'Index3'),
 ('Group3', 'Index1'),
 ('Group3', 'Index2'),
 ('Group3', 'Index3')]

In [18]:
# Pair every outer label with its inner label and turn the pairs into a
# two-level index.
hierarchy = pd.MultiIndex.from_tuples(list(zip(outerIndex, innerIndex)))
hierarchy

MultiIndex([('Group1', 'Index1'),
            ('Group1', 'Index2'),
            ('Group1', 'Index3'),
            ('Group2', 'Index1'),
            ('Group2', 'Index2'),
            ('Group2', 'Index3'),
            ('Group3', 'Index1'),
            ('Group3', 'Index2'),
            ('Group3', 'Index3')],
           )

In [19]:
# Nine rows of standard-normal samples, labelled by the two-level index.
df = pd.DataFrame(data=randn(9, 3), index=hierarchy, columns=list("ABC"))
df

Unnamed: 0,Unnamed: 1,A,B,C
Group1,Index1,-0.075379,0.967503,0.176053
Group1,Index2,0.335332,-0.799082,1.734717
Group1,Index3,-1.071625,-0.941683,-1.346934
Group2,Index1,0.140817,-0.834011,-0.79889
Group2,Index2,-1.17701,-1.075575,-1.778146
Group2,Index3,-1.013486,2.49029,-0.485507
Group3,Index1,-0.624728,-1.140856,-1.431957
Group3,Index2,-0.198067,0.996864,-1.2057
Group3,Index3,0.186924,1.152268,2.542111


In [20]:
# Selecting a column keeps both index levels on the resulting Series.
df["A"]

Group1  Index1   -0.075379
        Index2    0.335332
        Index3   -1.071625
Group2  Index1    0.140817
        Index2   -1.177010
        Index3   -1.013486
Group3  Index1   -0.624728
        Index2   -0.198067
        Index3    0.186924
Name: A, dtype: float64

In [21]:
# An outer-level label selects that group's rows, indexed by the inner level only.
df.loc["Group1"]

Unnamed: 0,A,B,C
Index1,-0.075379,0.967503,0.176053
Index2,0.335332,-0.799082,1.734717
Index3,-1.071625,-0.941683,-1.346934


In [22]:
# Cross-section down to one cell: row (Group1, Index1), then column A.
df.xs(("Group1", "Index1")).xs("A")

-0.07537880626726899

# GroupBy

In [23]:
# Column-oriented sample data: department, employee name and salary per person.
dataset = {
    "Departman": [
        "Bilişim",
        "İnsan Kaynakları",
        "Üretim",
        "Üretim",
        "Bilişim",
        "İnsan Kaynakları",
    ],
    "Çalışan": ["Mustafa", "Jale", "Kadir", "Zeynep", "Murat", "Ahmet"],
    "Maaş": [3000, 3500, 2500, 4500, 4000, 2000],
}

In [24]:
# One column per dictionary key, with a default 0..n-1 row index.
df = pd.DataFrame.from_dict(dataset)
df

Unnamed: 0,Departman,Çalışan,Maaş
0,Bilişim,Mustafa,3000
1,İnsan Kaynakları,Jale,3500
2,Üretim,Kadir,2500
3,Üretim,Zeynep,4500
4,Bilişim,Murat,4000
5,İnsan Kaynakları,Ahmet,2000


In [25]:
# Group the rows by department.
DepGroup = df.groupby("Departman")
# Sum only the numeric columns. On pandas >= 2.0 a plain ``sum()`` would also
# concatenate the employee names in "Çalışan" instead of leaving that column out.
DepGroup.sum(numeric_only=True)

Unnamed: 0_level_0,Maaş
Departman,Unnamed: 1_level_1
Bilişim,7000
Üretim,7000
İnsan Kaynakları,5500


In [26]:
# The maximum is taken per column, independently: "Çalışan" is the alphabetically
# last name and "Maaş" the highest salary, so the two values in a row need not
# belong to the same employee (Bilişim shows Mustafa next to Murat's 4000).
DepGroup.max()

Unnamed: 0_level_0,Çalışan,Maaş
Departman,Unnamed: 1_level_1,Unnamed: 2_level_1
Bilişim,Mustafa,4000
Üretim,Zeynep,4500
İnsan Kaynakları,Jale,3500


# Dataframeleri birleştirme

In [29]:
# Two frames with identical columns (A, B, C) and disjoint row labels:
# 1-4 for the first, 5-8 for the second.
dataset1 = {col: [f"{col}{i}" for i in range(1, 5)] for col in "ABC"}
dataset2 = {col: [f"{col}{i}" for i in range(5, 9)] for col in "ABC"}
df1 = pd.DataFrame(dataset1, index=list(range(1, 5)))
df2 = pd.DataFrame(dataset2, index=list(range(5, 9)))

In [32]:
# Display the first frame (row labels 1-4).
df1

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,A3,B3,C3
4,A4,B4,C4


In [33]:
# Display the second frame (row labels 5-8).
df2

Unnamed: 0,A,B,C
5,A5,B5,C5
6,A6,B6,C6
7,A7,B7,C7
8,A8,B8,C8


In [35]:
pd.concat([df1,df2]) # stacks the frames row-wise: df2's rows follow df1's

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2
3,A3,B3,C3
4,A4,B4,C4
5,A5,B5,C5
6,A6,B6,C6
7,A7,B7,C7
8,A8,B8,C8


# Merge

In [36]:
# Two frames that share the key column "anahtar"; only K1 and K2 occur in both.
dataset1 = {
    "A": ["A1", "A2", "A3"],
    "B": ["B1", "B2", "B3"],
    "anahtar": ["K1", "K2", "K3"],
}
dataset2 = {
    "X": ["X1", "X2", "X3", "X4"],
    "Y": ["Y1", "Y2", "Y3", "Y4"],
    "anahtar": ["K1", "K2", "K5", "K4"],
}
df1 = pd.DataFrame(data=dataset1, index=[1, 2, 3])
df2 = pd.DataFrame(data=dataset2, index=[1, 2, 3, 4])

In [37]:
# Display the left frame of the merge (keys K1-K3).
df1

Unnamed: 0,A,B,anahtar
1,A1,B1,K1
2,A2,B2,K2
3,A3,B3,K3


In [38]:
# Display the right frame of the merge (keys K1, K2, K5, K4).
df2

Unnamed: 0,X,Y,anahtar
1,X1,Y1,K1
2,X2,Y2,K2
3,X3,Y3,K5
4,X4,Y4,K4


In [40]:
# Inner join on the shared key column: only keys present in both frames survive.
df1.merge(df2, how="inner", on="anahtar")

Unnamed: 0,A,B,anahtar,X,Y
0,A1,B1,K1,X1,Y1
1,A2,B2,K2,X2,Y2


In [42]:
# Two frames without a common column; they are matched on their row labels.
# NOTE(review): the last value of column "B" is "A4" — looks like a typo for
# "B4"; kept as-is because the recorded outputs show "A4". Confirm before changing.
dataset1 = {
    "A": ["A1", "A2", "A3", "A4"],
    "B": ["B1", "B2", "B3", "A4"],
}
dataset2 = {
    "X": ["X1", "X2", "X3"],
    "Y": ["Y1", "Y2", "Y3"],
}
df1 = pd.DataFrame(data=dataset1, index=[1, 2, 3, 4])
df2 = pd.DataFrame(data=dataset2, index=[1, 2, 3])

In [43]:
# Display the left frame of the join (row labels 1-4).
df1

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,A4


In [44]:
# Display the right frame of the join (row labels 1-3).
df2

Unnamed: 0,X,Y
1,X1,Y1
2,X2,Y2
3,X3,Y3


In [45]:
df1.join(df2) # combines the frames on their row labels; row 4 has no counterpart in df2, so its X and Y are missing

Unnamed: 0,A,B,X,Y
1,A1,B1,X1,Y1
2,A2,B2,X2,Y2
3,A3,B3,X3,Y3
4,A4,A4,,
