In [1]:
import numpy as np
import pandas as pd
from numpy.random import randn
# Seed NumPy's global RNG so the random frame below is reproducible.
np.random.seed(101)
# 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df3 = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=["A", "B", "C", "D", "E"],
    columns=["W", "X", "Y", "Z"],
)

In [2]:
df3

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


## reset_index(), set_index()

In [5]:
df3.reset_index()  # row labels become another column on the left

Unnamed: 0,index,W,X,Y,Z
0,A,2.70685,0.628133,0.907969,0.503826
1,B,0.651118,-0.319318,-0.848077,0.605965
2,C,-2.018168,0.740122,0.528813,-0.589001
3,D,0.188695,-0.758872,-0.933237,0.955057
4,E,0.190794,1.978757,2.605967,0.683509


In [7]:
df3.reset_index(drop = True)

Unnamed: 0,W,X,Y,Z
0,2.70685,0.628133,0.907969,0.503826
1,0.651118,-0.319318,-0.848077,0.605965
2,-2.018168,0.740122,0.528813,-0.589001
3,0.188695,-0.758872,-0.933237,0.955057
4,0.190794,1.978757,2.605967,0.683509


In [13]:
newindx = "CA NY WY OR CO".split()  # builds a list of labels from the space-separated string
newindx

['CA', 'NY', 'WY', 'OR', 'CO']

In [14]:
df3["States"] = newindx

In [15]:
df3

Unnamed: 0,W,X,Y,Z,States
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR
E,0.190794,1.978757,2.605967,0.683509,CO


In [18]:
# Promote the "States" column to the row index. The result is reassigned rather
# than using inplace=True, which keeps the step explicit and chainable.
df3 = df3.set_index("States")

In [19]:
df3

Unnamed: 0_level_0,W,X,Y,Z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


## Multi-index and Index Hierarchy

In [22]:
# Index levels: an outer group label paired with an inner number.
outside = ["M1"] * 3 + ["M2"] * 3
inside = [1, 2, 3] * 2
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

# Re-seed so df5 holds the same random values on every run.
np.random.seed(101)
df5 = pd.DataFrame(
    np.random.randn(6, 2),
    index=hier_index,
    columns=["A", "B"],
)

In [23]:
df5

Unnamed: 0,Unnamed: 1,A,B
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


In [27]:
df5.loc["M1"]

Unnamed: 0,A,B
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [28]:
df5.loc["M1"][["A"]]

Unnamed: 0,A
1,2.70685
2,0.907969
3,0.651118


In [31]:
df5.loc["M1"].loc[[1]]

Unnamed: 0,A,B
1,2.70685,0.628133


In [32]:
df5.index

MultiIndex([('M1', 1),
            ('M1', 2),
            ('M1', 3),
            ('M2', 1),
            ('M2', 2),
            ('M2', 3)],
           )

In [33]:
df5.index.names

FrozenList([None, None])

In [34]:
df5.index.names = ["Grup", "Number"]  # give the two index levels names

In [35]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


## Getting values at a specified index level with `xs()`

In [36]:
df5.xs("M1")

Unnamed: 0_level_0,A,B
Number,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [37]:
df5.xs("M2")

Unnamed: 0_level_0,A,B
Number,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.848077,0.605965
2,-2.018168,0.740122
3,0.528813,-0.589001


In [41]:
df5.xs(("M1",2), level = [0,1])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,2,0.907969,0.503826


In [44]:
df5.xs(("M1",2))

A    0.907969
B    0.503826
Name: (M1, 2), dtype: float64

In [45]:
# Small animal table; three of its columns are moved into a three-level row index.
d = {
    'num_legs': [4, 4, 2, 2],
    'num_wings': [0, 0, 2, 2],
    'class': ['mammal', 'mammal', 'bird', 'bird'],
    'animal': ['tiger', 'fox', 'penguin', 'sparrow'],
    'locomotion': ['walks', 'walks', 'walks', 'flies'],
}
df = pd.DataFrame(d).set_index(['class', 'animal', 'locomotion'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
mammal,fox,walks,4,0
bird,penguin,walks,2,2
bird,sparrow,flies,2,2


In [46]:
df.xs(("fox", "walks"), level = [1,2])

Unnamed: 0_level_0,num_legs,num_wings
class,Unnamed: 1_level_1,Unnamed: 2_level_1
mammal,4,0


In [47]:
df.xs("tiger", level=1)  # level must be specified here; it can be omitted only for level 0

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
class,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
mammal,walks,4,0


In [48]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
mammal,fox,walks,4,0
bird,penguin,walks,2,2
bird,sparrow,flies,2,2


In [50]:
df.xs("bird")  # no need to pass level because this key belongs to level 0

Unnamed: 0_level_0,Unnamed: 1_level_0,num_legs,num_wings
animal,locomotion,Unnamed: 2_level_1,Unnamed: 3_level_1
penguin,walks,2,2
sparrow,flies,2,2


In [53]:
df.xs(("bird", "sparrow", "flies"))  # the key must be passed as one tuple; separate arguments would be taken as other parameters of xs and raise an error

num_legs     2
num_wings    2
Name: (bird, sparrow, flies), dtype: int64

In [56]:
df.xs(("fox", "walks"), level = [1,2])

Unnamed: 0_level_0,num_legs,num_wings
class,Unnamed: 1_level_1,Unnamed: 2_level_1
mammal,4,0


In [57]:
df.xs(("fox", "walks"), level = [1,"locomotion"])  # a level can also be given by name

Unnamed: 0_level_0,num_legs,num_wings
class,Unnamed: 1_level_1,Unnamed: 2_level_1
mammal,4,0


In [58]:
df5.xs(("M1", 1))

A    2.706850
B    0.628133
Name: (M1, 1), dtype: float64

In [60]:
df5.xs(("M1", 1), level = [0,1])  # with level given, the result is a DataFrame rather than a Series

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133


In [61]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


In [62]:
df5.xs(1, level = "Number")  # shows only the rows whose Number level equals 1

Unnamed: 0_level_0,A,B
Grup,Unnamed: 1_level_1,Unnamed: 2_level_1
M1,2.70685,0.628133
M2,-0.848077,0.605965


In [63]:
df5.xs(1, level = 1)

Unnamed: 0_level_0,A,B
Grup,Unnamed: 1_level_1,Unnamed: 2_level_1
M1,2.70685,0.628133
M2,-0.848077,0.605965


In [64]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
mammal,fox,walks,4,0
bird,penguin,walks,2,2
bird,sparrow,flies,2,2


In [67]:
df.drop(index="fox", level=1)  # the fox row is gone in the returned frame; df itself is unchanged

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
bird,penguin,walks,2,2
bird,sparrow,flies,2,2


In [68]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,num_legs,num_wings
class,animal,locomotion,Unnamed: 3_level_1,Unnamed: 4_level_1
mammal,tiger,walks,4,0
mammal,fox,walks,4,0
bird,penguin,walks,2,2
bird,sparrow,flies,2,2


In [69]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


In [70]:
df5.describe()

Unnamed: 0,A,B
count,6.0,6.0
mean,0.321418,0.261621
std,1.614151,0.565964
min,-2.018168,-0.589001
25%,-0.503854,-0.113532
50%,0.589966,0.554896
75%,0.843757,0.622591
max,2.70685,0.740122


In [71]:
df5.describe().T  # transpose: rows become columns and columns become rows

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,6.0,0.321418,1.614151,-2.018168,-0.503854,0.589966,0.843757,2.70685
B,6.0,0.261621,0.565964,-0.589001,-0.113532,0.554896,0.622591,0.740122


In [72]:
df5.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
A,6.0,0.321418,1.614151,-2.018168,-0.503854,0.589966,0.843757,2.70685
B,6.0,0.261621,0.565964,-0.589001,-0.113532,0.554896,0.622591,0.740122


In [73]:
df5.isnull()  # True wherever a value is missing

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,False,False
M1,2,False,False
M1,3,False,False
M2,1,False,False
M2,2,False,False
M2,3,False,False


In [74]:
df5.isnull().sum()  # number of missing values per column; there are none

A    0
B    0
dtype: int64

In [75]:
df5

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


In [76]:
df5.dropna()  # drops rows containing NaN (df5 has none, so every row remains)

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Grup,Number,Unnamed: 2_level_1,Unnamed: 3_level_1
M1,1,2.70685,0.628133
M1,2,0.907969,0.503826
M1,3,0.651118,-0.319318
M2,1,-0.848077,0.605965
M2,2,-2.018168,0.740122
M2,3,0.528813,-0.589001


In [79]:
df5.dropna().info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 6 entries, ('M1', 1) to ('M2', 3)
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       6 non-null      float64
 1   B       6 non-null      float64
dtypes: float64(2)
memory usage: 256.0+ bytes


In [80]:
df5.dropna().describe()  # summary statistics after dropping NaN rows

Unnamed: 0,A,B
count,6.0,6.0
mean,0.321418,0.261621
std,1.614151,0.565964
min,-2.018168,-0.589001
25%,-0.503854,-0.113532
50%,0.589966,0.554896
75%,0.843757,0.622591
max,2.70685,0.740122


In [81]:
df5["A"].value_counts(dropna = False)  # how often each value occurs in A; dropna=False also counts NaN

 0.651118    1
 0.528813    1
 0.907969    1
 2.706850    1
-0.848077    1
-2.018168    1
Name: A, dtype: int64

In [82]:
df5["A"].nunique()  # there are 6 unique values

6

In [83]:
df5.groupby("B")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002ADDA6463C8>

In [84]:
df5.groupby("B").mean()

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
-0.589001,0.528813
-0.319318,0.651118
0.503826,0.907969
0.605965,-0.848077
0.628133,2.70685
0.740122,-2.018168


In [85]:
# Raw sales records: two people per company, used for the groupby examples.
data = {
    'Company': ['GOOG', 'GOOG', 'MSFT', 'MSFT', 'FB', 'FB'],
    'Person': ['Sam', 'Charlie', 'Amy', 'Vanessa', 'Carl', 'Sarah'],
    'Sales': [200, 120, 340, 124, 243, 350],
}

In [86]:
df1 = pd.DataFrame(data)

In [87]:
df1

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [88]:
by_comp = df1.groupby("Company")

In [89]:
# Mean sales per company. The numeric column is selected explicitly: since
# pandas 2.0 a groupby mean no longer silently drops the non-numeric "Person"
# column and raises TypeError instead.
by_comp[["Sales"]].mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [90]:
# Standard deviation of sales per company. The numeric column is selected
# explicitly: since pandas 2.0 a groupby std no longer silently drops the
# non-numeric "Person" column and raises an error instead.
by_comp[["Sales"]].std()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,75.660426
GOOG,56.568542
MSFT,152.735065


In [91]:
by_comp.min()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,Carl,243
GOOG,Charlie,120
MSFT,Amy,124


In [92]:
# Mean sales per company, shown again for comparison with min/max. The numeric
# column is selected explicitly: since pandas 2.0 a groupby mean raises
# TypeError on the non-numeric "Person" column.
by_comp[["Sales"]].mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [93]:
by_comp.max()

Unnamed: 0_level_0,Person,Sales
Company,Unnamed: 1_level_1,Unnamed: 2_level_1
FB,Sarah,350
GOOG,Sam,200
MSFT,Vanessa,340


In [94]:
by_comp.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [95]:
by_comp.describe().T

Unnamed: 0,Company,FB,GOOG,MSFT
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,160.0,232.0
Sales,std,75.660426,56.568542,152.735065
Sales,min,243.0,120.0,124.0
Sales,25%,269.75,140.0,178.0
Sales,50%,296.5,160.0,232.0
Sales,75%,323.25,180.0,286.0
Sales,max,350.0,200.0,340.0


In [97]:
by_comp.describe().T[["GOOG"]]

Unnamed: 0,Company,GOOG
Sales,count,2.0
Sales,mean,160.0
Sales,std,56.568542
Sales,min,120.0
Sales,25%,140.0
Sales,50%,160.0
Sales,75%,180.0
Sales,max,200.0


In [99]:
# Toy frame with a repeated value (444) in col2, used for the unique/count examples.
df2 = pd.DataFrame(
    {
        'col1': [1, 2, 3, 4],
        'col2': [444, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)

In [100]:
df2["col2"].unique()

array([444, 555, 666], dtype=int64)

In [102]:
df2["col2"].nunique()

3

In [101]:
df2["col2"].value_counts(dropna= False)

444    2
555    1
666    1
Name: col2, dtype: int64

In [103]:
# Keep the rows of df2 where col1 is greater than 2 and col2 equals 444.
col1_over_two = df2["col1"].gt(2)
col2_is_444 = df2["col2"].eq(444)
newdf = df2.loc[col1_over_two & col2_is_444]

In [104]:
newdf

Unnamed: 0,col1,col2,col3
3,4,444,xyz


In [106]:
df2["col1"].sum()

10

In [107]:
del df2["col1"]  # removes the column in place; an alternative to drop(..., inplace=True)

In [108]:
df2

Unnamed: 0,col2,col3
0,444,abc
1,555,def
2,666,ghi
3,444,xyz


In [109]:
df2.columns

Index(['col2', 'col3'], dtype='object')

In [110]:
df2.index

RangeIndex(start=0, stop=4, step=1)

In [111]:
df2

Unnamed: 0,col2,col3
0,444,abc
1,555,def
2,666,ghi
3,444,xyz


In [113]:
df2.sort_values(by = "col2")  # sort the rows by the values in col2

Unnamed: 0,col2,col3
0,444,abc
3,444,xyz
1,555,def
2,666,ghi


In [114]:
df2.isnull()  # isnull marks missing values as True

Unnamed: 0,col2,col3
0,False,False
1,False,False
2,False,False
3,False,False


In [115]:
df2.dropna()  # every row remains; nothing is dropped because there are no NaN values

Unnamed: 0,col2,col3
0,444,abc
1,555,def
2,666,ghi
3,444,xyz


In [116]:
# Frame with one NaN in col1 and one in col2, used for the fillna/dropna examples.
# Note: this rebinds df3, replacing the frame built earlier in the notebook.
df3 = pd.DataFrame(
    {
        'col1': [1, 2, 3, np.nan],
        'col2': [np.nan, 555, 666, 444],
        'col3': ['abc', 'def', 'ghi', 'xyz'],
    }
)

In [117]:
df3

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz


In [118]:
df3.fillna("süt")

Unnamed: 0,col1,col2,col3
0,1,süt,abc
1,2,555,def
2,3,666,ghi
3,süt,444,xyz


In [119]:
df3

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz


In [121]:
df3.dropna()  # axis=0 is the default, so the rows that contain a NaN are dropped

Unnamed: 0,col1,col2,col3
1,2.0,555.0,def
2,3.0,666.0,ghi


In [122]:
df3.dropna(how="all")  # drops a row only if every value in it is NaN; otherwise the row is kept

Unnamed: 0,col1,col2,col3
0,1.0,,abc
1,2.0,555.0,def
2,3.0,666.0,ghi
3,,444.0,xyz
