# Yararlı Fonksiyonlar

## .apply() metodu

**.apply()** yöntemi, DataFrame'in sütunlarına özel fonksiyonlar uygulamamıza ve yayınlamamıza olanak tanır. Bir Series'teki her satıra kendi özel Python fonksiyonumuzu uygulamamıza izin verir ve girdi olarak bir veya birden fazla sütun alabilir.

In [2]:
import numpy as np
import pandas as pd

In [4]:
df = pd.read_csv('tips.csv')

In [5]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


### Özel Fonksiyon ile .apply() yöntemini kullanma:

In [7]:
# Bir string dizisinin son 4 basamağını veren fonksiyon tanımlama

def last_four(num):
    """Return the last four characters of ``num``'s string form.

    ``num`` is converted with ``str`` first, so integers such as card
    numbers are accepted. A value whose string form has fewer than four
    characters is returned whole.
    """
    text = str(num)
    tail = slice(-4, None)
    return text[tail]

# Pandas'ta bu fonksiyonun işlevini yapan yerleşik bir yöntem yoktur.
# Bu gibi durumlarda .apply() yöntemi devreye girer.

#### Negatif indeks
Negatif indeks sondan başla, dört indeks konumu geri git ve sonra iki nokta üst üste ile indeksin sonuna kadar git anlamına gelir. Yani eksi dörtten başlayarak indeksin sonuna kadar gidin.

In [8]:
df['CC Number'][0]

3560325168603410

In [9]:
last_four(3560325168603410)

'3410'

In [10]:
df['last_four'] = df['CC Number'].apply(last_four) 
# df'te yeni bir sütuna kayıt etme

In [11]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221


### Daha kompleks fonksiyonlarla .apply() yöntemini kullanma

In [14]:
df['total_bill'].mean()

19.785942622950824

In [88]:
def yelp(price):
    """Map a bill amount to a Yelp-style price tier.

    Returns ``'$'`` for amounts below 10, ``'$$'`` for amounts from 10 up
    to (but not including) 30, and ``'$$$'`` for everything else.
    """
    if price < 10:
        return '$'
    if 10 <= price < 30:
        return '$$'
    return '$$$'

In [89]:
df['Expensive'] = df['total_bill'].apply(yelp)

In [90]:
df.tail(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
239,29.03,5.92,M,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657,2842,$$,Other
240,27.18,2.0,F,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766,5404,$$,Other
241,22.67,2.0,M,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880,7196,$$,Other
242,17.82,1.75,M,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17,950,$$,Other
243,18.78,3.0,F,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672,8139,$$,Other


### lambda ifadesi ile .apply() yöntemini kullanma

In [91]:
def simple(num):
    """Return ``num`` multiplied by two."""
    doubled = num * 2
    return doubled

In [92]:
# yukarıdaki fonksiyonun eşdeğer lambda ifadesi:
lambda num: num*2

<function __main__.<lambda>(num)>

In [93]:
# total_bill sütununun elemanlarına lambda ifadesi uygulama
df['total_bill'].apply(lambda bill: bill*0.18) # %18 KDV hesabı

0      3.0582
1      1.8612
2      3.7818
3      4.2624
4      4.4262
        ...  
239    5.2254
240    4.8924
241    4.0806
242    3.2076
243    3.3804
Name: total_bill, Length: 244, dtype: float64

## Çoklu sütuna .apply() yöntemini uygulama
Bunu yapmanın birkaç yolu olduğunu unutmayın:

In [94]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [95]:
# özel fonksiyon tanımlama

def quality(total_bill, tip):
    """Classify a tip as "Generous" or "Other" by its share of the bill.

    Args:
        total_bill: Bill amount the tip is compared against.
        tip: Tip amount.

    Returns:
        "Generous" when ``tip / total_bill`` is strictly greater than 0.25,
        otherwise "Other".
    """
    if total_bill == 0:
        # Plain Python numbers would raise ZeroDivisionError on the division
        # below, whereas NumPy floats produce inf/nan instead. Mirror the
        # NumPy outcome so both input kinds agree: a positive tip on a zero
        # bill is "Generous", anything else is "Other".
        return "Generous" if tip > 0 else "Other"
    if tip / total_bill > 0.25:
        return "Generous"
    return "Other"

In [96]:
# With axis=1 the lambda receives one row at a time, so the parameter is
# named ``row`` rather than reusing the DataFrame's name.
df['Tip Quality'] = df[['total_bill', 'tip']].apply(
    lambda row: quality(row['total_bill'], row['tip']),
    axis=1,
)

**ADIMLAR**
- Fonksiyonunuzda kullanacağınız sütunları liste olarak seçin
- DataFrame üzerinde lambda ifadesini çağırın
- Ardından özel fonksiyonu girin 
- Fonksiyonun her satıra, yani sütunlar boyunca uygulanacağını belirtin (axis=1)

Böylece çoklu sütuna (axis=1 ile belirttik) apply yöntemi ile özel bir fonksiyon uygulamış oluruz.

In [97]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [98]:
len(df[df['Tip Quality']=='Generous']) / len(df)

0.040983606557377046

- Yukarıdaki işlemleri çok daha hızlı yapan bir diğer yol: **np.vectorize** Numpy fonksiyonu

**np.vectorize**'ın amacı, ondalıklı sayıları girdi olarak alıp, ondalıklı sayıları çıktı olarak döndüren fonksiyonları, numpy dizileri üzerinde çalışabilen (ve döndüren) fonksiyonlara dönüştürmektir.

numpy.vectorize, bir f:a->b fonksiyonunu alır ve onu g:a[]->b[] 'ye dönüştürür.

In [99]:
df['Tip Quality'] = np.vectorize(quality)(df['total_bill'],df['tip'])

In [100]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [101]:
import timeit 
  
# Setup snippet: executed only once, before the timed statement is repeated.
# It loads the data and defines quality() inside the snippet itself so the
# timed statements below can refer to both names.
setup = '''
import numpy as np
import pandas as pd
df = pd.read_csv('tips.csv')
def quality(total_bill,tip):
    if tip/total_bill  > 0.25:
        return "Generous"
    else:
        return "Other"
'''
  
# Statements whose execution time is measured.
# Variant 1: row-wise DataFrame.apply (axis=1) calling quality() via a lambda.
stmt_one = ''' 
df['Tip Quality'] = df[['total_bill','tip']].apply(lambda df: quality(df['total_bill'],df['tip']),axis=1)
'''

# Variant 2: the same column computed by wrapping quality() with np.vectorize.
stmt_two = '''
df['Tip Quality'] = np.vectorize(quality)(df['total_bill'], df['tip'])
'''

In [102]:
timeit.timeit(setup = setup, 
              stmt = stmt_one, 
              number = 1000) 

2.180474916000094

In [103]:
timeit.timeit(setup = setup, 
              stmt = stmt_two, 
              number = 1000) 

0.16947766599878378

### İstatistiksel özetler için .describe() yöntemi

In [104]:
df.describe()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [105]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.78594,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9510998,1.0,2.0,2.0,3.0,6.0
price_per_person,244.0,7.888197,2.914234,2.88,5.8,7.255,9.39,20.27
CC Number,244.0,2563496000000000.0,2369340000000000.0,60406790000.0,30407310000000.0,3525318000000000.0,4553675000000000.0,6596454000000000.0


### sort_values() : Bir sütundaki verileri sıralama yöntemi


In [106]:
df.sort_values('tip',ascending=False)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
170,50.81,10.00,M,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954,8236,$$$,Other
212,48.33,9.00,M,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Other
23,39.42,7.58,M,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Other
59,48.27,6.73,M,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,0595,$$$,Other
141,34.30,6.70,M,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
236,12.60,1.00,M,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Other
111,7.25,1.00,F,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Other
67,3.07,1.00,F,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Generous


In [107]:
# Bir sıralamadan sonra başka bir sütuna göre yeniden sıralamak istersek

df.sort_values(['tip','size'])

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
67,3.07,1.00,F,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Generous
111,7.25,1.00,F,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Other
92,5.75,1.00,F,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780,6392,$,Other
236,12.60,1.00,M,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Other
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,34.30,6.70,M,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Other
59,48.27,6.73,M,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,0595,$$$,Other
23,39.42,7.58,M,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Other
212,48.33,9.00,M,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Other


## Korelasyon için df.corr() 
Sadece nümerik sütunların korelasyonunu döndürür

In [108]:
# Select the numeric columns explicitly before correlating: pandas 2.0+ no
# longer drops non-numeric columns silently in DataFrame.corr() and raises
# on the string columns of this frame instead.
df.select_dtypes(include='number').corr()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
total_bill,1.0,0.675734,0.598315,0.647554,0.104576
tip,0.675734,1.0,0.489299,0.347405,0.110857
size,0.598315,0.489299,1.0,-0.175359,-0.030239
price_per_person,0.647554,0.347405,-0.175359,1.0,0.13524
CC Number,0.104576,0.110857,-0.030239,0.13524,1.0


In [109]:
df[['total_bill','size']].corr()

Unnamed: 0,total_bill,size
total_bill,1.0,0.598315
size,0.598315,1.0


### idxmin ve idxmax
min ve max değerlerin indeks konumunu döndürür

In [110]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [111]:
df['total_bill'].max()

50.81

In [112]:
df['total_bill'].idxmax()

170

In [113]:
# idxmax() returns an index label, so fetch the row with label-based .loc;
# positional .iloc only matches here because df has the default RangeIndex.
df.loc[170]

total_bill                     50.81
tip                             10.0
sex                                M
smoker                           Yes
day                              Sat
time                          Dinner
size                               3
price_per_person               16.94
Payer Name             Gregory Clark
CC Number           5473850968388236
Payment ID                   Sat1954
last_four                       8236
Expensive                        $$$
Tip Quality                    Other
Name: 170, dtype: object

In [114]:
df['total_bill'].idxmin()

67

In [115]:
# idxmin() returns an index label, so fetch the row with label-based .loc;
# positional .iloc only matches here because df has the default RangeIndex.
df.loc[67]

total_bill                      3.07
tip                              1.0
sex                                F
smoker                           Yes
day                              Sat
time                          Dinner
size                               1
price_per_person                3.07
Payer Name             Tiffany Brock
CC Number           4359488526995267
Payment ID                   Sat3455
last_four                       5267
Expensive                          $
Tip Quality                 Generous
Name: 67, dtype: object

In [116]:
df['total_bill'].idxmax()

170

### value_counts() metodu

Kategorik verilerin sayısını hızlı bir şekilde alan bir yöntemdir ve yalnızca kategorik sütunlarda anlamlıdır.

In [117]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [118]:
df['size'].value_counts()

2    156
3     38
4     37
5      5
1      4
6      4
Name: size, dtype: int64

In [119]:
df['sex'].value_counts()

M    157
F     87
Name: sex, dtype: int64

### replace()

Değerleri hızlıca başka bir değerle değiştirme metodu.

In [120]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,F,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,M,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,M,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,M,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,F,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


In [121]:
# Tip Quality sütunundaki 'Generous' değerleri yerine 'Cömert' yerleştirme
df['Tip Quality'].replace(to_replace='Generous',value='Cömert')

0      Other
1      Other
2      Other
3      Other
4      Other
       ...  
239    Other
240    Other
241    Other
242    Other
243    Other
Name: Tip Quality, Length: 244, dtype: object

In [122]:
# df'de değişikliğin oluşması için
df['Tip Quality'] = df['Tip Quality'].replace(to_replace='Generous',value='Cömert')

In [123]:
df.tail(25)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
219,30.14,3.09,F,Yes,Sat,Dinner,4,7.54,Shelby House,502097403252,Sat8863,3252,$$$,Other
220,12.16,2.2,M,Yes,Fri,Lunch,2,6.08,Ricky Johnson,213109508670736,Fri4607,736,$$,Other
221,13.42,3.48,F,Yes,Fri,Lunch,2,6.71,Leslie Kaufman,379437981958785,Fri7511,8785,$$,Cömert
222,8.58,1.92,M,Yes,Fri,Lunch,1,8.58,Jason Lawrence,3505302934650403,Fri6624,403,$,Other
223,15.98,3.0,F,No,Fri,Lunch,3,5.33,Mary Rivera,5343428579353069,Fri6014,3069,$$,Other
224,13.42,1.58,M,Yes,Fri,Lunch,2,6.71,Ronald Vaughn DVM,341503466406403,Fri5959,6403,$$,Other
225,16.27,2.5,F,Yes,Fri,Lunch,2,8.14,Whitney Arnold,3579111947217428,Fri6665,7428,$$,Other
226,10.09,2.0,F,Yes,Fri,Lunch,2,5.04,Ruth Weiss,5268689490381635,Fri6359,1635,$$,Other
227,20.45,3.0,M,No,Sat,Dinner,4,5.11,Robert Bradley,213141668145910,Sat4319,5910,$$,Other
228,13.28,2.72,M,No,Sat,Dinner,2,6.64,Glenn Jones,502061651712,Sat2937,1712,$$,Other


In [124]:
df['sex'].replace(['Female', 'Male'],['F','M'])

0      F
1      M
2      M
3      M
4      F
      ..
239    M
240    F
241    M
242    M
243    F
Name: sex, Length: 244, dtype: object

### unique() ve nunique() metodları

In [125]:
df['size'].unique()

array([2, 3, 4, 1, 6, 5])

In [126]:
df['size'].nunique() # unique elemanların sayısını verir.

6

In [127]:
df['time'].unique()

array(['Dinner', 'Lunch'], dtype=object)

### map
Map metodu ile bir sözlük eşleşmesi oluşturulur. Birden fazla veri değerini değiştirmek isterseniz map yöntemi daha kullanışlıdır.

In [128]:
my_map = {'Female':'F','Male':'M'}

In [129]:
df['sex'].map(my_map)

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
      ... 
239    NaN
240    NaN
241    NaN
242    NaN
243    NaN
Name: sex, Length: 244, dtype: object

In [130]:
df['sex'] = df['sex'].map({'Female':'F','Male':'M'})

In [131]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
2,21.01,3.5,,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,1322,$$,Other
3,23.68,3.31,,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,5994,$$,Other
4,24.59,3.61,,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,7221,$$,Other


## Duplicates 
Tekrar eden satırları belirlemekte kullanılır.

### .duplicated() ve .drop_duplicates()

In [132]:
# Bir satır, daha önceki bir satırın tekrarıysa True döndürür (ilk örnek False kalır)
df.duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
239    False
240    False
241    False
242    False
243    False
Length: 244, dtype: bool

In [133]:
simple_df = pd.DataFrame([1,2,2,2],['a','b','c','d'],columns=['Data'])

In [134]:
simple_df

Unnamed: 0,Data
a,1
b,2
c,2
d,2


In [135]:
simple_df.duplicated()

a    False
b    False
c     True
d     True
dtype: bool

In [136]:
# yinelenen satırları atan kod
simple_df.drop_duplicates() 

Unnamed: 0,Data
a,1
b,2


## between() Metodu

- **left:** Sol sınır skaler değeri
- **right:** Sağ sınır skaler değeri
- **inclusive:** “both” (varsayılan), “neither”, “left”, “right” değerlerini alabilir. “both” iki sınırı da dahil eder, “neither” iki sınırı da hariç tutar, “left” yalnızca sol sınırı, “right” ise yalnızca sağ sınırı dahil eder.

In [137]:
# 10$ ve 20$ arasındaki toplam fatura sayısını alma
df['total_bill'].between(10,20,inclusive='both') 

0       True
1       True
2      False
3      False
4      False
       ...  
239    False
240    False
241    False
242     True
243     True
Name: total_bill, Length: 244, dtype: bool

In [138]:
sum(df['total_bill'].between(10,20,inclusive='both'))

130

In [139]:
df[df['total_bill'].between(10,20,inclusive='both')]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
0,16.99,1.01,,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
1,10.34,1.66,,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,9230,$$,Other
8,15.04,1.96,,No,Sun,Dinner,2,7.52,Joseph Mcdonald,3522866365840377,Sun6820,0377,$$,Other
9,14.78,3.23,,No,Sun,Dinner,2,7.39,Jerome Abbott,3532124519049786,Sun3775,9786,$$,Other
10,10.27,1.71,,No,Sun,Dinner,2,5.14,William Riley,566287581219,Sun2546,1219,$$,Other
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,15.53,3.00,,Yes,Sat,Dinner,2,7.76,Tracy Douglas,4097938155941930,Sat7220,1930,$$,Other
235,10.07,1.25,,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605,Sat4615,7605,$$,Other
236,12.60,1.00,,Yes,Sat,Dinner,2,6.30,Matthew Myers,3543676378973965,Sat5032,3965,$$,Other
242,17.82,1.75,,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17,0950,$$,Other


In [140]:
s = pd.Series([2, 0, 4, 8, np.nan])

In [141]:
s.between(1, 4)

0     True
1    False
2     True
3    False
4    False
dtype: bool

In [142]:
s.between(1, 4, inclusive='neither')

0     True
1    False
2    False
3    False
4    False
dtype: bool

## sample()
DataFrame'in belli sayıdaki satırını örnekleyen metod.

In [143]:
df.sample(5) # rastgele 5 satırı örnekleme

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
119,24.08,2.92,,No,Thur,Lunch,4,6.02,Melanie Jordan,676212062720,Thur8063,2720,$$,Other
129,22.82,2.18,,No,Thur,Lunch,3,7.61,Raymond Torres,4855776744024,Thur9424,4024,$$,Other
229,22.12,2.88,,Yes,Sat,Dinner,2,11.06,Jennifer Russell,4793003293608,Sat3943,3608,$$,Other
132,11.17,1.5,,No,Thur,Lunch,2,5.58,Taylor Gonzalez,6011990685390011,Thur7783,11,$$,Other
185,20.69,5.0,,No,Sun,Dinner,5,4.14,Joseph Howell,30362407455623,Sun5842,5623,$$,Other


In [144]:
df.sample(frac=0.1) # verinin yanlızca %10'nunu örnekleme.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
15,21.58,3.92,,No,Sun,Dinner,2,10.79,Matthew Reilly,180073029785069,Sun1878,5069,$$,Other
11,35.26,5.0,,No,Sun,Dinner,4,8.82,Diane Macias,4577817359320969,Sun6686,969,$$$,Other
12,15.42,1.57,,No,Sun,Dinner,2,7.71,Chad Harrington,577040572932,Sun1300,2932,$$,Other
86,13.03,2.0,,No,Thur,Lunch,2,6.52,Derek Thomas,213161022097557,Thur6793,7557,$$,Other
106,20.49,4.06,,Yes,Sat,Dinner,2,10.24,Karl Mcdaniel,180024452771522,Sat7865,1522,$$,Other
88,24.71,5.85,,No,Thur,Lunch,2,12.36,Roger Taylor,4410248629955,Thur9003,9955,$$,Other
232,11.61,3.39,,No,Sat,Dinner,2,5.8,James Taylor,6011482917327995,Sat2124,7995,$$,Cömert
143,27.05,5.0,,No,Thur,Lunch,6,4.51,Regina Jones,4311048695487,Thur6179,5487,$$,Other
223,15.98,3.0,,No,Fri,Lunch,3,5.33,Mary Rivera,5343428579353069,Fri6014,3069,$$,Other
30,9.55,1.45,,No,Sat,Dinner,2,4.78,Grant Hall,30196517521548,Sat4099,1548,$,Other


## nlargest ve nsmallest

In [145]:
# En büyük bahşişe (tip) sahip 10 satırı listeleme
df.nlargest(10,'tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
170,50.81,10.0,,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954,8236,$$$,Other
212,48.33,9.0,,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Other
23,39.42,7.58,,No,Sat,Dinner,4,9.86,Lance Peterson,3542584061609808,Sat239,9808,$$$,Other
59,48.27,6.73,,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139,595,$$$,Other
141,34.3,6.7,,No,Thur,Lunch,6,5.72,Steven Carlson,3526515703718508,Thur1025,8508,$$$,Other
183,23.17,6.5,,Yes,Sun,Dinner,4,5.79,Dr. Michael James,4718501859162,Sun6059,9162,$$,Cömert
214,28.17,6.5,,Yes,Sat,Dinner,3,9.39,Marissa Jackson,4922302538691962,Sat3374,1962,$$,Other
47,32.4,6.0,,No,Sun,Dinner,4,8.1,James Barnes,3552002592874186,Sun9677,4186,$$$,Other
239,29.03,5.92,,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657,2842,$$,Other
88,24.71,5.85,,No,Thur,Lunch,2,12.36,Roger Taylor,4410248629955,Thur9003,9955,$$,Other


In [146]:
# En büyük 2 bahşişi listeleme
#df.sort_values('tip',ascending=False).iloc[0:2]
df.nlargest(2,'tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
170,50.81,10.0,,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954,8236,$$$,Other
212,48.33,9.0,,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590,5212,$$$,Other


In [147]:
# En küçük bahşişe sahip 10 satırı listeleme
df.nsmallest(10,'tip')

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,last_four,Expensive,Tip Quality
67,3.07,1.0,,Yes,Sat,Dinner,1,3.07,Tiffany Brock,4359488526995267,Sat3455,5267,$,Cömert
92,5.75,1.0,,Yes,Fri,Dinner,2,2.88,Leah Ramirez,3508911676966392,Fri3780,6392,$,Other
111,7.25,1.0,,No,Sat,Dinner,1,7.25,Terri Jones,3559221007826887,Sat4801,6887,$,Other
236,12.6,1.0,,Yes,Sat,Dinner,2,6.3,Matthew Myers,3543676378973965,Sat5032,3965,$$,Other
0,16.99,1.01,,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,3410,$$,Other
215,12.9,1.1,,Yes,Sat,Dinner,2,6.45,Jessica Owen,4726904879471,Sat6983,9471,$$,Other
237,32.83,1.17,,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929,5508,$$$,Other
75,10.51,1.25,,No,Sat,Dinner,2,5.26,Kenneth Hayes,213142079731108,Sat5056,1108,$$,Other
135,8.51,1.25,,No,Thur,Lunch,2,4.26,Rebecca Harris,4320272020376174,Thur6600,6174,$,Other
235,10.07,1.25,,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605,Sat4615,7605,$$,Other


# SON!