# SERIES

In [9]:
import numpy as np
import pandas as pd

In [None]:
# help(pd.Series)

In [3]:
myindex = ['USA', 'Canada', 'Mexico']

In [4]:
mydata = [1776, 1867, 1821]

In [5]:
pd.Series(data = mydata)

# index parametresini girmediğimiz için default olarak 0 dan başlayan rakamları atadı.

0    1776
1    1867
2    1821
dtype: int64

In [6]:
myser = pd.Series(data = mydata, index=myindex)

In [7]:
myser

USA       1776
Canada    1867
Mexico    1821
dtype: int64

In [None]:
myser = pd.Series(mydata, myindex)

# parametreleri positional arguman olarak sırasıyla girdiğimiz için data= ve index = dememize gerek kalmadı

In [8]:
type(myser)

pandas.core.series.Series

In [62]:
myser[0]

# 0 indeksindeki datayı indexleyerek çağıralım

1776

In [63]:
myser['USA']
# aynı işlemi 0 indeksindeki 'USA' stringini kullanarak da yapabiliriz

1776

In [66]:
# şimdi bir dictionary oluşturup bunu Series'e dönüştürlerim.

ages = {'Sam':5, 'Frank':10, 'Spike':7}

In [67]:
pd.Series(ages)

# dict.in key'lerini index, value'larını data yaptı.

Sam       5
Frank    10
Spike     7
dtype: int64

In [2]:
# Imaginary Sales Data for 1st and 2nd Quarters for Global Company
q1 = {'Japan': 80, 'China': 450, 'India': 200, 'USA': 250}
q2 = {'Brazil': 100,'China': 500, 'India': 210,'USA': 260}

In [3]:
sales_q1 = pd.Series(q1)

In [4]:
sales_q2 = pd.Series(q2)

In [5]:
sales_q1

Japan     80
China    450
India    200
USA      250
dtype: int64

In [6]:
sales_q2

Brazil    100
China     500
India     210
USA       260
dtype: int64

In [8]:
sales_q1['Japan']

# q1 serisinden Japan'a karşılık gelen value'yu label olarak indeksledim.

80

In [9]:
sales_q1[0]

# aynı value'yu indeks numarası ile indeksledim.

80

In [10]:
sales_q1['japan']

# key sensitive olduğu için küçük j ile yazdığımda hata verdi. 
# japan dan sonra bir boşluk koyarsam da key eror verir. indeksi tam olarak doğru yazmak gerekiyor.

KeyError: 'japan'

In [11]:
sales_q1.keys()

# q1 serisinin keylerini yani indekslerini getirdi.

Index(['Japan', 'China', 'India', 'USA'], dtype='object')

In [12]:
[1,2] * 2

# 2 ile çarpınca listeyi double  yaptı. ama array söz konusu olduğunda bakalım nasıl olarak..

[1, 2, 1, 2]

In [13]:
np.array([1,2]) * 2

# numpy array'i 2 ile çarparsak her bir elemanını 2 ile çarpar!

array([2, 4])

In [14]:
sales_q1 * 2

# sales_q1 Serie'sinin elemanlarını 2 ile çarpacak.

Japan    160
China    900
India    400
USA      500
dtype: int64

In [16]:
sales_q1 / 100

# sales_q1 Serie'sinin elemanlarını 100 e bölecek

Japan    0.8
China    4.5
India    2.0
USA      2.5
dtype: float64

In [17]:
sales_q1

Japan     80
China    450
India    200
USA      250
dtype: int64

In [19]:
sales_q2

Brazil    100
China     500
India     210
USA       260
dtype: int64

In [18]:
sales_q1 + sales_q2

# indeksleri aynı olanları topladı. farklı olanlara NaN  value'sunu atadı.
# bunu her indekse uyguladığı için önce sales_q1 deki Brasil satırı için NaN değerini atadı.
# sonra da sales_q2'deki Japan satırı için NaN değerini atadı.

# herhangi bir aritmetik işlem yapıldığında integer olan değerlerin float'a dönüştüğüne dikkat et!

Brazil      NaN
China     950.0
India     410.0
Japan       NaN
USA       510.0
dtype: float64

In [20]:
sales_q1.add(sales_q2, fill_value = 0)

# add() metodunu kullanarak iki serie yi birleştiriyoruz. Burada q1'i q2 ile birleştirdik.

# fill_value parametresine 0 değeri atayarak toplama yapılan indekler farklı da olsa 
# birinin değerini tutup diğerine 0 değeri veriyor.
 

Brazil    100.0
China     950.0
India     410.0
Japan      80.0
USA       510.0
dtype: float64

In [21]:
sales_q1.dtype

# orjinal sales_q1'in tipinin integer olduğunu gördük.

dtype('int64')

In [22]:
first_half = sales_q1.add(sales_q2, fill_value = 0)

first_half.dtype

# add işleminden sonra sonucun folat'a döndüğünü gördük.

dtype('float64')

# DATA FRAMES

In [None]:
"""
How to create a DataFrame
Grab a column or multiple colums
Grab a row or multiple rows
Insert a new column or new row

Selection and Indexing
Creating new columns
Removing colums
"""

# Part1 : Creating a DataFrame

## DataFrame() fonksiyonu ile DataFrame oluşturma

In [2]:
np.random.seed(101)
mydata = np.random.randint(0,101,(4,3))

In [3]:
mydata

array([[95, 11, 81],
       [70, 63, 87],
       [75,  9, 77],
       [40,  4, 63]])

In [4]:
myindex = ['CA', 'NY', 'AZ', 'TX']

In [5]:
mycolumns = ['Jan', 'Fab', 'Mar']

In [6]:
df = pd.DataFrame(mydata)  # df, dataframe i temsil ediyor. 

# DataFrame metodunun içine index ve column parametrelerini belirtmezsen default olarak
# bunları 0 dan başlayan integer rakamalar olarak belirler

In [7]:
df

Unnamed: 0,0,1,2
0,95,11,81
1,70,63,87
2,75,9,77
3,40,4,63


In [38]:
df = pd.DataFrame(data = mydata, index = myindex)

# index parametresine myindex değişkeninin içeriğini atadık

In [39]:
df

Unnamed: 0,0,1,2
CA,95,11,81
NY,70,63,87
AZ,75,9,77
TX,40,4,63


In [42]:
df = pd.DataFrame(data = mydata, index = myindex, columns = mycolumns)

# colums parametresine mycolums değişkeninin içeriğini atadık

In [43]:
df

Unnamed: 0,Jan,Fab,Mar
CA,95,11,81
NY,70,63,87
AZ,75,9,77
TX,40,4,63


In [44]:
df.info()

# dataframe hakkındaki index ve column bilgileri ile memory kullanım bilgisini sergiler

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, CA to TX
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Jan     4 non-null      int32
 1   Fab     4 non-null      int32
 2   Mar     4 non-null      int32
dtypes: int32(3)
memory usage: 80.0+ bytes


## bir dosya çağırarak..

In [None]:
# WHERE IS MY PYTHON CODE LOCATED?

In [45]:
pwd

'C:\\Users\\hüseyin'

In [46]:
ls

# working directory'i listeleyelim

 Volume in drive C has no label.
 Volume Serial Number is 6815-6880

 Directory of C:\Users\h�seyin

09.06.2021  19:23    <DIR>          .
09.06.2021  19:23    <DIR>          ..
28.04.2021  13:34             8.026 .bash_history
22.04.2021  19:59               180 .gitconfig
13.03.2021  20:49    <DIR>          .idlerc
09.06.2021  19:23    <DIR>          .ipynb_checkpoints
13.03.2021  21:13    <DIR>          .ipython
13.03.2021  21:16    <DIR>          .jupyter
07.06.2021  21:57    <DIR>          .matplotlib
22.04.2021  19:53            17.173 .viminfo
27.03.2021  23:09    <DIR>          .VirtualBox
12.04.2021  22:45    <DIR>          .vscode
14.05.2021  18:16            19.851 00_1-SelfStudy-d”ng�ler-break - continue - for.ipynb
14.05.2021  15:27             2.998 00_2-SelfStudy-listenin elemanlar�n� unique yapma.ipynb
16.05.2021  13:00            26.547 00_3-SelfStudy-zip-lambda.ipynb
16.05.2021  01:00             5.199 00_4- w3resource €al�Ÿmalar�m.ipynb
15.05.2021  22:54            3

In [32]:
df = pd.read_csv('tips.csv')

# bu noktada google dan tips.csv dosyasını bulup github'a fork edip bunu working directory'e clone yaptım. 
# sonra da df pd.read_csv() içine bu doyanın adını yazdım.

In [13]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [33]:
df = pd.read_csv('C:\\Users\\hüseyin\\tips.csv')

 # bu şekilde dosya yolunu da yazabiliriz.

In [34]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


In [27]:
# seaborn kütüphanesini import ederek tips.csv dosyasını indirebiliriz.

import matplotlib.pyplot as plt
import seaborn as sns

In [28]:
df=sns.load_dataset("tips")
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


# Part2 : Basic Properties

In [37]:
df.columns

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size',
       'price_per_person', 'Payer Name', 'CC Number', 'Payment ID'],
      dtype='object')

In [36]:
df.index

# başlangıç ve bitiş index numaralarını ve step adımını gösterir.

RangeIndex(start=0, stop=244, step=1)

In [38]:
df.head()

# bir parametre belirtilmediyse ilk 5 satırı getirir.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [17]:
df.head(10)

# ilk 10 satırı getirir.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.0,Male,No,Sun,Dinner,2
7,26.88,3.12,Male,No,Sun,Dinner,4
8,15.04,1.96,Male,No,Sun,Dinner,2
9,14.78,3.23,Male,No,Sun,Dinner,2


In [39]:
df.tail()

# bir parametre belirtilmediyse son 5 satırı getirir.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.0,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.0,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
243,18.78,3.0,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672


In [40]:
df.tail(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
243,18.78,3.0,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672


In [41]:
df.info()

# data frame ile ilgili bazı bilgileri verir. 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


In [42]:
df.describe()

# Generate descriptive statistics

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [43]:
df.describe().transpose()

# transpose ile satırlarla sütunları yer değiştirdik ve bu sayede görünümü daha anlaşılır hale getirdik.

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.78594,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9510998,1.0,2.0,2.0,3.0,6.0
price_per_person,244.0,7.888197,2.914234,2.88,5.8,7.255,9.39,20.27
CC Number,244.0,2563496000000000.0,2369340000000000.0,60406790000.0,30407310000000.0,3525318000000000.0,4553675000000000.0,6596454000000000.0


# part3 Working with Columns

In [44]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [45]:
df['total_bill']

# total_bill sütununu gördük. bu bir pandas serisidir. tipine bakıp öyle olduğunu görelim.

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [46]:
type(df['total_bill'])

# sütunun bir panda serisi olduğunu gördük

pandas.core.series.Series

In [48]:
# az önce single bir column grab ettik. şimdi de multiple column grab edelim.

mycols = ['total_bill', 'tip']

df[mycols]

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.50
3,23.68,3.31
4,24.59,3.61
...,...,...
239,29.03,5.92
240,27.18,2.00
241,22.67,2.00
242,17.82,1.75


In [49]:
print(type(df[mycols]))

<class 'pandas.core.frame.DataFrame'>


In [27]:
df[['total_bill', 'tip']]

# bu şekilde de sütunları grab edebilirdik.
# çift köşeli parantez olduğuna dikkat et!!

Unnamed: 0,total_bill,tip
0,16.99,1.01
1,10.34,1.66
2,21.01,3.50
3,23.68,3.31
4,24.59,3.61
...,...,...
239,29.03,5.92
240,27.18,2.00
241,22.67,2.00
242,17.82,1.75


In [28]:
df['tip'] + df['total_bill']

# iki sütunu aritmetik işleme tabi tutabiliyoruz

0      18.00
1      12.00
2      24.51
3      26.99
4      28.20
       ...  
239    34.95
240    29.18
241    24.67
242    19.57
243    21.78
Length: 244, dtype: float64

In [29]:
df['tip'] / df['total_bill']

0      0.059447
1      0.160542
2      0.166587
3      0.139780
4      0.146808
         ...   
239    0.203927
240    0.073584
241    0.088222
242    0.098204
243    0.159744
Length: 244, dtype: float64

In [30]:
100 * df['tip'] / df['total_bill']

# böylece örneği; ilk sıradaki bill'in toplam bill'e oranının yaklaşık %6 olduğun anlıyoruz.
# ikinci sıradaki bill in ise %16 olduğunu görebiliyoruz. gibi gibi..

0       5.944673
1      16.054159
2      16.658734
3      13.978041
4      14.680765
         ...    
239    20.392697
240     7.358352
241     8.822232
242     9.820426
243    15.974441
Length: 244, dtype: float64

##  create a new column

In [45]:
df['tip_percentage'] = 100 * df['tip'] / df['total_bill']

# yukardaki ifadeyi tip_percentage'e atayalım. ve yeni bir sütun oluşsun.

# bunu yaparken aynı adla başka bir sütun olmadığından emin olmalısın. 
# çünkü bu durumda eskisinin üzerine yazacaktır.


In [46]:
df.head()

# tip_percentage sütununun oluşduğunu görelim.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,5.944673
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,16.054159
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,16.658734
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,13.978041
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,14.680765


In [33]:
df['price_per_person'] = df['total_bill'] / df['size']

# kişi başı düşen bill için yeni bir sütun oluşturalım

In [34]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_percentage,price_per_person
0,16.99,1.01,Female,No,Sun,Dinner,2,5.944673,8.495000
1,10.34,1.66,Male,No,Sun,Dinner,3,16.054159,3.446667
2,21.01,3.50,Male,No,Sun,Dinner,3,16.658734,7.003333
3,23.68,3.31,Male,No,Sun,Dinner,2,13.978041,11.840000
4,24.59,3.61,Female,No,Sun,Dinner,4,14.680765,6.147500
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,20.392697,9.676667
240,27.18,2.00,Female,Yes,Sat,Dinner,2,7.358352,13.590000
241,22.67,2.00,Male,Yes,Sat,Dinner,2,8.822232,11.335000
242,17.82,1.75,Male,No,Sat,Dinner,2,9.820426,8.910000


In [None]:
#  bölüm işlemlerinde sonuç float olacaktır. fakat pandas, NumPy üzerine kurulu olduğu için 
# bu durumu düzeltecek güzel çözümlerimiz var. aşağıda bunun kullanımını görelim

In [36]:
df['price_per_person'] = np.round(df['total_bill'] / df['size'], 2)

In [37]:
# yukarda kullandığımız np.round() fonksiyonu ve bu fonksiyonda kullandığımız
# 2 parametresi sayesinde df.head()virgülden sonra iki rakam ilerlediğini görelim

price_per_person sütununun 

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_percentage,price_per_person
0,16.99,1.01,Female,No,Sun,Dinner,2,5.944673,8.49
1,10.34,1.66,Male,No,Sun,Dinner,3,16.054159,3.45
2,21.01,3.5,Male,No,Sun,Dinner,3,16.658734,7.0
3,23.68,3.31,Male,No,Sun,Dinner,2,13.978041,11.84
4,24.59,3.61,Female,No,Sun,Dinner,4,14.680765,6.15


## sütun silme (drop)

In [None]:
"""
deleting a column

del df['column_name']

df = df.drop('column_name', 1) # column label ile drop

columns = ['Col1', 'Col2', ...]
df.drop(columns, inplace=True, axis=1)

df.drop(columns=['B', 'C'])

to drop by column number instead of by column label, 
try this to delete, e.g. the 1st, 2nd and 4th columns:
df.drop(df.columns[[0, 1, 3]], axis=1)  # df.columns is zero-based pd.Index

df.drop(df.columns[[0,1,3]], axis=1, inplace=True) # Delete first, second and fourth columns:

df.drop(df.columns[[0]], axis=1, inplace=True) # Delete first column:

"""

In [38]:
df.drop('tip_percentage') 

# axis, default olarak zero (0) olduğundan bu hata verecektir. 
# eğer sütunu drop etmek istiyorsak df.drop() fonsiyonundaki axis parametresini 1 yapmalıyız.

KeyError: "['tip_percentage'] not found in axis"

In [39]:
df.drop('tip_percentage', axis = 1) 

# 9dan 8 sütuna düştüğünü görelim.

# fakat burda df.drop() bir fonksiyon olduğundan işlem sonucu tekrar df'e atanmadığı
# sürece bu değişiklik kalıcı olmayacaktır.. çünkü sadece generate edecektir.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91


In [40]:
df

# df nin hala 9 sütun olduğuna dikkat et. 

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_percentage,price_per_person
0,16.99,1.01,Female,No,Sun,Dinner,2,5.944673,8.49
1,10.34,1.66,Male,No,Sun,Dinner,3,16.054159,3.45
2,21.01,3.50,Male,No,Sun,Dinner,3,16.658734,7.00
3,23.68,3.31,Male,No,Sun,Dinner,2,13.978041,11.84
4,24.59,3.61,Female,No,Sun,Dinner,4,14.680765,6.15
...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,20.392697,9.68
240,27.18,2.00,Female,Yes,Sat,Dinner,2,7.358352,13.59
241,22.67,2.00,Male,Yes,Sat,Dinner,2,8.822232,11.34
242,17.82,1.75,Male,No,Sat,Dinner,2,9.820426,8.91


In [47]:
# şimdi kalıcı olarak (permanently) df yi değiştirelim.

df = df.drop('tip_percentage', axis = 1)

df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91


## axis parametresinde neden 0 satırı, 1 sütunu temsil ediyor?

In [49]:
# bu df.shape komutunun özelliğinden kaynakldıdır.
 # df.shape() yapınca gelen değerlere baktığımızda ilk değer olan 244 satırı
# ikinci değer olan 8 sütunu temsil eder

df.shape

(244, 8)

In [50]:
df.shape[0]

# indekslediğimizde 0 bize satır değerini verecektir.
# yani axis=0 satırı temsil eder.

244

In [51]:
df.shape[1]

# indekslediğimizde 1 bize sütun değerini verecektir.
# yani axis=1 satırı temsil eder.

8

# Part4 : Working with Rows

In [50]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [51]:
df.index

# satırları gördük.

RangeIndex(start=0, stop=244, step=1)

In [53]:
df.set_index("Payment ID")

# Payment ID sütununu index yaptık. dikkat et artık o bir sütun ismi değil, indeks ismi oldu!

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [54]:
df.head()

# yukarda yapılan işlem kalıcı değildi. kalıcı değişiklik olması için ya df'ye bu işlemi atamak gerek
# ya da inplace parametresine True değeri atamak gerek.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [55]:
df = df.set_index("Payment ID")

In [56]:
df.head()

# yapılan işlemin kalıcı olduğunu gördük.

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221


In [63]:
# bu işlemi geri almak için reset_index() metodunu kullanacağız.

df.reset_index()

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
2,Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
3,Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
4,Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
...,...,...,...,...,...,...,...,...,...,...,...
239,Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
240,Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
241,Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
242,Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [64]:
df = df.set_index("Payment ID")

# bu hatayı vermemesi lazım. !!! kerneli restart et.

KeyError: "None of ['Payment ID'] are in the columns"

In [65]:
df.iloc[0]

# iloc ile numeric olarak indeks numarasını giriyoruz. 
# index 0'ı çektik ve bize 0 indeksteki satırın bilgilerini getirdi.

total_bill                       16.99
tip                               1.01
sex                             Female
smoker                              No
day                                Sun
time                            Dinner
size                                 2
price_per_person                  8.49
Payer Name          Christy Cunningham
CC Number             3560325168603410
Name: Sun2959, dtype: object

In [68]:
df.loc['Sun2959']

# loc ile label olarak belirtilen satırı getirdi.

total_bill                       16.99
tip                               1.01
sex                             Female
smoker                              No
day                                Sun
time                            Dinner
size                                 2
price_per_person                  8.49
Payer Name          Christy Cunningham
CC Number             3560325168603410
Name: Sun2959, dtype: object

In [69]:
df.iloc[0:4]

# 0-4 indeksler arasını sliceladım

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994


In [70]:
df.loc[['Sun2959', 'Sun5260']]

# label olarak iki satırı indeksledim.   

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994


In [71]:
df.drop('Sun2959', axis = 0)

# Sun2959 satırını dopr ettik. axis default olarak 0 olduğu için yazmayabilirdik

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [72]:
df

# drop işleminin kalıcı olmadığını gördük.

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [75]:
df

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950


In [76]:
one_row = df.iloc[0]

# 0 indeksindeki bilgileri one_row değişkenine atadım.

In [77]:
one_row

total_bill                     10.34
tip                             1.66
sex                             Male
smoker                            No
day                              Sun
time                          Dinner
size                               3
price_per_person                3.45
Payer Name            Douglas Tucker
CC Number           4478071379779230
Name: Sun4608, dtype: object

In [78]:
df = df.append(one_row)

df

# one_row satırını datasetime append ettim. satır sayısının 244 olduğunu gör.

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882
...,...,...,...,...,...,...,...,...,...,...
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950
Thur672,18.78,3.00,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139


 # Conditional Filtering

In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv("tips.csv")

In [3]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [5]:
df['total_bill']

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [6]:
df['total_bill'] > 40

0      False
1      False
2      False
3      False
4      False
       ...  
239    False
240    False
241    False
242    False
243    False
Name: total_bill, Length: 244, dtype: bool

In [15]:
type(df['total_bill'] > 40)

pandas.core.series.Series

In [7]:
bool_series = df['total_bill'] > 40

In [8]:
df[bool_series]

# sadece boolean seriesin True olduğu satırları seçip getirecek
# bunu iki adımda yaptık.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
59,48.27,6.73,Male,No,Sat,Dinner,4,12.07,Brian Ortiz,6596453823950595,Sat8139
95,40.17,4.73,Male,Yes,Fri,Dinner,4,10.04,Aaron Bentley,180026611638690,Fri9628
102,44.3,2.5,Female,Yes,Sat,Dinner,3,14.77,Heather Cohen,379771118886604,Sat6240
142,41.19,5.0,Male,No,Thur,Lunch,5,8.24,Eric Andrews,4356531761046453,Thur3621
156,48.17,5.0,Male,No,Sun,Dinner,6,8.03,Ryan Gonzales,3523151482063321,Sun7518
170,50.81,10.0,Male,Yes,Sat,Dinner,3,16.94,Gregory Clark,5473850968388236,Sat1954
182,45.35,3.5,Male,Yes,Sun,Dinner,3,15.12,Jose Parsons,4112207559459910,Sun2337
184,40.55,3.0,Male,Yes,Sun,Dinner,2,20.27,Stephen Cox,3547798222044029,Sun5140
197,43.11,5.0,Female,Yes,Thur,Lunch,4,10.78,Brooke Soto,5544902205760175,Thur9313
212,48.33,9.0,Male,No,Sat,Dinner,4,12.08,Alex Williamson,676218815212,Sat4590


In [9]:
df[df['total_bill'] > 40]

# seçimi tek adımda yaptık. içerdeki ilk katmanda boole değerler üretti. 
# dışardaki katmanda da o değerlerden True olanları dataFrame olarak gösterdi.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
59,48.27,6.73,Male,No,Sat,Dinner,4
95,40.17,4.73,Male,Yes,Fri,Dinner,4
102,44.3,2.5,Female,Yes,Sat,Dinner,3
142,41.19,5.0,Male,No,Thur,Lunch,5
156,48.17,5.0,Male,No,Sun,Dinner,6
170,50.81,10.0,Male,Yes,Sat,Dinner,3
182,45.35,3.5,Male,Yes,Sun,Dinner,3
184,40.55,3.0,Male,Yes,Sun,Dinner,2
197,43.11,5.0,Female,Yes,Thur,Lunch,4
212,48.33,9.0,Male,No,Sat,Dinner,4


In [10]:
df[df['sex'] == "Male"]

# dataFrame in sex sütunu Male olan satırlarını getirelim.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
...,...,...,...,...,...,...,...
236,12.60,1.00,Male,Yes,Sat,Dinner,2
237,32.83,1.17,Male,Yes,Sat,Dinner,2
239,29.03,5.92,Male,No,Sat,Dinner,3
241,22.67,2.00,Male,Yes,Sat,Dinner,2


In [11]:
df[df['size'] > 3]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
4,24.59,3.61,Female,No,Sun,Dinner,4
5,25.29,4.71,Male,No,Sun,Dinner,4
7,26.88,3.12,Male,No,Sun,Dinner,4
11,35.26,5.0,Female,No,Sun,Dinner,4
13,18.43,3.0,Male,No,Sun,Dinner,4
23,39.42,7.58,Male,No,Sat,Dinner,4
25,17.81,2.34,Male,No,Sat,Dinner,4
31,18.35,2.5,Male,No,Sat,Dinner,4
33,20.69,2.45,Female,No,Sat,Dinner,4
44,30.4,5.6,Male,No,Sun,Dinner,4


In [None]:
# AND & --> BOTH COND. NEED TO BE TRUE

# OR | --> EITHER COND. IS TRUE

In [16]:
1 == 1 or 2 == 3

True

In [19]:
1 == 1 | 2 == 3 

 # ???????

False

In [None]:
df['total_bill'] > 30

df['sex'] == "Male"

In [20]:
df[(df['total_bill'] > 30) and (df['sex'] == "Male")]

# bu hata verecek çünkü and'in her iki tarafındaki conditionda boolean serileri var. 
# tek bir boolean ifade yok.
# bunun yerine pandasda şunu yapıyoruz: & kullanıyoruz.

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [21]:
df[(df['total_bill'] > 30) & (df['sex'] == "Male")]


Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
23,39.42,7.58,Male,No,Sat,Dinner,4
39,31.27,5.0,Male,No,Sat,Dinner,3
44,30.4,5.6,Male,No,Sun,Dinner,4
47,32.4,6.0,Male,No,Sun,Dinner,4
56,38.01,3.0,Male,Yes,Sat,Dinner,4
59,48.27,6.73,Male,No,Sat,Dinner,4
83,32.68,5.0,Male,Yes,Thur,Lunch,2
95,40.17,4.73,Male,Yes,Fri,Dinner,4
112,38.07,4.0,Male,No,Sun,Dinner,3
141,34.3,6.7,Male,No,Thur,Lunch,6


In [22]:
df[(df['total_bill'] > 30) | (df['sex'] == "Male")]

# aşağıda dikkat edersen male olanlar 30dan daha aşağıda bill e sahip olabiliyor.
# Female olanlar ise ancak 30 yukarısı bill e sahip olanlar olabiliyor.

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
5,25.29,4.71,Male,No,Sun,Dinner,4
6,8.77,2.00,Male,No,Sun,Dinner,2
...,...,...,...,...,...,...,...
237,32.83,1.17,Male,Yes,Sat,Dinner,2
238,35.83,4.67,Female,No,Sat,Dinner,3
239,29.03,5.92,Male,No,Sat,Dinner,3
241,22.67,2.00,Male,Yes,Sat,Dinner,2


In [25]:
df[(df['day'] == 'Sun') | (df['day'] == "Sat")]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
238,35.83,4.67,Female,No,Sat,Dinner,3
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2


In [26]:
df[(df['day'] == 'Sun') | (df['day'] == "Sat") | (df['day'] == "Fri")]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
238,35.83,4.67,Female,No,Sat,Dinner,3
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2


In [80]:
options = ['Sat', 'Sun']

In [81]:
'Sat' in options

True

In [30]:
row_value in options

NameError: name 'row_value' is not defined

In [82]:
# .isin() metodu ile filtreleme

df['day'].isin(options)

# day is in options? yani options'a atanan değerler day'lerin içinde mi?
# True veya False üretecek

Payment ID
Sun4608     True
Sun4458     True
Sun5260     True
Sun2251     True
Sun9679     True
           ...  
Sat1766     True
Sat3880     True
Sat17       True
Thur672    False
Sun4608     True
Name: day, Length: 244, dtype: bool

In [83]:
df[df['day'].isin(options)]

# df['day'].isin(options)'in boolean olarak True olan değerlerini 
# (yani içinde Sat ve Sun olan değerlerini) dataframe'i indeksleyerek getirdik.  

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230
Sun4458,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950
