# How do I select multiple rows and columns from a pandas DataFrame?
https://www.youtube.com/watch?v=xvpNA7bC8cs

In [1]:
import pandas as pd

In [3]:
ufo = pd.read_csv("data/ufo.csv")

In [5]:
#datamız neye benziyor bakalım
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


In [9]:
# loc etiketlere göre (bunlar column namestir.) operasyonlarda işe yarar. 
# burada 1. row (row 0) u getir dedik.
ufo.loc[0]

City                       Ithaca
Colors Reported               NaN
Shape Reported           TRIANGLE
State                          NY
Time               6/1/1930 22:00
Name: 0, dtype: object

In [13]:
# numpy array gibi çalışıyor. virgülden öncesi satırlar, virgülden sonrası sütunlar
# liste olarak ta yazabiliriz [0,1,2] gibi.
# istersek virgülden sonrasına gerek yok, bütün columnları getirir ama explicite olarak yazmak önerilir. adap buymuş.
# numpy dan farklı olarak .loc inclusive çalışıyor. 
ufo.loc[0:3,:]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00


In [15]:
# eğer columnlarla uşraşıyorsak ve 2 column getirmek istersek
ufo.loc[:,['City','State']].head()

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY


In [19]:
# 2 column arasındaki tüm columnları getirmek istiyorsak (list arraya dönüşür gibi oldu.)
ufo.loc[:,'City':'State'].head()

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY


In [34]:
# satırları da seçebiliriz columnları seçerken
%time
ufo.loc[0:3,'City':'State'].head()

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 16 µs


Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS


In [33]:
# aynısını başka türlü
%time
ufo.head(4).drop('Time',axis=1)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 13.8 µs


Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS


In [37]:
# buradan 
ufo[ufo.City == 'Oakland']

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
1694,Oakland,,CIGAR,CA,7/21/1968 14:00
2144,Oakland,,DISK,CA,8/19/1971 0:00
4686,Oakland,,LIGHT,MD,6/1/1982 0:00
7293,Oakland,,LIGHT,CA,3/28/1994 17:00
8488,Oakland,,,CA,8/10/1995 21:45
8768,Oakland,,,CA,10/10/1995 22:40
10816,Oakland,,LIGHT,OR,10/1/1997 21:30
10948,Oakland,,DISK,CA,11/14/1997 19:55
11045,Oakland,,TRIANGLE,CA,12/10/1997 1:30
12322,Oakland,,FIREBALL,CA,10/9/1998 19:40


In [36]:
ufo.loc[ufo.City=='Oakland', 'State']

1694     CA
2144     CA
4686     MD
7293     CA
8488     CA
8768     CA
10816    OR
10948    CA
11045    CA
12322    CA
12941    CA
16803    MD
17322    CA
Name: State, dtype: object

In [43]:
ufo.columns

Index([u'City', u'Colors Reported', u'Shape Reported', u'State', u'Time'], dtype='object')

In [44]:
# diğer arkadaşı iloc tur. iloc integer positiona bakar.
# loc tan farkı exclusive dir.  yukarıdakine göre 0 dan başlayacak şekilde,
ufo.iloc[:,0:4].head()

Unnamed: 0,City,Colors Reported,Shape Reported,State
0,Ithaca,,TRIANGLE,NY
1,Willingboro,,OTHER,NJ
2,Holyoke,,OVAL,CO
3,Abilene,,DISK,KS
4,New York Worlds Fair,,LIGHT,NY


In [49]:
# sadece 2 column seçmek istersek
ufo[['City','State']].head()

Unnamed: 0,City,State
0,Ithaca,NY
1,Willingboro,NJ
2,Holyoke,CO
3,Abilene,KS
4,New York Worlds Fair,NY


In [50]:
# ilk 2 satırı getireceğim
ufo.iloc[0:2,:]

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00


In [53]:
# ix konusu (python 3 te deprecate olmuştur.)
# bunun için drinks df le çalışalım
# ix hem label hem de integer label ile çalışabilirsin demek
drinks = pd.read_csv("data/drinks.csv",index_col='country')

In [54]:
drinks.head()

Unnamed: 0_level_0,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Afghanistan,0,0,0,0.0,Asia
Albania,89,132,54,4.9,Europe
Algeria,25,0,14,0.7,Africa
Andorra,245,138,312,12.4,Europe
Angola,217,57,45,5.9,Africa


In [55]:
drinks.ix['Albania',0] 

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  """Entry point for launching an IPython kernel.


89