In [1]:
import pandas as pd
import numpy as np

## 6.1 選取一筆或多筆Series 資料

In [2]:
# 讀取檔案 並以特定欄位為索引
college = pd.read_csv('../../data/college.csv', index_col='INSTNM')
city = college['CITY']
print(city)

INSTNM
Alabama A & M University                                            Normal
University of Alabama at Birmingham                             Birmingham
Amridge University                                              Montgomery
University of Alabama in Huntsville                             Huntsville
Alabama State University                                        Montgomery
                                                                ...       
SAE Institute of Technology  San Francisco                      Emeryville
Rasmussen College - Overland Park                            Overland Park
National Personal Training Institute of Cleveland         Highland Heights
Bay Area Medical Academy - San Jose Satellite Location            San Jose
Excel Learning Center-San Antonio South                        San Antonio
Name: CITY, Length: 7535, dtype: object


In [4]:
# 使用[]
print(city['Alabama A & M University'])

# 使用.loc
print(city.loc['Alabama A & M University'])

# 使用.iloc
print(city.iloc[0])

# 使用[] 放入list放入list
print(city[['Alabama A & M University', 'Alabama State University']])

# 使用.loc 找多筆
print(city.loc[['Alabama A & M University', 'Alabama State University']])

# 使用.iloc 找多筆
print(city.iloc[[0, 4]])

Normal
Normal
Normal
INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object


In [5]:
# 使用[] 切片
print(city['Alabama A & M University': 'Alabama State University'])

# 使用[] 的位置切片
print(city[0:5])

# 使用.loc 切片
print(city['Alabama A & M University': 'Alabama State University'])

# 使用.iloc 切片
print(city.iloc[0:5])

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montg

In [6]:
# 做出一個地點的遮罩
alabama_mask = city.isin(['Birmingham', 'Montgomery'])
print(alabama_mask)

INSTNM
Alabama A & M University                                  False
University of Alabama at Birmingham                        True
Amridge University                                         True
University of Alabama in Huntsville                       False
Alabama State University                                   True
                                                          ...  
SAE Institute of Technology  San Francisco                False
Rasmussen College - Overland Park                         False
National Personal Training Institute of Cleveland         False
Bay Area Medical Academy - San Jose Satellite Location    False
Excel Learning Center-San Antonio South                   False
Name: CITY, Length: 7535, dtype: bool


In [7]:
# 使用遮罩 就只會回傳True 的資料
print(city[alabama_mask])

INSTNM
University of Alabama at Birmingham                 Birmingham
Amridge University                                  Montgomery
Alabama State University                            Montgomery
Auburn University at Montgomery                     Montgomery
Birmingham Southern College                         Birmingham
South University-Montgomery                         Montgomery
Faulkner University                                 Montgomery
Herzing University-Birmingham                       Birmingham
Huntingdon College                                  Montgomery
Jefferson State Community College                   Birmingham
Lawson State Community College-Birmingham Campus    Birmingham
Samford University                                  Birmingham
Southeastern Bible College                          Birmingham
H Councill Trenholm State Community College         Montgomery
West Virginia University Institute of Technology    Montgomery
Virginia College-Birmingham                     

In [10]:
# 指定列跟欄
print(college.loc['Alabama A & M University', 'CITY'])

print(college.iloc[0, 0])

# 指定多列與欄
print(college.loc[['Alabama A & M University', 'Alabama State University'], 'CITY'])

print(college.iloc[[0,4], 0])

Normal
Normal
INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University        Normal
Alabama State University    Montgomery
Name: CITY, dtype: object


In [11]:
# 切片指定多列與欄
print(college.loc['Alabama A & M University': 'Alabama State University', 'CITY'])

print(college.iloc[0:4, 0])

INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Alabama State University               Montgomery
Name: CITY, dtype: object
INSTNM
Alabama A & M University                   Normal
University of Alabama at Birmingham    Birmingham
Amridge University                     Montgomery
University of Alabama in Huntsville    Huntsville
Name: CITY, dtype: object


## 6.2 選取DataFrame 的列

In [5]:
# 讀取檔案 並以特定欄位為索引
college = pd.read_csv('../../data/college.csv', index_col='INSTNM')

print(college.head())

                                           CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                  
Alabama A & M University                 Normal     AL   1.0      0.0   
University of Alabama at Birmingham  Birmingham     AL   0.0      0.0   
Amridge University                   Montgomery     AL   0.0      0.0   
University of Alabama in Huntsville  Huntsville     AL   0.0      0.0   
Alabama State University             Montgomery     AL   1.0      0.0   

                                     WOMENONLY  RELAFFIL  SATVRMID  SATMTMID  \
INSTNM                                                                         
Alabama A & M University                   0.0         0     424.0     420.0   
University of Alabama at Birmingham        0.0         0     570.0     565.0   
Amridge University                         0.0         1       NaN       NaN   
University of Alabama in Huntsville        0.0         0     595.0     590.0   
Alabama 

In [6]:
# 指定特定列
print(college.iloc[0])

CITY                  Normal
STABBR                    AL
HBCU                     1.0
MENONLY                  0.0
WOMENONLY                0.0
RELAFFIL                   0
SATVRMID               424.0
SATMTMID               420.0
DISTANCEONLY             0.0
UGDS                  4206.0
UGDS_WHITE            0.0333
UGDS_BLACK            0.9353
UGDS_HISP             0.0055
UGDS_ASIAN            0.0019
UGDS_AIAN             0.0024
UGDS_NHPI             0.0019
UGDS_2MOR                0.0
UGDS_NRA              0.0059
UGDS_UNKN             0.0138
PPTUG_EF              0.0656
CURROPER                   1
PCTPELL               0.7356
PCTFLOAN              0.8284
UG25ABV               0.1049
MD_EARN_WNE_P10        30300
GRAD_DEBT_MDN_SUPP     33888
Name: Alabama A & M University, dtype: object


In [7]:
# 指定特定列
print(college.loc['Alabama A & M University'])

CITY                  Normal
STABBR                    AL
HBCU                     1.0
MENONLY                  0.0
WOMENONLY                0.0
RELAFFIL                   0
SATVRMID               424.0
SATMTMID               420.0
DISTANCEONLY             0.0
UGDS                  4206.0
UGDS_WHITE            0.0333
UGDS_BLACK            0.9353
UGDS_HISP             0.0055
UGDS_ASIAN            0.0019
UGDS_AIAN             0.0024
UGDS_NHPI             0.0019
UGDS_2MOR                0.0
UGDS_NRA              0.0059
UGDS_UNKN             0.0138
PPTUG_EF              0.0656
CURROPER                   1
PCTPELL               0.7356
PCTFLOAN              0.8284
UG25ABV               0.1049
MD_EARN_WNE_P10        30300
GRAD_DEBT_MDN_SUPP     33888
Name: Alabama A & M University, dtype: object


In [8]:
# 選取多個列
print(college.iloc[[60, 99, 3]])

                                            CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                   
University of Alaska Anchorage         Anchorage     AK   0.0      0.0   
International Academy of Hair Design       Tempe     AZ   0.0      0.0   
University of Alabama in Huntsville   Huntsville     AL   0.0      0.0   

                                      WOMENONLY  RELAFFIL  SATVRMID  SATMTMID  \
INSTNM                                                                          
University of Alaska Anchorage              0.0         0       NaN       NaN   
International Academy of Hair Design        0.0         0       NaN       NaN   
University of Alabama in Huntsville         0.0         0     595.0     590.0   

                                      DISTANCEONLY     UGDS  ...  UGDS_2MOR  \
INSTNM                                                       ...              
University of Alaska Anchorage                 0.0  12865.0  ... 

In [10]:
# 選取多個列
labels = ['University of Alaska Anchorage',
          'International Academy of Hair Design',
          'University of Alabama in Huntsville']
print(college.loc[labels])

                                            CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                   
University of Alaska Anchorage         Anchorage     AK   0.0      0.0   
International Academy of Hair Design       Tempe     AZ   0.0      0.0   
University of Alabama in Huntsville   Huntsville     AL   0.0      0.0   

                                      WOMENONLY  RELAFFIL  SATVRMID  SATMTMID  \
INSTNM                                                                          
University of Alaska Anchorage              0.0         0       NaN       NaN   
International Academy of Hair Design        0.0         0       NaN       NaN   
University of Alabama in Huntsville         0.0         0     595.0     590.0   

                                      DISTANCEONLY     UGDS  ...  UGDS_2MOR  \
INSTNM                                                       ...              
University of Alaska Anchorage                 0.0  12865.0  ... 

In [11]:
# 切片法
print(college.iloc[99:102])

                                         CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                
International Academy of Hair Design    Tempe     AZ   0.0      0.0   
GateWay Community College             Phoenix     AZ   0.0      0.0   
Mesa Community College                   Mesa     AZ   0.0      0.0   

                                      WOMENONLY  RELAFFIL  SATVRMID  SATMTMID  \
INSTNM                                                                          
International Academy of Hair Design        0.0         0       NaN       NaN   
GateWay Community College                   0.0         0       NaN       NaN   
Mesa Community College                      0.0         0       NaN       NaN   

                                      DISTANCEONLY     UGDS  ...  UGDS_2MOR  \
INSTNM                                                       ...              
International Academy of Hair Design           0.0    188.0  ...     0.0160   
G

In [12]:
# loc 切片法 包含尾巴
start = 'International Academy of Hair Design'
stop = 'Mesa Community College'
print(college.loc[start:stop])

                                         CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                
International Academy of Hair Design    Tempe     AZ   0.0      0.0   
GateWay Community College             Phoenix     AZ   0.0      0.0   
Mesa Community College                   Mesa     AZ   0.0      0.0   

                                      WOMENONLY  RELAFFIL  SATVRMID  SATMTMID  \
INSTNM                                                                          
International Academy of Hair Design        0.0         0       NaN       NaN   
GateWay Community College                   0.0         0       NaN       NaN   
Mesa Community College                      0.0         0       NaN       NaN   

                                      DISTANCEONLY     UGDS  ...  UGDS_2MOR  \
INSTNM                                                       ...              
International Academy of Hair Design           0.0    188.0  ...     0.0160   
G

## 6.3 同時選取DataFrame 的列與欄位

In [None]:
# 讀取檔案 並以特定欄位為索引
college = pd.read_csv('../../data/college.csv', index_col='INSTNM')

In [13]:
# 切片前3列 前4欄
print(college.iloc[:3, :4])

                                           CITY STABBR  HBCU  MENONLY
INSTNM                                                               
Alabama A & M University                 Normal     AL   1.0      0.0
University of Alabama at Birmingham  Birmingham     AL   0.0      0.0
Amridge University                   Montgomery     AL   0.0      0.0


In [14]:
# 切片前3列 前4欄
print(college.loc[:'Amridge University', :'MENONLY'])

                                           CITY STABBR  HBCU  MENONLY
INSTNM                                                               
Alabama A & M University                 Normal     AL   1.0      0.0
University of Alabama at Birmingham  Birmingham     AL   0.0      0.0
Amridge University                   Montgomery     AL   0.0      0.0


In [15]:
# 選取所有列 4和6的欄位
print(college.iloc[:, [4, 6]].head())

                                     WOMENONLY  SATVRMID
INSTNM                                                  
Alabama A & M University                   0.0     424.0
University of Alabama at Birmingham        0.0     570.0
Amridge University                         0.0       NaN
University of Alabama in Huntsville        0.0     595.0
Alabama State University                   0.0     425.0


In [17]:
# 選取所有列 4和6的欄位
print(college.loc[:, ['WOMENONLY', 'SATVRMID']].head())

                                     WOMENONLY  SATVRMID
INSTNM                                                  
Alabama A & M University                   0.0     424.0
University of Alabama at Birmingham        0.0     570.0
Amridge University                         0.0       NaN
University of Alabama in Huntsville        0.0     595.0
Alabama State University                   0.0     425.0


In [18]:
# 顯示特定儲存格的值
print(college.iloc[5, -4])

0.401


In [19]:
# 顯示特定儲存格的值
print(college.loc['The University of Alabama', 'PCTFLOAN'])

0.401


In [21]:
# 切片操作單一欄位
print(college.iloc[10:20:2, 5])

INSTNM
Birmingham Southern College             1
Concordia College Alabama               1
Enterprise State Community College      0
Faulkner University                     1
New Beginning College of Cosmetology    0
Name: RELAFFIL, dtype: int64


In [23]:
# 切片操作單一欄位
start = 'Birmingham Southern College'
stop = 'New Beginning College of Cosmetology'
print(college.loc[start:stop:2, 'RELAFFIL'])

INSTNM
Birmingham Southern College             1
Concordia College Alabama               1
Enterprise State Community College      0
Faulkner University                     1
New Beginning College of Cosmetology    0
Name: RELAFFIL, dtype: int64


## 6.4 混用位置與標籤來選取資料

In [2]:
# 讀取檔案 並以特定欄位為索引
college = pd.read_csv('../../data/college.csv', index_col='INSTNM')

In [3]:
# 找到標籤位置
col_start = college.columns.get_loc('UGDS_WHITE')

col_end = college.columns.get_loc('UGDS_UNKN') + 1

print(col_start, col_end)

# 再用iloc
print(college.iloc[:5, col_start:col_end])

10 19
                                     UGDS_WHITE  UGDS_BLACK  UGDS_HISP  \
INSTNM                                                                   
Alabama A & M University                 0.0333      0.9353     0.0055   
University of Alabama at Birmingham      0.5922      0.2600     0.0283   
Amridge University                       0.2990      0.4192     0.0069   
University of Alabama in Huntsville      0.6988      0.1255     0.0382   
Alabama State University                 0.0158      0.9208     0.0121   

                                     UGDS_ASIAN  UGDS_AIAN  UGDS_NHPI  \
INSTNM                                                                  
Alabama A & M University                 0.0019     0.0024     0.0019   
University of Alabama at Birmingham      0.0518     0.0022     0.0007   
Amridge University                       0.0034     0.0000     0.0000   
University of Alabama in Huntsville      0.0376     0.0143     0.0002   
Alabama State University             

In [4]:
# 反過來用數字找標籤
row_start = college.index[10]

row_end = college.index[15]

print(row_start, row_end)

# 再用loc
print(college.loc[row_start:row_end, 'UGDS_WHITE':'UGDS_UNKN'])

Birmingham Southern College James H Faulkner State Community College
                                          UGDS_WHITE  UGDS_BLACK  UGDS_HISP  \
INSTNM                                                                        
Birmingham Southern College                   0.7983      0.1102     0.0195   
Chattahoochee Valley Community College        0.4661      0.4372     0.0492   
Concordia College Alabama                     0.0280      0.8758     0.0373   
South University-Montgomery                   0.3046      0.6054     0.0153   
Enterprise State Community College            0.6408      0.2435     0.0509   
James H Faulkner State Community College      0.6979      0.2259     0.0320   

                                          UGDS_ASIAN  UGDS_AIAN  UGDS_NHPI  \
INSTNM                                                                       
Birmingham Southern College                   0.0517     0.0102     0.0000   
Chattahoochee Valley Community College        0.0127     0.0023 

## 6.5 按標籤的字母順序進行切片

In [5]:
# 讀取檔案 並以特定欄位為索引
college = pd.read_csv('../../data/college.csv', index_col='INSTNM')

In [8]:
# 排序index
college = college.sort_index()

# 排序後就能以字母順序切片
print(college.loc['Sp':'Su'])

                                                  CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                         
Spa Tech Institute-Ipswich                     Ipswich     MA   0.0      0.0   
Spa Tech Institute-Plymouth                   Plymouth     MA   0.0      0.0   
Spa Tech Institute-Westboro                   Westboro     MA   0.0      0.0   
Spa Tech Institute-Westbrook                 Westbrook     ME   0.0      0.0   
Spalding University                         Louisville     KY   0.0      0.0   
...                                                ...    ...   ...      ...   
Studio Academy of Beauty                      Chandler     AZ   0.0      0.0   
Studio Jewelers                               New York     NY   0.0      0.0   
Stylemaster College of Hair Design            Longview     WA   0.0      0.0   
Styles and Profiles Beauty College              Selmer     TN   0.0      0.0   
Styletrends Barber and Hairstyling Acade

In [9]:
# 反向排序也支援
college = college.sort_index(ascending=False)

print(college.loc['E': 'B'])

                                                  CITY STABBR  HBCU  MENONLY  \
INSTNM                                                                         
Dyersburg State Community College            Dyersburg     TN   0.0      0.0   
Dutchess Community College                Poughkeepsie     NY   0.0      0.0   
Dutchess BOCES-Practical Nursing Program  Poughkeepsie     NY   0.0      0.0   
Durham Technical Community College              Durham     NC   0.0      0.0   
Durham Beauty Academy                           Durham     NC   0.0      0.0   
...                                                ...    ...   ...      ...   
Bacone College                                Muskogee     OK   0.0      0.0   
Babson College                               Wellesley     MA   0.0      0.0   
BJ's Beauty & Barber College                    Auburn     WA   0.0      0.0   
BIR Training Center                            Chicago     IL   0.0      0.0   
B M Spurr School of Practical Nursing   