In [None]:
import pandas as pd

# Load the team attributes and prepare the index in one pipeline:
#   1. read the CSV file,
#   2. rewrite the `id` column as the string 'a' followed by the original id,
#   3. promote `id` to the index so later cells can look rows up by label.
df = (
    pd.read_csv('team_attributes.csv')
    .assign(id=lambda frame: 'a' + frame['id'].astype(str))
    .set_index('id')
)

# Show the first rows of the prepared frame.
print("=== 原始数据（加上a后） ===", df.head(), sep="\n")


=== 原始数据（加上a后） ===
    team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                        
a0         9930  2010/2/22 0:00                60              Balanced   
a1         9930  2014/9/19 0:00                52              Balanced   
a2         9930  2015/9/10 0:00                47              Balanced   
a3         8485  2010/2/22 0:00                70                  Fast   
a4         8485  2011/2/22 0:00                47              Balanced   

    buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                       
a0                   NaN                    Little                  50   
a1                  48.0                    Normal                  56   
a2                  41.0                    Normal                  54   
a3                   NaN                    Little                  70   
a4         

In [None]:
# Task 1 (label-based half): fetch the row whose index label is 'a1457'.
# .loc looks rows up by label; the trailing ':' keeps every column.
print("利用标签索引获取 id=a1457：")
print(df.loc['a1457', :])


利用标签索引获取 id=a1457：
team_api_id                                10000
date                              2015/9/10 0:00
buildUpPlaySpeed                              54
buildUpPlaySpeedClass                   Balanced
buildUpPlayDribbling                        42.0
buildUpPlayDribblingClass                 Normal
buildUpPlayPassing                            51
buildUpPlayPassingClass                    Mixed
buildUpPlayPositioningClass            Organised
chanceCreationPassing                         47
chanceCreationPassingClass                Normal
chanceCreationCrossing                        52
chanceCreationCrossingClass               Normal
chanceCreationShooting                        32
chanceCreationShootingClass               Little
chanceCreationPositioningClass         Organised
defencePressure                               44
defencePressureClass                      Medium
defenceAggression                             58
defenceAggressionClass                     Press
d

In [6]:
# Task 1 (position-based half): fetch the same row through .iloc.
# The heading's "i=？" is a literal placeholder; the actual integer position
# is resolved from the label below and then handed to .iloc.
print("利用位置索引获取 id=a1457（假设它在位置 i=？）：")
print(
    df.iloc[
        [df.index.get_loc('a1457')]  # one-element list -> result stays a DataFrame
    ]
)

利用位置索引获取 id=a1457（假设它在位置 i=？）：
       team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                           
a1457        10000  2015/9/10 0:00                54              Balanced   

       buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                          
a1457                  42.0                    Normal                  51   

      buildUpPlayPassingClass buildUpPlayPositioningClass  \
id                                                          
a1457                   Mixed                   Organised   

       chanceCreationPassing  ... chanceCreationShooting  \
id                            ...                          
a1457                     47  ...                     32   

       chanceCreationShootingClass chanceCreationPositioningClass  \
id                                                                  

In [7]:
# Task 2: select the rows labelled a0, a2 and a4 (a list of labels given to
# .loc returns those rows in the listed order, with all columns).
print("获取 a0、a2、a4：", df.loc[['a0', 'a2', 'a4'], :], sep="\n")

获取 a0、a2、a4：
    team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                        
a0         9930  2010/2/22 0:00                60              Balanced   
a2         9930  2015/9/10 0:00                47              Balanced   
a4         8485  2011/2/22 0:00                47              Balanced   

    buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                       
a0                   NaN                    Little                  50   
a2                  41.0                    Normal                  54   
a4                   NaN                    Little                  52   

   buildUpPlayPassingClass buildUpPlayPositioningClass  chanceCreationPassing  \
id                                                                              
a0                   Mixed                   Organised                     60 

In [8]:
# Task 3: rows sitting at even positions (0, 2, 4, ...), picked purely by
# position with a step-2 slice over the rows.
print("位置为偶数的行：", df.iloc[0::2, :], sep="\n")

位置为偶数的行：
       team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                           
a0            9930  2010/2/22 0:00                60              Balanced   
a2            9930  2015/9/10 0:00                47              Balanced   
a4            8485  2011/2/22 0:00                47              Balanced   
a6            8485  2013/9/20 0:00                62              Balanced   
a8            8485  2015/9/10 0:00                59              Balanced   
...            ...             ...               ...                   ...   
a1448         8394  2014/9/19 0:00                38              Balanced   
a1450         8027  2014/9/19 0:00                54              Balanced   
a1452        10000  2010/2/22 0:00                65              Balanced   
a1454        10000  2012/2/22 0:00                54              Balanced   
a1456        10000  2014/9/19 0:00                54   

In [9]:
# Task 4: keep only the rows whose buildUpPlaySpeed is strictly below 30.
# The comparison yields a boolean Series that .loc uses as a row mask.
print("buildUpPlaySpeed < 30：")
print(df.loc[df['buildUpPlaySpeed'] < 30, :])

buildUpPlaySpeed < 30：
       team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                           
a61         108893  2012/2/22 0:00                23                  Slow   
a72           9825  2012/2/22 0:00                25                  Slow   
a120          8634  2012/2/22 0:00                24                  Slow   
a504          8305  2012/2/22 0:00                25                  Slow   
a505          8305  2013/9/20 0:00                29                  Slow   
a628          9885  2014/9/19 0:00                26                  Slow   
a1035         8370  2015/9/10 0:00                29                  Slow   
a1056         8560  2012/2/22 0:00                24                  Slow   
a1057         8560  2013/9/20 0:00                28                  Slow   
a1256        10003  2012/2/22 0:00                26                  Slow   
a1294         8611  2012/2/22 0:00       

In [10]:
# Task 5: within the first five rows, take positions 0, 2 and 4.
# range(0, 5, 2) spells out "first five rows, every second one" and expands
# to exactly the positions [0, 2, 4].
print("前5行中位置为 0、2、4 的数据：")
print(df.iloc[list(range(0, 5, 2))])

前5行中位置为 0、2、4 的数据：
    team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                        
a0         9930  2010/2/22 0:00                60              Balanced   
a2         9930  2015/9/10 0:00                47              Balanced   
a4         8485  2011/2/22 0:00                47              Balanced   

    buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                       
a0                   NaN                    Little                  50   
a2                  41.0                    Normal                  54   
a4                   NaN                    Little                  52   

   buildUpPlayPassingClass buildUpPlayPositioningClass  chanceCreationPassing  \
id                                                                              
a0                   Mixed                   Organised                  

In [11]:
# Task 6: build df1 as a copy of df without the row labelled 'a0'.
# drop() returns a new frame; df itself is left untouched.
df1 = df.drop(labels='a0', axis='index')
print(" 删除 a0 后：", df1.head(), sep="\n")

 删除 a0 后：
    team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                        
a1         9930  2014/9/19 0:00                52              Balanced   
a2         9930  2015/9/10 0:00                47              Balanced   
a3         8485  2010/2/22 0:00                70                  Fast   
a4         8485  2011/2/22 0:00                47              Balanced   
a5         8485  2012/2/22 0:00                58              Balanced   

    buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                       
a1                  48.0                    Normal                  56   
a2                  41.0                    Normal                  54   
a3                   NaN                    Little                  70   
a4                   NaN                    Little                  52   
a5                  

In [12]:

# Task 7: build df2 as a copy of df without the rows labelled 'a0' and 'a1'.
# drop() returns a new frame; df itself is left untouched.
df2 = df.drop(labels=['a0', 'a1'], axis='index')
print("\n⑦ 删除 a0、a1 后：", df2.head(), sep="\n")



⑦ 删除 a0、a1 后：
    team_api_id            date  buildUpPlaySpeed buildUpPlaySpeedClass  \
id                                                                        
a2         9930  2015/9/10 0:00                47              Balanced   
a3         8485  2010/2/22 0:00                70                  Fast   
a4         8485  2011/2/22 0:00                47              Balanced   
a5         8485  2012/2/22 0:00                58              Balanced   
a6         8485  2013/9/20 0:00                62              Balanced   

    buildUpPlayDribbling buildUpPlayDribblingClass  buildUpPlayPassing  \
id                                                                       
a2                  41.0                    Normal                  54   
a3                   NaN                    Little                  70   
a4                   NaN                    Little                  52   
a5                   NaN                    Little                  62   
a6             

In [13]:
# Task 8: read the single value at row 'a3', column 'buildUpPlaySpeed',
# in two equivalent ways.
# NOTE(review): the recorded output shows both values; with default Jupyter
# settings only the last bare expression of a cell is echoed -- confirm the
# kernel's ast_node_interactivity setting.

# Row first, then column: one .loc call with (row label, column label).
df.loc['a3', 'buildUpPlaySpeed']

# Column first, then row: select the column as a Series, then index by label.
df.loc[:, 'buildUpPlaySpeed'].loc['a3']


70

70

In [14]:
# Rows 'a3' and 'a5' restricted to the speed column and its class column.
# Row labels and column labels given together in a single .loc call:
df.loc[
    ['a3', 'a5'],
    ['buildUpPlaySpeed', 'buildUpPlaySpeedClass'],
]
# or, equivalently, narrow to the two columns first and then pick the rows:
df.loc[:, ['buildUpPlaySpeed', 'buildUpPlaySpeedClass']].loc[['a3', 'a5']]


Unnamed: 0_level_0,buildUpPlaySpeed,buildUpPlaySpeedClass
id,Unnamed: 1_level_1,Unnamed: 2_level_1
a3,70,Fast
a5,58,Balanced


Unnamed: 0_level_0,buildUpPlaySpeed,buildUpPlaySpeedClass
id,Unnamed: 1_level_1,Unnamed: 2_level_1
a3,70,Fast
a5,58,Balanced


In [15]:
# Label slice from 'a3' through 'a5' on the speed columns. Label slices in
# .loc include both endpoints, so rows a3, a4 and a5 are returned.
df.loc[
    slice('a3', 'a5'),
    ['buildUpPlaySpeed', 'buildUpPlaySpeedClass'],
]


Unnamed: 0_level_0,buildUpPlaySpeed,buildUpPlaySpeedClass
id,Unnamed: 1_level_1,Unnamed: 2_level_1
a3,70,Fast
a4,47,Balanced
a5,58,Balanced


In [17]:
# Replace missing buildUpPlayDribbling scores with 0.
# The filled column is assigned back to df instead of calling
# fillna(..., inplace=True) on the selected column: an in-place call on
# df[col] is chained assignment, which emits a FutureWarning on recent pandas
# and leaves df unchanged once Copy-on-Write is in effect.
df['buildUpPlayDribbling'] = df['buildUpPlayDribbling'].fillna(0)
df.head()


Unnamed: 0_level_0,team_api_id,date,buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribbling,buildUpPlayDribblingClass,buildUpPlayPassing,buildUpPlayPassingClass,buildUpPlayPositioningClass,chanceCreationPassing,...,chanceCreationShooting,chanceCreationShootingClass,chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a0,9930,2010/2/22 0:00,60,Balanced,0.0,Little,50,Mixed,Organised,60,...,55,Normal,Organised,50,Medium,55,Press,45,Normal,Cover
a1,9930,2014/9/19 0:00,52,Balanced,48.0,Normal,56,Mixed,Organised,54,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
a2,9930,2015/9/10 0:00,47,Balanced,41.0,Normal,54,Mixed,Organised,54,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
a3,8485,2010/2/22 0:00,70,Fast,0.0,Little,70,Long,Organised,70,...,70,Lots,Organised,60,Medium,70,Double,70,Wide,Cover
a4,8485,2011/2/22 0:00,47,Balanced,0.0,Little,52,Mixed,Organised,53,...,52,Normal,Organised,47,Medium,47,Press,52,Normal,Cover


In [20]:
# Raise every buildUpPlaySpeed value below 30 up to 30.
# The boolean mask picks the affected rows; passing it to .loc together with
# the column label writes the new value directly into df.
slow_rows = df['buildUpPlaySpeed'] < 30
df.loc[slow_rows, 'buildUpPlaySpeed'] = 30
df

Unnamed: 0_level_0,team_api_id,date,buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribbling,buildUpPlayDribblingClass,buildUpPlayPassing,buildUpPlayPassingClass,buildUpPlayPositioningClass,chanceCreationPassing,...,chanceCreationShooting,chanceCreationShootingClass,chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
a0,9930,2010/2/22 0:00,60,Balanced,0.0,Little,50,Mixed,Organised,60,...,55,Normal,Organised,50,Medium,55,Press,45,Normal,Cover
a1,9930,2014/9/19 0:00,52,Balanced,48.0,Normal,56,Mixed,Organised,54,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
a2,9930,2015/9/10 0:00,47,Balanced,41.0,Normal,54,Mixed,Organised,54,...,64,Normal,Organised,47,Medium,44,Press,54,Normal,Cover
a3,8485,2010/2/22 0:00,70,Fast,0.0,Little,70,Long,Organised,70,...,70,Lots,Organised,60,Medium,70,Double,70,Wide,Cover
a4,8485,2011/2/22 0:00,47,Balanced,0.0,Little,52,Mixed,Organised,53,...,52,Normal,Organised,47,Medium,47,Press,52,Normal,Cover
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
a1453,10000,2011/2/22 0:00,52,Balanced,0.0,Little,52,Mixed,Organised,52,...,53,Normal,Organised,46,Medium,48,Press,53,Normal,Cover
a1454,10000,2012/2/22 0:00,54,Balanced,0.0,Little,51,Mixed,Organised,47,...,50,Normal,Organised,44,Medium,55,Press,53,Normal,Cover
a1455,10000,2013/9/20 0:00,54,Balanced,0.0,Little,51,Mixed,Organised,47,...,32,Little,Organised,44,Medium,58,Press,37,Normal,Cover
a1456,10000,2014/9/19 0:00,54,Balanced,42.0,Normal,51,Mixed,Organised,47,...,32,Little,Organised,44,Medium,58,Press,37,Normal,Cover
