In [3]:
import pandas as pd
import numpy as np

In [4]:
df=pd.read_csv('soccer.csv')

In [5]:
df.head(2)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0


In [6]:
df.shape

(465, 17)

In [7]:
df.dtypes.value_counts()

int64      10
object      5
float64     2
dtype: int64

In [8]:
df.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Columns: 17 entries, name to new_signing
dtypes: float64(2), int64(10), object(5)
memory usage: 190.7 KB


### Indexing with Boolean Masks

In [9]:
df.loc[df['market_value']>40].head(2)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0


### More Approaches to Boolean Masking

In [10]:
df['position'].unique()

array(['LW', 'AM', 'GK', 'RW', 'CB', 'RB', 'CF', 'LB', 'DM', 'RM', 'CM',
       nan, 'SS', 'LM'], dtype=object)

In [11]:
df['position'].unique().size

14

In [16]:
df[df['market_value'].between(15,20, inclusive='both')].head(2)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
14,Jack Wilshere,Arsenal,25,CM,2,18.0,1759,5.5,0.00%,61,1,England,0,3,1,1,0


Code above same as using greater than or less than equals etc.

In [12]:
df[df['position'].isin(['RB', 'LB', 'CB'])].head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0


### Binary Operators with Booleans

In [11]:
True | False

True

In [12]:
True ^ False

True

In [13]:
True ^ True

False

In [14]:
~False

-1

In [15]:
~True

-2

In [16]:
~1

-2

In [17]:
~0

-1

### Combining Conditions

In [19]:
df[(df['position']=='LB') &
    (df['age']<=25) &
    (df['market_value']>=10)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
389,Ben Davies,Tottenham,24,LB,3,12.0,396,5.5,1.80%,90,2,Wales,0,2,17,1,0


In [22]:
df[(df['position']=='LB') &
    (df['age']<=25) &
    (df['market_value']>=10) &
    ~(df['club'].isin(['Tottenham', 'Arsenal']))]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0


### Conditions as Variables

Sometimes it is hard to read all the conditions if more than one. Solution is to assign the condition into a variable

In [23]:
arsenal_LB=(df['club']=='Arsenal') & (df['position']=='LB')

In [25]:
chelsea_GK=(df['club']=='Chelsea') & (df['position']=='GK')

In [27]:
df[arsenal_LB | chelsea_GK]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
18,Kieran Gibbs,Arsenal,27,LB,3,10.0,489,5.0,0.50%,45,1,England,0,3,1,1,0
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
102,Thibaut Courtois,Chelsea,25,GK,4,40.0,1260,5.5,18.50%,141,2,Belgium,0,3,5,1,0
109,Willy Caballero,Chelsea,35,GK,4,1.5,542,5.0,0.20%,64,3,Argentina,0,6,5,1,0


In [28]:
df.loc[arsenal_LB | chelsea_GK]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
18,Kieran Gibbs,Arsenal,27,LB,3,10.0,489,5.0,0.50%,45,1,England,0,3,1,1,0
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
102,Thibaut Courtois,Chelsea,25,GK,4,40.0,1260,5.5,18.50%,141,2,Belgium,0,3,5,1,0
109,Willy Caballero,Chelsea,35,GK,4,1.5,542,5.0,0.20%,64,3,Argentina,0,6,5,1,0


### Reindexing

In [18]:
df.reindex(index=[1,4,6,7], columns=['name', 'club', 'age', 'position'])

Unnamed: 0,name,club,age,position
1,Mesut Ozil,Arsenal,28,AM
4,Laurent Koscielny,Arsenal,31,CB
6,Olivier Giroud,Arsenal,30,CF
7,Nacho Monreal,Arsenal,31,LB


### Excercises

Find the players in the dataset that meet these criterias:
- they're English (nationality)
- their market value is more than twice the average market value in the league
- they either have more than 4,000 views (page_views) or are a new signing (new_signing) but not both

In [19]:
market_mean=df['market_value'].mean()

In [20]:
nationality=df['nationality']=='England'

In [21]:
market_value=df['market_value']>2*market_mean

In [22]:
popular_xor_new=(df['page_views']>4000)^(df['new_signing']==1)

In [23]:
df[nationality & market_value & popular_xor_new]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
256,John Stones,Manchester+City,23,CB,3,35.0,1078,5.5,2.30%,59,1,England,0,2,11,1,1
380,Dele Alli,Tottenham,21,CM,2,45.0,4626,9.5,38.60%,225,1,England,0,1,17,1,0
381,Harry Kane,Tottenham,23,CF,1,60.0,4161,12.5,35.10%,224,1,England,0,2,17,1,0


In [24]:
print(popular_xor_new.sum())

72


### Sorting and Indexing

In [26]:
df.loc[0:2]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0


In [27]:
df.loc[[0,132], ('name', 'position')]

Unnamed: 0,name,position
0,Alexis Sanchez,LW
132,Connor Wickham,CF


In [29]:
df.loc[0:2,['name','position']]

Unnamed: 0,name,position
0,Alexis Sanchez,LW
1,Mesut Ozil,AM
2,Petr Cech,GK


In [30]:
df.set_index('name', inplace=True)

In [31]:
df.sort_index(inplace=True)

In [32]:
df.head(2)

Unnamed: 0_level_0,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0


In [33]:
df.reset_index(inplace=True)

In [35]:
df.sort_index(axis=1).head()

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,name,nationality,new_foreign,new_signing,page_views,position,position_cat,region
0,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,Aaron Cresswell,England,0,0,380,LB,3,1
1,30,4,0,Everton,7,22,0.20%,5.5,5.0,Aaron Lennon,England,0,0,504,RW,1,1
2,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Aaron Mooy,Australia,0,0,588,CM,2,4
3,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Aaron Ramsey,Wales,0,0,1040,CM,2,1
4,24,2,0,Watford,18,38,0.00%,5.0,6.0,Abdoulaye Doucoure,France,0,0,124,CM,2,2


Sorting by their last name

In [36]:
df.sort_values(by='name', key=lambda x:x.str.split(' ').str[1])

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
372,Rolando Aarons,Newcastle+United,21,LW,1,0.75,170,4.5,0.10%,0,1,England,0,1,13,0,0
421,Tammy Abraham,Swansea,19,CF,1,8.00,1227,5.5,3.30%,0,1,England,0,1,16,0,0
66,Charlie Adam,Stoke+City,31,CM,2,3.50,561,5.0,0.10%,59,2,Scotland,0,4,15,0,0
167,Ibrahim Afellay,Stoke+City,31,LM,2,3.00,461,5.0,0.00%,15,2,Netherlands,0,4,15,0,0
54,Benik Afobe,Bournemouth,24,CF,1,10.00,528,6.0,0.70%,88,4,Congo DR,0,2,2,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,Kabasele,Watford,26,CB,3,5.50,177,4.5,0.20%,35,2,Belgium,0,3,18,0,1
239,Kenedy,Chelsea,21,LB,3,7.00,566,5.0,0.10%,3,3,Brazil,0,1,5,1,0
252,Lascelles,Newcastle+United,27,CB,3,5.00,400,4.5,3.60%,0,1,England,0,3,13,0,0
348,Pedro,Chelsea,29,RW,1,28.00,1500,8.0,7.80%,162,2,Spain,0,4,5,1,0


In [38]:
df.sort_values(by='age').head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
53,Ben Woodburn,Liverpool,17,LW,1,1.5,1241,4.5,0.10%,5,1,Wales,0,1,10,1,0
217,Jonathan Leko,West+Brom,18,RW,1,1.5,169,4.5,0.20%,12,1,England,0,1,19,0,0
434,Trent Alexander-Arnold,Liverpool,18,RB,3,1.5,327,4.5,0.30%,15,2,England,0,1,10,1,0
229,Josh Tymon,Stoke+City,18,LB,3,1.0,120,4.5,0.10%,9,1,England,0,1,15,0,0
45,Axel Tuanzebe,Manchester+United,19,CB,3,1.0,279,4.0,1.70%,14,1,England,0,1,12,1,0


In [39]:
df.sort_values(by=['club', 'market_value'], ascending=[True, False]).head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
21,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
306,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
20,Alexandre Lacazette,Arsenal,26,CF,1,40.0,1183,10.5,26.50%,0,2,France,1,3,1,1,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
152,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0


Searching the youngest player

In [41]:
df['age'].idxmin()

53

In [40]:
df.iloc[df['age'].idxmin()]

name            Ben Woodburn
club               Liverpool
age                       17
position                  LW
position_cat               1
market_value             1.5
page_views              1241
fpl_value                4.5
fpl_sel                0.10%
fpl_points                 5
region                     1
nationality            Wales
new_foreign                0
age_cat                    1
club_id                   10
big_club                   1
new_signing                0
Name: 53, dtype: object

Searching the oldest player

In [127]:
df.iloc[df['age'].idxmax()]

name            Julian Speroni
club            Crystal+Palace
age                         38
position                    GK
position_cat                 4
market_value              0.25
page_views                 188
fpl_value                  4.0
fpl_sel                  2.00%
fpl_points                   0
region                       3
nationality          Argentina
new_foreign                  0
age_cat                      6
club_id                      6
big_club                     0
new_signing                  0
Name: 232, dtype: object

In [42]:
df.head(2)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0


Deleting the first row

In [43]:
df.drop(index=0).head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
5,Adam Federici,Bournemouth,32,GK,4,1.0,126,4.0,1.50%,8,4,Australia,0,5,2,0,0
