In [None]:
import pandas as pd
import numpy as np

In [None]:
# Read the raw player table from the CSV file in the working directory.
df_players = pd.read_csv(filepath_or_buffer="players_20.csv")

In [None]:
# Preview the first five rows.
df_players.head(n=5)

In [None]:
# Use each player's short name as the row label so that .loc can address
# players by name in the cells below.
df_players = df_players.set_index(keys='short_name')
df_players.head(n=5)


In [None]:
# Keep only the biographical columns used below. The explicit .copy() makes the
# subset an independent frame, so the in-place assignments later in the notebook
# write to it directly instead of triggering SettingWithCopyWarning.
df_players = df_players[
    ['long_name', 'age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club']
].copy()


# Selecting with a single value

In [None]:
# Label-based lookup of one cell: (row label, column label).
df_players.loc['L. Messi', 'height_cm']

In [None]:
# Position-based lookup of one cell: first row, column position 3 (height_cm).
df_players.iloc[0, 3]

In [None]:
# Label-based lookup of one cell: (row label, column label).
df_players.loc['Cristiano Ronaldo', 'weight_kg']

In [None]:
# Position-based lookup of one cell: second row, column position 4 (weight_kg).
df_players.iloc[1, 4]

In [None]:
# A bare ":" selects every row; the result is the whole height_cm column.
df_players.loc[slice(None), 'height_cm']

In [None]:
# Selecting by row label alone returns every column for that player.
df_players.loc['L. Messi']

# Selecting with a list of values

In [None]:
# A list of row labels returns those rows (all columns) as a DataFrame.
df_players.loc[['L. Messi', 'Cristiano Ronaldo'], :]

In [None]:
# Positional equivalent: the first two rows, all columns.
df_players.iloc[[0, 1], :]

In [None]:
# Several rows, one column label: the result is a Series.
df_players.loc[['L. Messi', 'Cristiano Ronaldo'], 'height_cm']

In [None]:
# Several rows and several columns, both given as lists of labels.
df_players.loc[
    ['L. Messi', 'Cristiano Ronaldo'],
    ['height_cm', 'weight_kg'],
]

In [None]:
# Same selection by position: rows 0-1, columns 3-4 (height_cm, weight_kg).
df_players.iloc[[0, 1], [3, 4]]

# Selecting a range of data with a slice

In [None]:
# Slice by column label: with .loc both endpoints ('age' and 'club') are included.
players = ['L. Messi', 'Cristiano Ronaldo']
df_players.loc[players, slice('age', 'club')]

In [None]:
# Positional slice: column positions 1 through 6 (the stop value 7 is excluded).
df_players.iloc[[0, 1], slice(1, 7)]

In [None]:
columns = ['age', 'dob', 'height_cm', 'weight_kg', 'nationality', 'club']

# Slice rows by label: every player from 'L. Messi' through 'M. Salah',
# both endpoints included, restricted to the columns listed above.
df_players.loc[slice('L. Messi', 'M. Salah'), columns]

# Selecting with some condition

In [None]:
columns = ['height_cm', 'weight_kg', 'nationality', 'club']

# Combine both conditions element-wise with & (each side needs its own
# parentheses), and restrict the output to the columns listed above.
df_players.loc[
    (df_players['height_cm'] > 185) & (df_players['nationality'] == 'Argentina'),
    columns,
]

In [None]:
# .iloc does not accept a boolean Series as a mask, so hand it the underlying
# boolean array instead; columns are positions 1 through 4.
df_players.iloc[(df_players['height_cm'] > 185).to_numpy(), 1:5]

In [None]:
# Same positional selection with two conditions combined via &; the mask is
# converted to a plain boolean array because .iloc rejects a boolean Series.
df_players.iloc[
    ((df_players['height_cm'] > 185) & (df_players['nationality'] == 'Argentina')).to_numpy(),
    1:5,
]

# Set the value of a cell

In [None]:
# Overwrite a single cell: set the height_cm value of the 'L. Messi' row to 175.
df_players.loc['L. Messi', 'height_cm'] = 175

In [None]:
# Assigning a scalar to a whole column broadcasts it to every row.
df_players.loc[:, 'height_cm'] = 190
df_players.head(n=5)

# Set values for an entire row

In [None]:
# Blank out the last row (position -1) by writing NaN into every column,
# then display that row to confirm the change.
df_players.iloc[-1, :] = np.nan
df_players.iloc[-1, :]

# Set values for multiple cells

In [None]:
# Write the same value into height_cm for both listed players.
df_players.loc[
    ['L. Messi', 'Cristiano Ronaldo'],
    ['height_cm'],
] = 177

In [None]:
# Conditional assignment: for every row whose height_cm exceeds 180,
# overwrite both 'age' and 'dob' with 0.
df_players.loc[df_players['height_cm'] > 180, ['age', 'dob']] = 0

In [None]:
# Inspect the result of the assignments above; a short preview is enough and
# avoids rendering the entire frame.
df_players.head()

# Drop a row

In [None]:
# drop() returns a new frame unless inplace=True; keeping df_players intact
# preserves the 'L. Messi' row that later cells still address by label.
players_without_messi = df_players.drop('L. Messi', axis=0)

# The keyword form works the same way and accepts a list of row labels.
players_without_messi.drop(index=['Cristiano Ronaldo'])

In [None]:
# Drop several rows at once. The result is returned as a new frame so that
# df_players keeps the 'Cristiano Ronaldo' row, which a later cell updates by label.
df_players.drop(index=['Neymar Jr', 'Cristiano Ronaldo'])

In [None]:
# Preview the first five rows of df_players.
df_players.head(n=5)

# Drop Columns

In [None]:
# Drop a column with axis=1. The result is returned as a new frame: dropping
# in place would make the next cell's drop of 'age' raise KeyError and would
# remove a column that the query() and apply() cells further down still read.
df_players.drop('age', axis=1)

In [None]:
# Keyword form of the same column drop. Returning a new frame leaves df_players
# intact, and errors='ignore' keeps the cell runnable when 'age' is already absent.
df_players.drop(columns=['age'], errors='ignore')

In [None]:
# Drop the last column by position. Not dropping in place keeps the cell
# idempotent: otherwise every re-run would strip one more column from df_players.
df_players.drop(columns=df_players.columns[[-1]])

In [None]:
# Drop several columns at once. The result is returned as a new frame because
# 'long_name' and 'dob' are still read by the query() and apply() cells below.
df_players.drop(columns=['long_name', 'dob'])

# sample()

In [None]:
# Draw 10 random values from the "nationality" column; the fixed random_state
# makes the draw repeatable.
df_players['nationality'].sample(n=10, random_state=99)

In [None]:
# Extract a random 20% sample of the dataframe; seeded so that re-running the
# notebook yields the same rows.
df_players.sample(frac=0.2, random_state=99)

In [None]:
# Upsample: frac=2 returns twice as many rows as the frame holds, which is only
# possible when sampling with replacement. Seeded for reproducibility.
df_players.sample(frac=2, replace=True, random_state=99)

# query()

In [None]:
# query() filters rows with a string expression; column names are used
# directly and string literals are quoted inside the expression.
df_players.query(expr="age>34 and nationality == 'Italy'")

In [None]:
# Negate a condition inside the expression with `not`.
df_players.query(expr="not(age>34)")

In [None]:
# Arithmetic is allowed inside the expression: compare the height in metres.
df_players.query(expr="height_cm/100 > 1.8")

In [None]:
# Check each column's dtype before using dtype-specific accessors below.
df_players.dtypes

In [None]:
# Convert 'dob' to datetime so the .dt accessor becomes available;
# errors='coerce' turns values that cannot be parsed into NaT instead of raising.
df_players['dob'] = pd.to_datetime(arg=df_players['dob'], errors='coerce')

# Accessor attributes such as .dt.year can be used inside the query expression.
df_players.query(expr="dob.dt.year > 1990")

# apply()

In [None]:
# Pass an existing function (here NumPy's square root) to apply().
df_players['age'].apply(func=np.sqrt)

In [None]:
# Define a custom row-wise function and apply it across the dataframe.
def calculate_bmi(row):
    """Return the body-mass index for one player row.

    BMI is the weight in kilograms divided by the squared height in metres;
    ``row`` must provide the ``weight_kg`` and ``height_cm`` entries.
    """
    height_m = row['height_cm'] / 100
    return row['weight_kg'] / (height_m ** 2)


# axis='columns' hands each row to the function as a Series.
df_players.apply(calculate_bmi, axis='columns')

# Lambda function + Apply

In [None]:
# Convert the "height_cm" series to metres with an inline lambda.
df_players['height_cm'].apply(lambda height_cm: height_cm / 100)

In [None]:
# Use a lambda to convert the "long_name" series to upper case. Non-string
# cells (such as the NaN written into the last row earlier) are passed through
# unchanged instead of raising AttributeError on .upper().
df_players['long_name'].apply(
    lambda name: name.upper() if isinstance(name, str) else name
)

In [None]:
# Extract the year from each 'dob' value; relies on 'dob' having been
# converted to datetime in the earlier to_datetime cell.
df_players['dob'].apply(lambda born: born.year)

In [None]:
# Row-wise lambda computing BMI: weight (kg) divided by squared height (m).
df_players.apply(
    lambda player: player['weight_kg'] / ((player['height_cm'] / 100) ** 2),
    axis='columns',
)

# copy()

In [None]:
# Deep copy (the default): the new frame gets its own data, independent of df_players.
df_copy = df_players.copy(deep=True)

In [None]:
# Change a value in the original frame after the copy was taken.
df_players.loc['L. Messi', 'height_cm'] = 180

In [None]:
# Inspect the copy; a short preview is enough and avoids rendering the entire frame.
df_copy.head()

In [None]:
# deep=False creates a shallow copy that shares its data with the original, so
# changes to the data of one are reflected in the other.
# NOTE(review): with Copy-on-Write enabled (the default from pandas 3.0) such
# changes no longer propagate between the two frames - confirm the pandas
# version this notebook targets.
df_shallow_copy = df_players.copy(deep=False)

In [None]:
# Change a value in the original frame after the shallow copy was taken.
df_players.loc['Cristiano Ronaldo', 'height_cm'] = 200

In [None]:
# Inspect the shallow copy; a short preview is enough and avoids rendering the
# entire frame.
df_shallow_copy.head()