#### Selecting by row and column

In [None]:
# selects the 4th row: iloc is position-based and counts from 0, so position 3 is the fourth row
df.iloc[3]

In [None]:
# selects the rows at positions 5, 6 and 7 (an iloc slice excludes its end position)
df.iloc[5:8]

In [None]:
# selects rows and columns by position: ':' keeps every row, 3:7 keeps the columns at positions 3 through 6
df.iloc[:, 3:7]

In [None]:
# rows at positions 5 through 9 and columns at positions 3 through 8 (both end positions are excluded)
df.iloc[5:10, 3:9]

In [None]:
# select by row and column label: ':' keeps every row, 'magnesium' picks that one column by name
df.loc[:, 'magnesium']

In [None]:
# unlike iloc, a loc slice works on index labels and includes BOTH endpoints,
# so this returns 'magnesium' for labels 7 through 16
# (assumes the index holds these integer labels in order - TODO confirm)
df.loc[7:16, 'magnesium']

#### Selecting and Editing by Criteria

In [None]:
# boolean indexing: build a True/False mask first, then keep only the rows where it is True
low_alcohol = df['alcohol'] < 12
df.loc[low_alcohol]

In [None]:
# color intensity for the rows whose alcohol is below 12
# the column label is passed inside a list, so the result is a one-column DataFrame
low_alcohol = df['alcohol'] < 12
df.loc[low_alcohol, ['color_intensity']]

In [None]:
# caps color intensity at 10: every value above 10 is overwritten in place with 10
too_intense = df['color_intensity'] > 10
df.loc[too_intense, 'color_intensity'] = 10

In [None]:
# creates a new column named 'shade' labelled from color intensity:
# above 7 -> 'dark', 7 or below -> 'light'
# rows where neither comparison is true (e.g. a missing color_intensity) get no label
intensity = df['color_intensity']
is_dark = intensity > 7
is_light = intensity <= 7
df.loc[is_dark, 'shade'] = 'dark'
df.loc[is_light, 'shade'] = 'light'

In [None]:
# Show index labels 5 through 9 (loc includes the end label) for the two team-name columns
team_columns = ['Home Team Name', 'Away Team Name']
df.loc[5:9, team_columns]

In [None]:
# Show every column for games played in 1950 in the "Group 3" stage
# Each condition is its own mask; '&' keeps rows where both are True
in_1950 = df["Year"] == 1950
in_group_3 = df["Stage"] == "Group 3"
df.loc[in_1950 & in_group_3]

In [None]:
# Show only the 'Attendance' column for games played in 1950 in the 'Group 3' stage
in_1950 = df['Year'] == 1950
in_group_3 = df['Stage'] == 'Group 3'
df.loc[in_1950 & in_group_3, 'Attendance']

In [None]:
# Number of home games played by the Netherlands
# Neth_home keeps the matching rows; its length is the number of home games
is_neth_home = df['Home Team Name'] == 'Netherlands'
Neth_home = df[is_neth_home]
print(len(Neth_home))

In [None]:
# Number of games played by the Netherlands in total = home games + away games
# NOTE: Neth_home is not defined here; the home-games cell must be run first
is_neth_away = df['Away Team Name'] == 'Netherlands'
Neth_away = df[is_neth_away]
total_neth_games = len(Neth_home) + len(Neth_away)
print(total_neth_games)

In [None]:
# Number of games the USA played in the 2014 world cup
# A game counts when the year is 2014 AND the USA is either the home or the away team
played_2014 = df['Year'] == 2014
usa_is_home = df['Home Team Name'] == 'USA'
usa_is_away = df['Away Team Name'] == 'USA'
USA_home_and_away = df[played_2014 & (usa_is_home | usa_is_away)]
print(len(USA_home_and_away))

In [None]:
# Number of countries that participated in the 1986 world cup
played_1986 = df['Year'] == 1986
games_86 = df[played_1986]
home = list(games_86['Home Team Name'].unique())
away = list(games_86['Away Team Name'].unique())
# distinct home teams only
print(len(home))
# home and away lists joined; a team that appears in both is counted twice here
home.extend(away)
print(len(home))
# converting to a set removes the duplicates: this is the number of distinct teams
print(len(set(home)))

In [None]:
# Number of matches that had more than 5 goals in total
# Total_Goals is stored as a new column: home goals + away goals for each match
df['Total_Goals'] = df['Home Team Goals'] + df['Away Team Goals']
# "more than 5" is a strict comparison: a match with exactly 5 goals must not be counted
print(len(df[df['Total_Goals'] > 5]))

In [None]:
# Create a new column 'Half-time Goals' in df: half-time home goals + half-time away goals
half_time_home = df['Half-time Home Goals']
half_time_away = df['Half-time Away Goals']
df['Half-time Goals'] = half_time_home + half_time_away

In [None]:
# Show all records where either team name contains the string 'Korea'
# Both team columns are searched and whole rows are returned, so the result really is
# "all records" rather than just the home-team names.
# na=False makes a missing team name count as "no match", keeping the mask purely boolean.
home_has_korea = df['Home Team Name'].str.contains('Korea', na=False)
away_has_korea = df['Away Team Name'].str.contains('Korea', na=False)
df.loc[home_has_korea | away_has_korea]

In [None]:
# Update the 'Home Team Name' and 'Home Team Initials' columns
# The two assignments use independent filters: the initials change applies to
# every row whose initials are 'KOR', whatever its team name is.
df.loc[df['Home Team Name'] == 'Korea DPR', 'Home Team Name'] = 'Korea'
df.loc[df['Home Team Initials'] == 'KOR', 'Home Team Initials'] = 'NSK'

# Check the updated columns
# Only the last expression of a cell is displayed, so the home and away checks are
# combined into a single result; written as two separate expressions, the first
# one would be evaluated and silently discarded.
# na=False makes a missing team name count as "no match", keeping the mask purely boolean.
home_has_korea = df['Home Team Name'].str.contains('Korea', na=False)
away_has_korea = df['Away Team Name'].str.contains('Korea', na=False)
df.loc[home_has_korea | away_has_korea]

#### Using Map, Apply, and Lambda Functions

In [None]:
# creates a new column from a lambda applied to every LINENAME value
# (an inline lambda avoids writing a separate named function)
# each entry is True when the value contains the character 'N', otherwise False
on_n_line = df['LINENAME'].map(lambda line_name: 'N' in line_name)
df['On_N_Line'] = on_n_line

In [None]:
# creates new column based on criteria
# same idea as a lambda, but map is given a named function instead
# (contains_n must already be defined before this cell runs)
line_names = df['LINENAME']
df['On_N_Line'] = line_names.map(contains_n)

In [None]:
# returns the relative frequency of each value: with normalize=True the counts are
# divided by the total, giving fractions that sum to 1 (multiply by 100 for percentages)
df['On_N_Line'].value_counts(normalize=True)

#### Cleaning Column Names

#### Reformatting Column Types

In [None]:
print(df['ENTRIES'].dtype) # check an individual column's type rather than all of them

df['ENTRIES'] = df['ENTRIES'].astype(float) # changing the column to float

df['ENTRIES'] = df['ENTRIES'].astype(int) # changing the column to int (replaces the float conversion above)

In [None]:
# attempting to convert a string column to int or float raises an error if any value
# actually contains non-numeric characters
# NOTE(review): presumably kept as a deliberate demonstration of that failure - confirm.
# If the conversion ever succeeds, it overwrites LINENAME with the integer values.
df['LINENAME'] = df['LINENAME'].astype(int)

#### Converting Dates