In [None]:
%pip install pandas

In [None]:
import pandas as pd
# 1 Load the dataset
# The path is relative, so it is resolved against the current working directory.
dataset_path = 'AfricaCupofNationsMatches.csv'
data = pd.read_csv(filepath_or_buffer=dataset_path)
print(data)

In [None]:
# 2 Retrieve the first seven rows
# Positional slicing with .iloc[:7] returns the same rows as data.head(7).
first_seven_rows = data.iloc[:7]
print(first_seven_rows)

In [None]:
# 3 Select a subset of columns
# Passing a list of column labels to .loc[:, ...] returns a DataFrame that
# holds just those columns, in the order they are listed.
wanted_columns = ['HomeTeam', 'AwayTeam', 'HomeTeamGoals', 'AwayTeamGoals']
selected_columns = data.loc[:, wanted_columns]
print(selected_columns)

In [None]:
# 4 Select the rows where Egypt plays, after normalising the team names
# Normalise both team columns: strip surrounding whitespace and lowercase.
# NOTE: this overwrites 'HomeTeam' and 'AwayTeam' in `data`, so every cell that
# runs afterwards sees lowercase team names.
for team_column in ('HomeTeam', 'AwayTeam'):
    cleaned_names = data[team_column].astype(str).str.strip().str.lower()
    # astype(str) turns a missing value into the literal text 'nan'; where()
    # puts the missing marker back so isna()/dropna() still recognise it.
    data[team_column] = cleaned_names.where(data[team_column].notna())

# Print the cleaned data
# print("Cleaned Data:")
# print(data)

# Keep the rows where 'egypt' is either the home team or the away team
egypt_rows = data[(data['HomeTeam'] == 'egypt') | (data['AwayTeam'] == 'egypt')]

# Display the rows where 'egypt' appears
print("Rows where 'egypt' appears:")
print(egypt_rows)

In [None]:
# 5 Count the rows and columns of the DataFrame
# The length of the row index is the number of rows and the length of the
# column index is the number of columns (the same two numbers as data.shape).
rows = len(data.index)
columns = len(data.columns)

# Display the results
print(f"Number of rows: {rows}")
print(f"Number of columns: {columns}")

In [None]:
# 6 Select rows where 'Attendance' is missing
# isnull() (an alias of isna()) flags the NaN entries; the resulting boolean
# mask is then used with .loc to keep only those rows.
attendance_is_missing = data['Attendance'].isnull()
missing_attendance = data.loc[attendance_is_missing]
print(missing_attendance)

In [None]:
# 7 Select rows where 'HomeTeamGoals' is between 3 and 6 inclusive
# Series.between() includes both endpoints by default, and rows with a missing
# goal count evaluate to False, so they are left out.
scored_three_to_six = data['HomeTeamGoals'].between(3, 6)
home_team_goals = data[scored_three_to_six]
print(home_team_goals)


In [None]:
# 8 Change the value of 'AwayTeamGoals' in the 3rd row to 10
# "3rd row" is a position, so the positional indexer .iloc is used (position 2).
# .loc[2, 'AwayTeamGoals'] is label-based: it only addresses the 3rd row while
# the index is the default 0, 1, 2, ..., and if no row carries the label 2 the
# assignment would silently append a new row instead of editing an existing one.
away_goals_position = data.columns.get_loc('AwayTeamGoals')
data.iloc[2, away_goals_position] = 10
print(data)

In [None]:
# 9 Sort by 'HomeTeam' ascending, then by 'HomeTeamGoals' descending
# sort_values() takes one ascending flag per sort key, matched by position.
sort_keys = ['HomeTeam', 'HomeTeamGoals']
sort_ascending = [True, False]
sorted_data = data.sort_values(by=sort_keys, ascending=sort_ascending)
print(sorted_data)

In [None]:
# 10 Get the list of column headers (column names)
# data.columns is an Index object holding the column labels; wrapping it in
# list() turns it into a plain Python list.
column_list = list(data.columns)
print(column_list)

In [None]:
# 11 Append a new column
# Assigning to a column name that does not exist yet appends that column.
# Example with explicit values (the list needs one value per row):
# new_column_values = ['A', 'B', 'C', 'D']
# data['Players'] = new_column_values

# Append an empty column: every row gets None
data['Players'] = None
print(data)

# Insert the new column at position 3 (after 'Time' column, before 'HomeTeam')
# To place a column between existing ones, DataFrame.insert() takes the target
# position, the new column name and its values.
# The example is kept as '#' comments rather than a bare triple-quoted string:
# a string literal is an expression, and as the last statement of a cell
# Jupyter would echo it as the cell's output.
# new_column_values = None
# data.insert(3, 'new_column', new_column_values)
# print(data)

In [None]:
# 12 Add two rows to the DataFrame
# Each new match is described as a dict keyed by column name; the two dicts are
# wrapped in a DataFrame and appended to `data` with pd.concat().
morocco_vs_algeria = {
    'Year': 2027, 'Date': '2027-06-25', 'Time': '18:00',
    'HomeTeam': 'Morocco', 'AwayTeam': 'Algeria',
    'HomeTeamGoals': 2, 'AwayTeamGoals': 1,
    'Stage': 'Group', 'SpecialWinConditions': None,
    'Stadium': 'Stadium F', 'City': 'City F', 'Attendance': 28000,
}
nigeria_vs_ghana = {
    'Year': 2028, 'Date': '2028-06-25', 'Time': '20:00',
    'HomeTeam': 'Nigeria', 'AwayTeam': 'Ghana',
    'HomeTeamGoals': 3, 'AwayTeamGoals': 2,
    'Stage': 'Group', 'SpecialWinConditions': None,
    'Stadium': 'Stadium G', 'City': 'City G', 'Attendance': 32000,
}
new_rows = pd.DataFrame([morocco_vs_algeria, nigeria_vs_ghana])

# ignore_index=True renumbers the combined rows 0..n-1 instead of keeping the
# 0 and 1 labels that new_rows carries.
data = pd.concat([data, new_rows], ignore_index=True)

# Display the updated DataFrame
print(data)

In [19]:
# 13 Replace 'Uganda' with 'China' in the 'AwayTeam' column
# Trim surrounding whitespace first so padded names still match.
data['AwayTeam'] = data['AwayTeam'].str.strip()
# Compare case-insensitively: the replacement then works whether the column
# holds 'Uganda' or 'uganda', instead of relying on the names having been
# lowercased beforehand. Missing names compare as False and stay untouched.
is_uganda = data['AwayTeam'].str.lower() == 'uganda'
data.loc[is_uganda, 'AwayTeam'] = 'China'
# to_string() renders every row and column without truncation.
print(data.to_string())


     Year                         Date           Time           HomeTeam             AwayTeam  HomeTeamGoals  AwayTeamGoals                 Stage                                      SpecialWinConditions                         Stadium                  City  Attendance
0    1957                     10-Feb-57            NaN             sudan                egypt            1.0            2.0            Semifinals                                                       NaN               Municipal Stadium              Khartoum     30000.0
1    1957                     10-Feb-57            NaN          ethiopia         south africa            NaN            NaN            Semifinals      Ethiopia  wins due to disqualification of other team                             NaN                   NaN         NaN
2    1957                     16-Feb-57            NaN             egypt             ethiopia            4.0            0.0                 Final                                             

In [None]:
# 14 Reset the index of the DataFrame
# reset_index() hands back a frame numbered with the default integer index
# (0, 1, 2, ...). With drop=True the previous index is discarded rather than
# being kept as an extra column.
renumbered = data.reset_index(drop=True)
data = renumbered
print(data)

In [None]:
# 15 Check whether the 'Stadium' column is present in the DataFrame
# Membership is tested against the column index, then the matching message is printed.
has_stadium = 'Stadium' in data.columns
message = (
    "Yes, 'Stadium' column is present in the DataFrame."
    if has_stadium
    else "No, 'Stadium' column is not present in the DataFrame."
)
print(message)

In [None]:
# 16 Convert 'AwayTeamGoals' to float
# astype(float) returns a converted copy of the column, which is then written
# back over the original column.
away_goals_as_float = data['AwayTeamGoals'].astype(float)
data['AwayTeamGoals'] = away_goals_as_float
print(data)

In [None]:
# 17 Remove the last 10 rows from the DataFrame
# A positional slice that stops 10 rows before the end keeps everything else.
# Note that each run of this cell trims another 10 rows from `data`.
data = data.iloc[:-10]
print(data)


In [None]:
# 18 Iterate over the rows of the DataFrame
# iterrows() yields (index label, row as a Series) pairs. Alternatives are
# apply(..., axis=1) or looping over positions with .iloc[]; both are kept
# below as commented-out examples.
print("\nIterating over rows using iterrows():")
for index, row in data.iterrows():
    # One print call per row: index line, the row itself, then a blank line.
    print(f"Index: {index}", row, "", sep="\n")

# Alternative: apply along axis=1
# print("\nIterating over rows using apply():")
# data.apply(lambda row: print(row), axis=1)

# Alternative: positional access with iloc[]
# print("\nIterating over rows using iloc[]:")
# num_rows = len(data)
# for i in range(num_rows):
#     row = data.iloc[i]
#     print(f"Index: {i}")
#     print(row)
#     print()

In [None]:
# 19 Change the order of columns
# Columns that should come first, in the desired order.
leading_columns = ['AwayTeam', 'HomeTeam', 'AwayTeamGoals', 'HomeTeamGoals','Year','Date','Time','Stage','SpecialWinConditions','Stadium','City','Attendance']
# Selecting with only the list above would silently drop every column that is
# not named in it (for example a 'Players' column added to `data`). Reordering
# should not lose data, so the unlisted columns are appended in their current order.
new_order = leading_columns + [col for col in data.columns if col not in leading_columns]
data = data[new_order]

# Display the DataFrame with changed column order
print(data)

In [None]:
# 20 Delete rows where 'HomeTeamGoals' is 0
# .ne(0) is the method form of `!= 0`; rows with a missing goal count compare
# as "not equal" and are therefore kept.
home_goals_nonzero = data['HomeTeamGoals'].ne(0)
data = data.loc[home_goals_nonzero]

# Display the DataFrame after deleting rows
print(data)