# DATA ANALYSIS WITH PANDAS

# Series

In [1]:
# Importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
series1 = pd.Series([4, 65, 436, 3, 9])
series2 = pd.Series([7, 0, 3, 897, 9])

comparison_result = series1 == series2
print(comparison_result)


0    False
1    False
2    False
3    False
4     True
dtype: bool


In [3]:
# Performing operations on series

series1 = pd.Series([2, 4, 6, 8, 14])
series2 = pd.Series([1, 3, 5, 7, 9])

addition_result = series1 + series2
subtraction_result = series1 - series2
multiplication_result = series1 * series2
division_result = series1 / series2

print("Addition:")
print(addition_result)
print("\nSubtraction:")
print(subtraction_result)
print("\nMultiplication:")
print(multiplication_result)
print("\nDivision:")
print(division_result)


Addition:
0     3
1     7
2    11
3    15
4    23
dtype: int64

Subtraction:
0    1
1    1
2    1
3    1
4    5
dtype: int64

Multiplication:
0      2
1     12
2     30
3     56
4    126
dtype: int64

Division:
0    2.000000
1    1.333333
2    1.200000
3    1.142857
4    1.555556
dtype: float64


In [4]:
# Converting a dictionary to a Pandas series
dictionary1 = {'Josh': 24, 'Sam': 36, 'Peace': 19, 'Charles': 65, 'Tom': 44}
series = pd.Series(dictionary1)

print(series)


Josh       24
Sam        36
Peace      19
Charles    65
Tom        44
dtype: int64


In [5]:
# Converting a given series to an array
series = pd.Series(['Love', 800, 'Joy', 789.9, 'Peace', True])
array = np.array(series)

print(array)

['Love' 800 'Joy' 789.9 'Peace' True]


In [6]:
# Read the dataset
df = pd.read_csv('datasets/AfricaCupofNationsMatches.csv')

# Get the most frequent value in 'HomeTeamGoals' column
most_frequent = df['HomeTeamGoals'].mode()[0]

# Replace everything else with 'Other'
df['HomeTeamGoals'] = df['HomeTeamGoals'].map(lambda x: x if x == most_frequent else 'Other')

# Display the modified column
print(df['HomeTeamGoals'])


0        1.0
1      Other
2      Other
3      Other
4        1.0
       ...  
617    Other
618      1.0
619    Other
620    Other
621    Other
Name: HomeTeamGoals, Length: 622, dtype: object


# Dataframes

In [7]:
# Read the given CSV file
df = pd.read_csv('datasets/AfricaCupofNationsMatches.csv')
df

Unnamed: 0,Year,Date,Time,HomeTeam,AwayTeam,HomeTeamGoals,AwayTeamGoals,Stage,SpecialWinConditions,Stadium,City,Attendance
0,1957,10-Feb-57,,Sudan,Egypt,1.0,2.0,Semifinals,,Municipal Stadium,Khartoum,30000.0
1,1957,10-Feb-57,,Ethiopia,South Africa,,,Semifinals,Ethiopia wins due to disqualification of othe...,,,
2,1957,16-Feb-57,,Egypt,Ethiopia,4.0,0.0,Final,,Municipal Stadium,Khartoum,30000.0
3,1959,22-May-59,,Egypt,Ethiopia,4.0,0.0,Final Tournament,,Prince Farouk Stadium,Cairo,30000.0
4,1959,25-May-59,,Sudan,Ethiopia,1.0,0.0,Final Tournament,,Prince Farouk Stadium,Cairo,20000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
617,2019,11 July 2019 (2019-07-11),21:00,Madagascar,Tunisia,0.0,3.0,Quarter-finals,,Al Salam Stadium,Cairo,7568.0
618,2019,14 July 2019 (2019-07-14),18:00,Senegal,Tunisia,1.0,0.0,Semi-finals,win after extra time,30 June Stadium,Cairo,9143.0
619,2019,14 July 2019 (2019-07-14),21:00,Algeria,Nigeria,2.0,1.0,Semi-finals,,Cairo International Stadium,Cairo,49775.0
620,2019,17 July 2019 (2019-07-17),21:00,Tunisia,Nigeria,0.0,1.0,Third place play-off,,Al Salam Stadium,Cairo,6340.0


In [8]:
# Get the first 7 rows of the dataframe
df.head(7)

Unnamed: 0,Year,Date,Time,HomeTeam,AwayTeam,HomeTeamGoals,AwayTeamGoals,Stage,SpecialWinConditions,Stadium,City,Attendance
0,1957,10-Feb-57,,Sudan,Egypt,1.0,2.0,Semifinals,,Municipal Stadium,Khartoum,30000.0
1,1957,10-Feb-57,,Ethiopia,South Africa,,,Semifinals,Ethiopia wins due to disqualification of othe...,,,
2,1957,16-Feb-57,,Egypt,Ethiopia,4.0,0.0,Final,,Municipal Stadium,Khartoum,30000.0
3,1959,22-May-59,,Egypt,Ethiopia,4.0,0.0,Final Tournament,,Prince Farouk Stadium,Cairo,30000.0
4,1959,25-May-59,,Sudan,Ethiopia,1.0,0.0,Final Tournament,,Prince Farouk Stadium,Cairo,20000.0
5,1959,29-May-59,,Egypt,Sudan,2.0,1.0,Final Tournament,,Prince Farouk Stadium,Cairo,30000.0
6,1962,14-Jan-62,,Ethiopia,Tunisia,4.0,2.0,Semifinals,,Hailé Sélassié Stadium,Addis Ababa,30000.0


In [9]:
# Select the ‘HomeTeam’, ‘AwayTeam’, ‘HomeTeamGoals’ and ‘AwayTeamGoals’ columns from your data frame.
selected_columns = df[['HomeTeam', 'AwayTeam', 'HomeTeamGoals', 'AwayTeamGoals']]
print(selected_columns)


        HomeTeam       AwayTeam  HomeTeamGoals  AwayTeamGoals
0         Sudan           Egypt            1.0            2.0
1      Ethiopia    South Africa            NaN            NaN
2         Egypt        Ethiopia            4.0            0.0
3          Egypt       Ethiopia            4.0            0.0
4         Sudan        Ethiopia            1.0            0.0
..           ...            ...            ...            ...
617  Madagascar         Tunisia            0.0            3.0
618     Senegal         Tunisia            1.0            0.0
619     Algeria         Nigeria            2.0            1.0
620     Tunisia         Nigeria            0.0            1.0
621     Senegal         Algeria            0.0            1.0

[622 rows x 4 columns]


In [10]:
#Select rows where Egypt appears
egypt_rows = df[df['HomeTeam'] == 'Egypt']
print(egypt_rows)

    Year      Date  Time  HomeTeam   AwayTeam  HomeTeamGoals  AwayTeamGoals  \
3   1959  22-May-59   NaN    Egypt   Ethiopia            4.0            0.0   
5   1959  29-May-59   NaN    Egypt      Sudan            2.0            1.0   
7   1962  18-Jan-62   NaN    Egypt     Uganda            2.0            1.0   
13  1963  24-Nov-63   NaN    Egypt    Nigeria            6.0            3.0   
14  1963  26-Nov-63   NaN    Egypt      Sudan            2.0            2.0   
16  1963  30-Nov-63   NaN    Egypt   Ethiopia            3.0            0.0   

                Stage SpecialWinConditions                 Stadium  \
3    Final Tournament                  NaN   Prince Farouk Stadium   
5    Final Tournament                  NaN   Prince Farouk Stadium   
7          Semifinals                  NaN  Hailé Sélassié Stadium   
13            Group B                  NaN   Kumasi Sports Stadium   
14            Group B                  NaN   Kumasi Sports Stadium   
16  Third place match     

In [11]:
# Count the number of rows and columns in the DataFrame
num_rows, num_columns = df.shape
print("Number of rows:", num_rows)
print("Number of columns:", num_columns)


Number of rows: 622
Number of columns: 12


In [12]:
# Select rows where 'Attendance' is missing
missing_attendance_rows = df[df['Attendance'].isnull()]
print(missing_attendance_rows)

     Year                         Date   Time        HomeTeam       AwayTeam  \
1    1957                     10-Feb-57    NaN      Ethiopia    South Africa   
8    1962                     20-Jan-62    NaN       Tunisia          Uganda   
9    1962                     21-Jan-62    NaN      Ethiopia           Egypt   
10   1963                     24-Nov-63    NaN         Ghana         Tunisia   
11   1963                     26-Nov-63    NaN         Ghana        Ethiopia   
..    ...                           ...    ...            ...            ...   
565  2017  29 January 2017 (2017-01-29)  20:00         Egypt         Morocco   
566  2017  1 February 2017 (2017-02-01)  20:00  Burkina Faso           Egypt   
567  2017  2 February 2017 (2017-02-02)  20:00      Cameroon           Ghana   
568  2017  4 February 2017 (2017-02-04)  20:00  Burkina Faso           Ghana   
569  2017  5 February 2017 (2017-02-05)  20:00         Egypt        Cameroon   

     HomeTeamGoals  AwayTeamGoals      

In [13]:
# Select rows where 'HomeTeamGoals' are between 3 and 6 (inclusive)
selected_rows = df[(df['HomeTeamGoals'] >= 3) & (df['HomeTeamGoals'] <= 6)]
print(selected_rows)

     Year                         Date   Time    HomeTeam     AwayTeam  \
2    1957                     16-Feb-57    NaN     Egypt      Ethiopia   
3    1959                     22-May-59    NaN      Egypt     Ethiopia   
6    1962                     14-Jan-62    NaN  Ethiopia       Tunisia   
8    1962                     20-Jan-62    NaN   Tunisia        Uganda   
9    1962                     21-Jan-62    NaN  Ethiopia         Egypt   
..    ...                           ...    ...        ...          ...   
553  2017  20 January 2017 (2017-01-20)  20:00   Morocco          Togo   
585  2019     27 June 2019 (2019-06-27)  22:00     Kenya      Tanzania   
595  2019     24 June 2019 (2019-06-24)  22:00      Mali    Mauritania   
608  2019      6 July 2019 (2019-07-06)  18:00   Nigeria      Cameroon   
611  2019      7 July 2019 (2019-07-07)  21:00   Algeria        Guinea   

     HomeTeamGoals  AwayTeamGoals              Stage    SpecialWinConditions  \
2              4.0            0

In [14]:
# Change the 'AwayTeamGoals' in the 3rd row to 10
df.at[2, 'AwayTeamGoals'] = 10
print(df)

     Year                      Date   Time      HomeTeam       AwayTeam  \
0    1957                  10-Feb-57    NaN       Sudan           Egypt   
1    1957                  10-Feb-57    NaN    Ethiopia    South Africa   
2    1957                  16-Feb-57    NaN       Egypt        Ethiopia   
3    1959                  22-May-59    NaN        Egypt       Ethiopia   
4    1959                  25-May-59    NaN       Sudan        Ethiopia   
..    ...                        ...    ...          ...            ...   
617  2019  11 July 2019 (2019-07-11)  21:00  Madagascar         Tunisia   
618  2019  14 July 2019 (2019-07-14)  18:00     Senegal         Tunisia   
619  2019  14 July 2019 (2019-07-14)  21:00     Algeria         Nigeria   
620  2019  17 July 2019 (2019-07-17)  21:00     Tunisia         Nigeria   
621  2019  19 July 2019 (2019-07-19)  21:00     Senegal         Algeria   

     HomeTeamGoals  AwayTeamGoals                 Stage  \
0              1.0            2.0       

In [15]:
# Sort the DataFrame first by 'HomeTeam' in ascending order, then by 'HomeTeamGoals' in descending order
sorted_df = df.sort_values(['HomeTeam', 'HomeTeamGoals'], ascending=[True, False])
print(sorted_df)

     Year                         Date           Time    HomeTeam  \
205  1990                      2-Mar-90            NaN   Algeria    
29   1968                     14-Jan-68            NaN   Algeria    
135  1980                     16-Mar-80            NaN   Algeria    
164  1984                      5-Mar-84            NaN   Algeria    
171  1984                     17-Mar-84            NaN   Algeria    
..    ...                           ...            ...        ...   
521  2012                     12-Feb-12  20:30[note 1]    Zambia    
549  2017  23 January 2017 (2017-01-23)          20:00  Zimbabwe    
398  2004                     25-Jan-04          16:30  Zimbabwe    
437  2006                     23-Jan-06          20:00  Zimbabwe    
575  2019     30 June 2019 (2019-06-30)          21:00  Zimbabwe    

         AwayTeam  HomeTeamGoals  AwayTeamGoals              Stage  \
205       Nigeria            5.0            1.0            Group A   
29         Uganda            4.

In [16]:
# Get a list of DataFrame column headers
column_headers = list(df.columns.values)
print(column_headers)

['Year', 'Date ', 'Time ', 'HomeTeam', 'AwayTeam', 'HomeTeamGoals', 'AwayTeamGoals', 'Stage', 'SpecialWinConditions', 'Stadium', 'City', 'Attendance']


In [17]:
# Append a column of your choice to the DataFrame
df['City'] = 'Kampala'
print(df)

     Year                      Date   Time      HomeTeam       AwayTeam  \
0    1957                  10-Feb-57    NaN       Sudan           Egypt   
1    1957                  10-Feb-57    NaN    Ethiopia    South Africa   
2    1957                  16-Feb-57    NaN       Egypt        Ethiopia   
3    1959                  22-May-59    NaN        Egypt       Ethiopia   
4    1959                  25-May-59    NaN       Sudan        Ethiopia   
..    ...                        ...    ...          ...            ...   
617  2019  11 July 2019 (2019-07-11)  21:00  Madagascar         Tunisia   
618  2019  14 July 2019 (2019-07-14)  18:00     Senegal         Tunisia   
619  2019  14 July 2019 (2019-07-14)  21:00     Algeria         Nigeria   
620  2019  17 July 2019 (2019-07-17)  21:00     Tunisia         Nigeria   
621  2019  19 July 2019 (2019-07-19)  21:00     Senegal         Algeria   

     HomeTeamGoals  AwayTeamGoals                 Stage  \
0              1.0            2.0       

In [18]:
# Add 2 rows to the DataFrame
df = df.append([{'HomeTeam': 'Algeria', 'AwayTeam': 'Kenya', 'HomeTeamGoals': 2, 'AwayTeamGoals': 1, 'Attendance': 50000, 'City': 'Kampala'},
                {'HomeTeam': 'Uganda', 'AwayTeam': 'Tanzania', 'HomeTeamGoals': 3, 'AwayTeamGoals': 0, 'Attendance': 60000, 'City': 'Kampala'}],
               ignore_index=True)
print(df)

       Year                      Date   Time    HomeTeam       AwayTeam  \
0    1957.0                  10-Feb-57    NaN     Sudan           Egypt   
1    1957.0                  10-Feb-57    NaN  Ethiopia    South Africa   
2    1957.0                  16-Feb-57    NaN     Egypt        Ethiopia   
3    1959.0                  22-May-59    NaN      Egypt       Ethiopia   
4    1959.0                  25-May-59    NaN     Sudan        Ethiopia   
..      ...                        ...    ...        ...            ...   
619  2019.0  14 July 2019 (2019-07-14)  21:00   Algeria         Nigeria   
620  2019.0  17 July 2019 (2019-07-17)  21:00   Tunisia         Nigeria   
621  2019.0  19 July 2019 (2019-07-19)  21:00   Senegal         Algeria   
622     NaN                        NaN    NaN    Algeria          Kenya   
623     NaN                        NaN    NaN     Uganda       Tanzania   

     HomeTeamGoals  AwayTeamGoals                 Stage  \
0              1.0            2.0       

  df = df.append([{'HomeTeam': 'Algeria', 'AwayTeam': 'Kenya', 'HomeTeamGoals': 2, 'AwayTeamGoals': 1, 'Attendance': 50000, 'City': 'Kampala'},


In [19]:
# Change the country 'Uganda' to 'China' in the 'AwayTeam' column
df.loc[df['AwayTeam'] == 'Uganda', 'AwayTeam'] = 'China'
print(df)


       Year                      Date   Time    HomeTeam       AwayTeam  \
0    1957.0                  10-Feb-57    NaN     Sudan           Egypt   
1    1957.0                  10-Feb-57    NaN  Ethiopia    South Africa   
2    1957.0                  16-Feb-57    NaN     Egypt        Ethiopia   
3    1959.0                  22-May-59    NaN      Egypt       Ethiopia   
4    1959.0                  25-May-59    NaN     Sudan        Ethiopia   
..      ...                        ...    ...        ...            ...   
619  2019.0  14 July 2019 (2019-07-14)  21:00   Algeria         Nigeria   
620  2019.0  17 July 2019 (2019-07-17)  21:00   Tunisia         Nigeria   
621  2019.0  19 July 2019 (2019-07-19)  21:00   Senegal         Algeria   
622     NaN                        NaN    NaN    Algeria          Kenya   
623     NaN                        NaN    NaN     Uganda       Tanzania   

     HomeTeamGoals  AwayTeamGoals                 Stage  \
0              1.0            2.0       

In [20]:
# Reset the index in the DataFrame
df = df.reset_index(drop=True)
print(df)

       Year                      Date   Time    HomeTeam       AwayTeam  \
0    1957.0                  10-Feb-57    NaN     Sudan           Egypt   
1    1957.0                  10-Feb-57    NaN  Ethiopia    South Africa   
2    1957.0                  16-Feb-57    NaN     Egypt        Ethiopia   
3    1959.0                  22-May-59    NaN      Egypt       Ethiopia   
4    1959.0                  25-May-59    NaN     Sudan        Ethiopia   
..      ...                        ...    ...        ...            ...   
619  2019.0  14 July 2019 (2019-07-14)  21:00   Algeria         Nigeria   
620  2019.0  17 July 2019 (2019-07-17)  21:00   Tunisia         Nigeria   
621  2019.0  19 July 2019 (2019-07-19)  21:00   Senegal         Algeria   
622     NaN                        NaN    NaN    Algeria          Kenya   
623     NaN                        NaN    NaN     Uganda       Tanzania   

     HomeTeamGoals  AwayTeamGoals                 Stage  \
0              1.0            2.0       

In [21]:
# Check whether the 'Stadium' column is present in the DataFrame or not
if 'Stadium' in df.columns:
    print("The 'Stadium' column is present in the DataFrame")
else:
    print("The 'Stadium' column is not present in the DataFrame")


The 'Stadium' column is present in the DataFrame


In [22]:
# Convert the datatype of the 'AwayTeamGoals' column from integer to float
df['AwayTeamGoals'] = df['AwayTeamGoals'].astype(float)
print(df.dtypes)

Year                    float64
Date                     object
Time                     object
HomeTeam                 object
AwayTeam                 object
HomeTeamGoals           float64
AwayTeamGoals           float64
Stage                    object
SpecialWinConditions     object
Stadium                  object
City                     object
Attendance              float64
dtype: object


In [23]:
# Remove the last 10 rows from the DataFrame
df = df[:-10]
print(df)

       Year                     Date   Time      HomeTeam       AwayTeam  \
0    1957.0                 10-Feb-57    NaN       Sudan           Egypt   
1    1957.0                 10-Feb-57    NaN    Ethiopia    South Africa   
2    1957.0                 16-Feb-57    NaN       Egypt        Ethiopia   
3    1959.0                 22-May-59    NaN        Egypt       Ethiopia   
4    1959.0                 25-May-59    NaN       Sudan        Ethiopia   
..      ...                       ...    ...          ...            ...   
609  2019.0  6 July 2019 (2019-07-06)  21:00       Egypt    South Africa   
610  2019.0  7 July 2019 (2019-07-07)  18:00  Madagascar        DR Congo   
611  2019.0  7 July 2019 (2019-07-07)  21:00     Algeria          Guinea   
612  2019.0  8 July 2019 (2019-07-08)  18:00        Mali     Ivory Coast   
613  2019.0  8 July 2019 (2019-07-08)  21:00       Ghana         Tunisia   

     HomeTeamGoals  AwayTeamGoals             Stage  \
0              1.0            2.

In [24]:
# Iterate over rows in the DataFrame
for index, row in df.iterrows():
    print(row['HomeTeam'], row['AwayTeam'], row['HomeTeamGoals'], row['AwayTeamGoals'], row['Attendance'], row['City'])


Sudan   Egypt 1.0 2.0 30000.0 Kampala
Ethiopia   South Africa nan nan nan Kampala
Egypt   Ethiopia 4.0 10.0 30000.0 Kampala
Egypt  Ethiopia 4.0 0.0 30000.0 Kampala
Sudan   Ethiopia 1.0 0.0 20000.0 Kampala
Egypt  Sudan 2.0 1.0 30000.0 Kampala
Ethiopia   Tunisia 4.0 2.0 30000.0 Kampala
Egypt  Uganda 2.0 1.0 30000.0 Kampala
Tunisia   Uganda 3.0 0.0 nan Kampala
Ethiopia  Egypt 4.0 2.0 nan Kampala
Ghana   Tunisia 1.0 1.0 nan Kampala
Ghana   Ethiopia 2.0 0.0 nan Kampala
Ethiopia   Tunisia 4.0 2.0 nan Kampala
Egypt  Nigeria 6.0 3.0 nan Kampala
Egypt  Sudan 2.0 2.0 nan Kampala
Sudan   Nigeria 4.0 0.0 nan Kampala
Egypt  Ethiopia 3.0 0.0 nan Kampala
Ghana   Sudan 3.0 0.0 nan Kampala
Tunisia   Ethiopia 4.0 0.0 16000.0 Kampala
Senegal   Tunisia 0.0 0.0 nan Kampala
Senegal   Ethiopia 5.0 1.0 nan Kampala
Ghana   Congo-Léopoldville 5.0 2.0 nan Kampala
Ivory Coast   Congo-Léopoldville 3.0 0.0 nan Kampala
Ghana   Ivory Coast 4.0 1.0 nan Kampala
Senegal   Ivory Coast 0.0 1.0 nan Kampala
Ghana   Tunisi

South Africa   Namibia 4.0 1.0 9500.0 Kampala
Morocco   Zambia 1.0 1.0 10000.0 Kampala
Egypt   Mozambique 2.0 0.0 20000.0 Kampala
Egypt   Zambia 4.0 0.0 5000.0 Kampala
Morocco   Mozambique 3.0 0.0 3000.0 Kampala
Zambia   Mozambique 3.0 1.0 3000.0 Kampala
Morocco   Egypt 1.0 0.0 500.0 Kampala
Cameroon   DR Congo 0.0 1.0 5000.0 Kampala
Tunisia   Burkina Faso 1.0 1.0 35000.0 Kampala
Ivory Coast   Egypt 0.0 0.0 20000.0 Kampala
Morocco   South Africa 1.0 2.0 2000.0 Kampala
DR Congo   South Africa 1.0 2.0 4000.0 Kampala
Burkina Faso   Egypt 0.0 2.0 40000.0 Kampala
DR Congo   Burkina Faso 4.0 4.0 25000.0 Kampala
South Africa   Egypt 0.0 2.0 40000.0 Kampala
Ghana   Cameroon 1.0 1.0 45000.0 Kampala
Ivory Coast   Togo 1.0 1.0 13000.0 Kampala
Ghana   Togo 2.0 0.0 30000.0 Kampala
Cameroon   Ivory Coast 3.0 0.0 5000.0 Kampala
Ghana   Ivory Coast 0.0 2.0 40000.0 Kampala
Cameroon   Togo 0.0 1.0 2000.0 Kampala
South Africa   Gabon 3.0 1.0 20000.0 Kampala
DR Congo   Algeria 0.0 0.0 7000.0 Kampala
South

Senegal   Tanzania 2.0 0.0 7249.0 Kampala
Algeria   Kenya 2.0 0.0 8071.0 Kampala
Senegal   Algeria 0.0 1.0 25765.0 Kampala
Kenya   Tanzania 3.0 2.0 7233.0 Kampala
Kenya   Senegal 0.0 3.0 13224.0 Kampala
Tanzania   Algeria 0.0 3.0 8921.0 Kampala
Morocco   Namibia 1.0 0.0 6857.0 Kampala
Ivory Coast   South Africa 1.0 0.0 4961.0 Kampala
Morocco   Ivory Coast 1.0 0.0 27500.0 Kampala
South Africa   Namibia 1.0 0.0 16090.0 Kampala
South Africa   Morocco 0.0 1.0 12098.0 Kampala
Namibia   Ivory Coast 1.0 4.0 7530.0 Kampala
Tunisia   Angola 1.0 1.0 7345.0 Kampala
Mali   Mauritania 4.0 1.0 6202.0 Kampala
Tunisia   Mali 1.0 1.0 16085.0 Kampala
Mauritania   Angola 0.0 0.0 10120.0 Kampala
Mauritania   Tunisia 0.0 0.0 7732.0 Kampala
Angola   Mali 0.0 1.0 8135.0 Kampala
Cameroon   Guinea-Bissau 2.0 0.0 5983.0 Kampala
Ghana   Benin 2.0 2.0 8094.0 Kampala
Cameroon   Ghana 0.0 0.0 16724.0 Kampala
Benin   Guinea-Bissau 0.0 0.0 9212.0 Kampala
Benin   Cameroon 0.0 0.0 14120.0 Kampala
Guinea-Bissau   Ghana 

In [25]:
# Change the order of DataFrame columns
df = df[['AwayTeam', 'HomeTeam', 'AwayTeamGoals', 'HomeTeamGoals', 'Stadium', 'Attendance']]
print(df)


          AwayTeam     HomeTeam  AwayTeamGoals  HomeTeamGoals  \
0            Egypt       Sudan             2.0            1.0   
1     South Africa    Ethiopia             NaN            NaN   
2         Ethiopia       Egypt            10.0            4.0   
3         Ethiopia        Egypt            0.0            4.0   
4         Ethiopia       Sudan             0.0            1.0   
..             ...          ...            ...            ...   
609   South Africa       Egypt             1.0            0.0   
610       DR Congo  Madagascar             2.0            2.0   
611         Guinea     Algeria             0.0            3.0   
612    Ivory Coast        Mali             1.0            0.0   
613        Tunisia       Ghana             1.0            1.0   

                         Stadium  Attendance  
0              Municipal Stadium     30000.0  
1                            NaN         NaN  
2              Municipal Stadium     30000.0  
3          Prince Farouk Stadiu

In [26]:
# Delete DataFrame row(s) whose value is 0 in the 'HomeTeamGoals' column
df = df[df.HomeTeamGoals != 0]
print(df)

          AwayTeam     HomeTeam  AwayTeamGoals  HomeTeamGoals  \
0            Egypt       Sudan             2.0            1.0   
1     South Africa    Ethiopia             NaN            NaN   
2         Ethiopia       Egypt            10.0            4.0   
3         Ethiopia        Egypt            0.0            4.0   
4         Ethiopia       Sudan             0.0            1.0   
..             ...          ...            ...            ...   
606          Benin     Morocco             1.0            1.0   
608       Cameroon     Nigeria             2.0            3.0   
610       DR Congo  Madagascar             2.0            2.0   
611         Guinea     Algeria             0.0            3.0   
613        Tunisia       Ghana             1.0            1.0   

                   Stadium  Attendance  
0        Municipal Stadium     30000.0  
1                      NaN         NaN  
2        Municipal Stadium     30000.0  
3    Prince Farouk Stadium     30000.0  
4    Prince Fa