In [295]:
import pandas as pd

In [296]:
rankings = pd.read_csv('./data/fifa_ranking-2022-10-06.csv')
rankings

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,1,Germany,GER,57.00,0.00,0,UEFA,1992-12-31
1,96,Syria,SYR,11.00,0.00,0,AFC,1992-12-31
2,97,Burkina Faso,BFA,11.00,0.00,0,CAF,1992-12-31
3,99,Latvia,LVA,10.00,0.00,0,UEFA,1992-12-31
4,100,Burundi,BDI,10.00,0.00,0,CAF,1992-12-31
...,...,...,...,...,...,...,...,...
63911,74,El Salvador,SLV,1330.51,1333.48,3,CONCACAF,2022-10-06
63912,75,Oman,OMA,1320.29,1323.03,0,AFC,2022-10-06
63913,76,Israel,ISR,1316.55,1316.35,0,UEFA,2022-10-06
63914,78,Georgia,GEO,1307.34,1296.46,-4,UEFA,2022-10-06


In [297]:
rankings.info()
rankings.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63916 entries, 0 to 63915
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   rank             63916 non-null  int64  
 1   country_full     63916 non-null  object 
 2   country_abrv     63916 non-null  object 
 3   total_points     63916 non-null  float64
 4   previous_points  63916 non-null  float64
 5   rank_change      63916 non-null  int64  
 6   confederation    63916 non-null  object 
 7   rank_date        63916 non-null  object 
dtypes: float64(2), int64(2), object(4)
memory usage: 3.9+ MB


Unnamed: 0,rank,total_points,previous_points,rank_change
count,63916.0,63916.0,63916.0,63916.0
mean,100.196821,442.40478,440.999013,0.004756
std,58.060019,409.091526,409.321338,5.660809
min,1.0,1.0,0.0,-92.0
25%,50.0,88.0,86.0,-1.0
50%,100.0,347.0,345.0,0.0
75%,149.0,641.0,640.0,2.0
max,211.0,2172.0,2187.0,199.0


## Country_codes load, used for cleaning rankings 

In [298]:
# Using a stadardized table team - fifa_code 
# Use this to clean rankings table
country_codes = pd.read_csv('./data/country_codes.csv')
country_codes

Unnamed: 0,team,code,country
0,Afghanistan,AFG,Afghanistan
1,Albania,ALB,Albania
2,Algeria,ALG,Algeria
3,American Samoa,ASA,American Samoa
4,Andorra,AND,Andorra
...,...,...,...
206,Vietnam,VIE,Vietnam
207,Wales,WAL,United Kingdom
208,Yemen,YEM,Yemen
209,Zambia,ZAM,Zambia


In [299]:
# We want to link tables by the code (country_codes) = country_abrv (rankings)
# So check how well they link

# Match the codes in COUNTRY_CODES to the codes in RANKINGS 
print(list(set(country_codes['code'].unique()) - set(rankings['country_abrv'].unique())))

[]


In [300]:
# EXACT MATCH ABOVE

# now match codes in RANKINGS to the codes in COUNTRY_CODES
list(set(rankings['country_abrv'].unique()) - set(country_codes['code'].unique()))

['YUG', 'ANT', 'SCG', 'ZAI', 'LIB', 'TCH']

### We have a few problems, but at least we know all codes in COUNTRY_CODES map to the RANKINGS
### So we only need to work on the RANKINGS table to clean the codes above
### Also chech the consistency of country naming (country_full) in RANKINGS

# RANKINGS TABLE CLEANING

## Irregular Data 
### Same fifa_code, different country name variation 

In [301]:
rankings_modif = rankings.copy()

In [302]:
# Group all countries by the abreviation (which we know is unique)
abvr = rankings_modif['country_full'].groupby(rankings_modif['country_abrv']).unique().apply(pd.Series)
abvr

Unnamed: 0_level_0,0,1,2
country_abrv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AFG,Afghanistan,,
AIA,Anguilla,,
ALB,Albania,,
ALG,Algeria,,
AND,Andorra,,
...,...,...,...
YEM,Yemen,,
YUG,Yugoslavia,,
ZAI,Zaire,,
ZAM,Zambia,,


In [303]:
abvr_to_fix = abvr[~abvr[1].isnull()]
abvr_to_fix

Unnamed: 0_level_0,0,1,2
country_abrv,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CPV,Cape Verde Islands,Cabo Verde,
CUW,Curacao,Curaçao,
MKD,FYR Macedonia,North Macedonia,
STP,Sao Tome e Principe,São Tomé e Príncipe,São Tomé and Príncipe
SWZ,Eswatini,Swaziland,
TUR,Turkey,Türkiye,
VIN,St. Vincent and the Grenadines,St. Vincent / Grenadines,


In [304]:
# Indexes to fix
abvr_to_fix[0].index

Index(['CPV', 'CUW', 'MKD', 'STP', 'SWZ', 'TUR', 'VIN'], dtype='object', name='country_abrv')

In [305]:
# Take indexes countries from COUNTRY_CODE
abvr_to_fix_countries_list = country_codes.loc[country_codes['code'].isin(abvr_to_fix[0].index)]
# Resulting on a subset of that table, with the indexes and the names we want
abvr_to_fix_countries_list

Unnamed: 0,team,code,country
36,Cape Verde,CPV,Cape Verde
50,Curaçao,CUW,Curaçao
65,Eswatini,SWZ,Eswatini
139,North Macedonia,MKD,North Macedonia
160,Saint Vincent and the Grenadines,VIN,Saint Vincent and the Grenadines
163,São Tomé and Príncipe,STP,São Tomé and Príncipe
194,Turkey,TUR,Turkey


In [306]:
abvr_to_fix_countries_list[abvr_to_fix_countries_list['code']=='CPV']['team'].values[0]

'Cape Verde'

In [307]:
# Iterate through the list of indexes
for index in abvr_to_fix[0].index:
    # Select the value of one column based on the condition that another column (country_abrevation) is some index
    # Assign to the value, the correct team name taken from the COUNTRY_CODES table 
    rankings_modif.loc[rankings_modif['country_abrv'] == index, 'country_full'] = abvr_to_fix_countries_list[abvr_to_fix_countries_list['code'] == index]['team'].values[0]


In [308]:
rankings_modif

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,1,Germany,GER,57.00,0.00,0,UEFA,1992-12-31
1,96,Syria,SYR,11.00,0.00,0,AFC,1992-12-31
2,97,Burkina Faso,BFA,11.00,0.00,0,CAF,1992-12-31
3,99,Latvia,LVA,10.00,0.00,0,UEFA,1992-12-31
4,100,Burundi,BDI,10.00,0.00,0,CAF,1992-12-31
...,...,...,...,...,...,...,...,...
63911,74,El Salvador,SLV,1330.51,1333.48,3,CONCACAF,2022-10-06
63912,75,Oman,OMA,1320.29,1323.03,0,AFC,2022-10-06
63913,76,Israel,ISR,1316.55,1316.35,0,UEFA,2022-10-06
63914,78,Georgia,GEO,1307.34,1296.46,-4,UEFA,2022-10-06


In [309]:
abvr_test = rankings_modif['country_full'].groupby(rankings_modif['country_abrv']).unique().apply(pd.Series)
abvr_test

Unnamed: 0_level_0,0
country_abrv,Unnamed: 1_level_1
AFG,Afghanistan
AIA,Anguilla
ALB,Albania
ALG,Algeria
AND,Andorra
...,...
YEM,Yemen
YUG,Yugoslavia
ZAI,Zaire
ZAM,Zambia


In [310]:
# Fixed irregular country_full naming in RANKINGS based on team in COUNTRY_CODE

## Inconsistent Data
### Same country, different fifa_code

In [311]:
list_of_codes_in_rankings_not_in_current_fifa = list(set(rankings_modif['country_abrv'].unique()) - set(country_codes['code'].unique()))
list_of_codes_in_rankings_not_in_current_fifa

['YUG', 'ANT', 'SCG', 'ZAI', 'LIB', 'TCH']

In [312]:
print(list(set(country_codes['code'].unique()) - set(rankings_modif['country_abrv'].unique()) ))

[]


In [313]:
# We need to see why these abbreviation are extra

In [314]:
rankings_modif.loc[rankings_modif['country_abrv'].isin(list_of_codes_in_rankings_not_in_current_fifa), 'country_full'].unique()

array(['Yugoslavia', 'Czechoslovakia', 'Zaire', 'Serbia and Montenegro',
       'Netherlands Antilles', 'Lebanon'], dtype=object)

In [315]:
# Yugoslavia, Czechoslovakia, Serbia and Montenegro and Netherlands Antilles are historically FIFA

# We can change them and their info to their successor
# Yugoslavia, Serbia and Montenegro -> Serbia
# Czechoslovakia -> Czech Republic
# Netherlands Antilles -> Curacao

# MUST Fixes: 
# Zaire is Democratic Republic of the Congo
# Lebanon appears with two different codes LIB and LBN  

## Fixing Zaire and Lebanon
### Lebanon

In [316]:
rankings_modif2 = rankings_modif.copy()

In [317]:
# taking care of lebanon
rankings_lebanon = rankings_modif2.loc[rankings_modif2['country_full']=='Lebanon']
rankings_lebanon['country_abrv'].unique()

array(['LBN', 'LIB'], dtype=object)

In [318]:
# Change LIB to LBN
rankings_modif2['country_abrv'] = rankings_modif2['country_abrv'].replace('LIB','LBN')

In [319]:
# Test if we fixed Lebanon abrv
rankings_modif2.loc[rankings_modif2['country_abrv']=='LIB']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date


### Zaire

In [320]:
# taking care of Zaire
rankings_zaire = rankings_modif2.loc[rankings_modif2['country_abrv']=='ZAI']
rankings_zaire.iloc[0]

rank                       60
country_full            Zaire
country_abrv              ZAI
total_points             25.0
previous_points           0.0
rank_change                 0
confederation             CAF
rank_date          1992-12-31
Name: 117, dtype: object

In [321]:
rankings_cod = rankings_modif2.loc[rankings_modif2['country_abrv']=='COD']
rankings_cod['country_full'].iloc[0]

'Congo DR'

In [322]:
rankings_modif2['country_full'] = rankings_modif2['country_full'].replace(rankings_zaire['country_full'].iloc[0], rankings_cod['country_full'].iloc[0])
rankings_modif2['country_abrv'] = rankings_modif2['country_abrv'].replace(rankings_zaire['country_abrv'].iloc[0], rankings_cod['country_abrv'].iloc[0])

In [323]:
rankings_modif2.loc[rankings_modif2['country_full']=='Zaire']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date


In [324]:
rankings_modif2

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,1,Germany,GER,57.00,0.00,0,UEFA,1992-12-31
1,96,Syria,SYR,11.00,0.00,0,AFC,1992-12-31
2,97,Burkina Faso,BFA,11.00,0.00,0,CAF,1992-12-31
3,99,Latvia,LVA,10.00,0.00,0,UEFA,1992-12-31
4,100,Burundi,BDI,10.00,0.00,0,CAF,1992-12-31
...,...,...,...,...,...,...,...,...
63911,74,El Salvador,SLV,1330.51,1333.48,3,CONCACAF,2022-10-06
63912,75,Oman,OMA,1320.29,1323.03,0,AFC,2022-10-06
63913,76,Israel,ISR,1316.55,1316.35,0,UEFA,2022-10-06
63914,78,Georgia,GEO,1307.34,1296.46,-4,UEFA,2022-10-06


In [325]:
# Codes check
codes_in_rankings_not_in_fifa = list(set(rankings_modif2['country_abrv'].unique()) - set(country_codes['code'].unique()))
print(codes_in_rankings_not_in_fifa,rankings_modif2.loc[rankings_modif2['country_abrv'].isin(codes_in_rankings_not_in_fifa), 'country_full'].unique())


['YUG', 'ANT', 'TCH', 'SCG'] ['Yugoslavia' 'Czechoslovakia' 'Serbia and Montenegro'
 'Netherlands Antilles']


## These are all former FIFA countries
### Let's explore each and see how we can fix them

## Czechoslovakia -> Czech Republic

In [326]:
rankings_modif2.loc[rankings_modif2['country_full']=='Czechoslovakia']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
93,18,Czechoslovakia,TCH,47.0,0.0,0,UEFA,1992-12-31
257,15,Czechoslovakia,TCH,50.0,47.0,-3,UEFA,1993-08-08
396,17,Czechoslovakia,TCH,50.0,50.0,2,UEFA,1993-09-23
454,21,Czechoslovakia,TCH,47.0,50.0,4,UEFA,1993-10-22
714,19,Czechoslovakia,TCH,50.0,47.0,-2,UEFA,1993-11-19
833,19,Czechoslovakia,TCH,49.0,50.0,0,UEFA,1993-12-23


In [327]:
rankings_modif2.loc[rankings_modif2['country_full']=='Czech Republic']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
1144,67,Czech Republic,CZE,24.00,0.00,0,UEFA,1994-03-15
1255,63,Czech Republic,CZE,28.00,24.00,-4,UEFA,1994-04-19
1372,59,Czech Republic,CZE,28.00,28.00,-4,UEFA,1994-05-17
1560,45,Czech Republic,CZE,35.00,28.00,-14,UEFA,1994-06-14
1814,44,Czech Republic,CZE,35.00,35.00,-1,UEFA,1994-07-21
...,...,...,...,...,...,...,...,...
62959,31,Czech Republic,CZE,1510.42,1510.42,-1,UEFA,2022-02-10
63181,33,Czech Republic,CZE,1500.62,1510.42,2,UEFA,2022-03-31
63383,32,Czech Republic,CZE,1502.90,1500.62,-1,UEFA,2022-06-23
63604,32,Czech Republic,CZE,1502.90,1502.90,0,UEFA,2022-08-25


In [328]:
rankings_modif2.loc[rankings_modif2['country_full']=='Slovakia']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
651,146,Slovakia,SVK,2.00,0.00,0,UEFA,1993-11-19
761,150,Slovakia,SVK,2.00,2.00,4,UEFA,1993-12-23
1022,58,Slovakia,SVK,30.00,2.00,-92,UEFA,1994-02-15
1186,60,Slovakia,SVK,30.00,30.00,2,UEFA,1994-03-15
1246,54,Slovakia,SVK,32.00,30.00,-6,UEFA,1994-04-19
...,...,...,...,...,...,...,...,...
62944,46,Slovakia,SVK,1455.36,1455.36,4,UEFA,2022-02-10
63193,45,Slovakia,SVK,1454.98,1455.36,-1,UEFA,2022-03-31
63364,51,Slovakia,SVK,1439.99,1454.98,6,UEFA,2022-06-23
63623,51,Slovakia,SVK,1439.99,1439.99,0,UEFA,2022-08-25


### After analyzing these, it seems that we can simply eliminate Czechoslovakia rankings from the database

In [329]:
rankings_modif2 = rankings_modif2[rankings_modif2['country_full'] != 'Czechoslovakia']

In [330]:
# See what codes remain codes
codes_in_rankings_not_in_fifa = list(set(rankings_modif2['country_abrv'].unique()) - set(country_codes['code'].unique()))
countries_in_rankings_not_in_fifa = rankings_modif2.loc[rankings_modif['country_abrv'].isin(codes_in_rankings_not_in_fifa), 'country_full'].unique()

print(codes_in_rankings_not_in_fifa, countries_in_rankings_not_in_fifa)

['YUG', 'ANT', 'SCG'] ['Yugoslavia' 'Serbia and Montenegro' 'Netherlands Antilles']


## Yugoslavia -> Serbia and Montenegro -> Serbia
### Since FIFA decided to inherit the points like this, we change the country names
#### We will only represent current fifa countries, anyways

In [331]:
rankings_modif2.loc[rankings_modif2['country_full']=='Yugoslavia']


Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
84,29,Yugoslavia,YUG,39.0,0.0,0,UEFA,1992-12-31
279,56,Yugoslavia,YUG,31.0,39.0,27,UEFA,1993-08-08
418,62,Yugoslavia,YUG,30.0,31.0,6,UEFA,1993-09-23
583,65,Yugoslavia,YUG,29.0,30.0,3,UEFA,1993-10-22
732,67,Yugoslavia,YUG,28.0,29.0,2,UEFA,1993-11-19
...,...,...,...,...,...,...,...,...
18135,16,Yugoslavia,YUG,689.0,702.0,6,UEFA,2002-07-03
18179,16,Yugoslavia,YUG,688.0,689.0,0,UEFA,2002-08-14
18488,20,Yugoslavia,YUG,672.0,688.0,4,UEFA,2002-09-18
18655,16,Yugoslavia,YUG,690.0,672.0,-4,UEFA,2002-10-23


In [332]:
rankings_modif2.loc[rankings_modif2['country_full']=='Serbia and Montenegro']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
19127,19,Serbia and Montenegro,SCG,678.0,0.0,0,UEFA,2002-12-18
19196,19,Serbia and Montenegro,SCG,677.0,678.0,0,UEFA,2003-01-15
19571,19,Serbia and Montenegro,SCG,674.0,677.0,0,UEFA,2003-02-19
19650,20,Serbia and Montenegro,SCG,667.0,674.0,1,UEFA,2003-03-26
19972,22,Serbia and Montenegro,SCG,659.0,667.0,2,UEFA,2003-04-23
20065,22,Serbia and Montenegro,SCG,650.0,659.0,0,UEFA,2003-05-21
20326,27,Serbia and Montenegro,SCG,636.0,650.0,5,UEFA,2003-06-25
20578,33,Serbia and Montenegro,SCG,621.0,636.0,6,UEFA,2003-07-30
20714,29,Serbia and Montenegro,SCG,621.0,621.0,-4,UEFA,2003-08-27
20890,35,Serbia and Montenegro,SCG,622.0,621.0,6,UEFA,2003-09-24


In [333]:
rankings_modif2.loc[rankings_modif2['country_full']=='Serbia']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
26460,47,Serbia,SRB,612.00,0.00,0,UEFA,2005-12-16
27614,36,Serbia,SRB,724.00,0.00,0,UEFA,2006-07-12
27831,33,Serbia,SRB,733.00,724.00,-3,UEFA,2006-08-16
28024,32,Serbia,SRB,759.00,733.00,-1,UEFA,2006-09-13
28220,32,Serbia,SRB,767.00,759.00,0,UEFA,2006-10-18
...,...,...,...,...,...,...,...,...
62964,25,Serbia,SRB,1547.38,1547.38,2,UEFA,2022-02-10
63226,25,Serbia,SRB,1547.53,1547.38,0,UEFA,2022-03-31
63337,25,Serbia,SRB,1549.53,1547.53,0,UEFA,2022-06-23
63650,25,Serbia,SRB,1549.53,1549.53,0,UEFA,2022-08-25


### Change Yugoslavia, Serbia and Montenegro to Serbia
### Change YUG, SCG to SRB

## Netherlands Antilles - Curacao

In [334]:
# Netherlands Antilles 
rankings_modif2.loc[rankings_modif2['country_abrv']=='ANT']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
29765,173,Netherlands Antilles,ANT,43.0,47.0,-1,CONCACAF,2007-06-13
29957,174,Netherlands Antilles,ANT,41.0,43.0,1,CONCACAF,2007-07-18
30137,177,Netherlands Antilles,ANT,41.0,41.0,3,CONCACAF,2007-08-22
30405,182,Netherlands Antilles,ANT,30.0,41.0,5,CONCACAF,2007-09-19
30520,183,Netherlands Antilles,ANT,30.0,30.0,1,CONCACAF,2007-10-24
30750,183,Netherlands Antilles,ANT,30.0,30.0,0,CONCACAF,2007-11-22
31026,183,Netherlands Antilles,ANT,30.0,30.0,0,CONCACAF,2007-12-17
31234,179,Netherlands Antilles,ANT,32.0,30.0,-4,CONCACAF,2008-01-16
31321,161,Netherlands Antilles,ANT,93.0,32.0,-18,CONCACAF,2008-02-13
31631,163,Netherlands Antilles,ANT,93.0,93.0,2,CONCACAF,2008-03-12


In [335]:
# Netherlands Antilles -> Curacao
rankings_modif2.loc[rankings_modif2['country_abrv']=='CUW']

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
38197,150,Curaçao,CUW,107.00,109.00,1,CONCACAF,2011-01-12
38340,150,Curaçao,CUW,107.00,107.00,0,CONCACAF,2011-02-02
38592,146,Curaçao,CUW,115.00,107.00,-4,CONCACAF,2011-03-09
38732,152,Curaçao,CUW,121.00,115.00,6,CONCACAF,2011-04-13
39003,152,Curaçao,CUW,121.00,121.00,0,CONCACAF,2011-05-18
...,...,...,...,...,...,...,...,...
62910,80,Curaçao,CUW,1298.39,1298.39,0,CONCACAF,2022-02-10
63280,79,Curaçao,CUW,1298.39,1298.39,-1,CONCACAF,2022-03-31
63331,84,Curaçao,CUW,1293.35,1298.39,5,CONCACAF,2022-06-23
63657,84,Curaçao,CUW,1293.35,1293.35,0,CONCACAF,2022-08-25


### Change Netherlands Antilles to Curaçao
### Change ANT to CUW

In [336]:
print(codes_in_rankings_not_in_fifa, countries_in_rankings_not_in_fifa)

['YUG', 'ANT', 'SCG'] ['Yugoslavia' 'Serbia and Montenegro' 'Netherlands Antilles']


In [337]:
list_of_codes_changed = ['SRB','CUW','SRB']
list_of_countries_changed = ['Serbia','Serbia','Curaçao']

In [338]:
rankings_modif2['country_full'] = rankings_modif2['country_full'].replace(countries_in_rankings_not_in_fifa, list_of_countries_changed)
rankings_modif2['country_abrv'] = rankings_modif2['country_abrv'].replace(codes_in_rankings_not_in_fifa, list_of_codes_changed)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rankings_modif2['country_full'] = rankings_modif2['country_full'].replace(countries_in_rankings_not_in_fifa, list_of_countries_changed)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rankings_modif2['country_abrv'] = rankings_modif2['country_abrv'].replace(codes_in_rankings_not_in_fifa, list_of_codes_changed)


In [339]:
# Test
# See what codes remain codes
test_abrv = list(set(rankings_modif2['country_abrv'].unique()) - set(country_codes['code'].unique()))
test_countries = rankings_modif2.loc[rankings_modif['country_abrv'].isin(test_abrv), 'country_full'].unique()

print(test_abrv, test_countries)

[] []


In [137]:
rankings_modif2.to_csv('./data/fifa_rankings.csv', index=False)

## Duplicates

### Check duplicate rows based on country and date of evaluation

In [340]:
duplicates = rankings_modif2[rankings_modif2.duplicated(subset=['country_full','rank_date'], keep=False)]
duplicates

Unnamed: 0,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
26434,47,Serbia,SRB,612.0,612.0,0,UEFA,2005-12-16
26460,47,Serbia,SRB,612.0,0.0,0,UEFA,2005-12-16
27613,36,Serbia,SRB,724.0,610.0,-8,UEFA,2006-07-12
27614,36,Serbia,SRB,724.0,0.0,0,UEFA,2006-07-12
30386,186,Montenegro,MNE,21.0,0.0,-13,UEFA,2007-09-19
30407,186,Montenegro,MNE,21.0,0.0,-13,UEFA,2007-09-19
30580,171,Montenegro,MNE,65.0,21.0,-15,UEFA,2007-10-24
30660,171,Montenegro,MNE,65.0,21.0,-15,UEFA,2007-10-24
30783,172,Montenegro,MNE,65.0,65.0,1,UEFA,2007-11-22
30784,172,Montenegro,MNE,65.0,65.0,1,UEFA,2007-11-22


## Standardizing the country_full names to the team from country_codes table