# Data Collection

In [1]:
import pandas as pd

---

### Results DF

In [2]:
results = pd.read_csv('./data/results.csv')
results.head()
# data from kaggle:  https://www.kaggle.com/martj42/international-football-results-from-1872-to-2017?select=results.csv

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
0,1872-11-30,Scotland,England,0.0,0.0,Friendly,Glasgow,Scotland,False
1,1873-03-08,England,Scotland,4.0,2.0,Friendly,London,England,False
2,1874-03-07,Scotland,England,2.0,1.0,Friendly,Glasgow,Scotland,False
3,1875-03-06,England,Scotland,2.0,2.0,Friendly,London,England,False
4,1876-03-04,Scotland,England,3.0,0.0,Friendly,Glasgow,Scotland,False


In [3]:
results.shape

(43045, 9)

In [4]:
results[results['date'] < '1992-12-31'].shape  # <- 1st date of FIFA rankings

(17489, 9)

In [5]:
# Discard every match dated before 1992-12-31 (the first FIFA ranking date noted
# in the cell above); the comparison is done on the raw 'date' column.
pre_ranking_index = results.index[results['date'] < '1992-12-31']
results.drop(index=pre_ranking_index, inplace=True)

In [6]:
results.shape

(25556, 9)

In [7]:
results.reset_index(inplace=True)

In [8]:
results.drop(columns='index', inplace=True)
results.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
0,1993-01-01,Ghana,Mali,1.0,1.0,Friendly,Libreville,Gabon,True
1,1993-01-02,Gabon,Burkina Faso,1.0,1.0,Friendly,Libreville,Gabon,False
2,1993-01-02,Kuwait,Lebanon,2.0,0.0,Friendly,Kuwait City,Kuwait,False
3,1993-01-03,Burkina Faso,Mali,1.0,0.0,Friendly,Libreville,Gabon,True
4,1993-01-03,Gabon,Ghana,2.0,3.0,Friendly,Libreville,Gabon,False


---

### Upcoming Fixtures DF

In [9]:
qualifying_fixtures = pd.read_csv('./data/concacaf_wcq_fixtures.csv')
qualifying_fixtures.tail()
# data from FIFA site:  https://www.fifa.com/tournaments/mens/worldcup/qatar2022/qualifiers/concacaf

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
54,2022-03-29,Panama,Canada,,,FIFA World Cup qualification,,,
55,2022-03-29,Costa Rica,United States,,,FIFA World Cup qualification,,,
56,,,,,,,,,
57,,,,,,,,,
58,,,,,,,,,


In [10]:
qualifying_fixtures[qualifying_fixtures['date'].isnull()]

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral
56,,,,,,,,,
57,,,,,,,,,
58,,,,,,,,,


In [11]:
# Remove the rows that carry no date (the empty trailing rows shown above).
qualifying_fixtures.dropna(subset=['date'], inplace=True)

---

### Current Table DF

In [12]:
table = pd.read_csv('./data/current_table_12.14.2021.csv')
table
# data from FIFA site:  https://www.fifa.com/tournaments/mens/worldcup/qatar2022/qualifiers/concacaf

Unnamed: 0,rank,team_name,matches_played,wins,draws,losses,goals_for,goals_against,goal_differential,points
0,1,Canada,8,4,4,0,13,5,8,16
1,2,USA,8,4,3,1,12,5,7,15
2,3,Mexico,8,4,2,2,11,7,4,14
3,4,Panama,8,4,2,2,11,9,2,14
4,5,Costa Rica,8,2,3,3,6,7,-1,9
5,6,Jamaica,8,1,4,3,6,10,-4,7
6,7,El Salvador,8,1,3,4,4,10,-6,6
7,8,Honduras,8,0,3,5,5,15,-10,3


---

### World Rankings DF

In [13]:
past_rankings = pd.read_csv('./data/fifa_ranking-2021-05-27.csv')
past_rankings.head()
# data from kaggle:  https://www.kaggle.com/cashncarry/fifaworldranking

Unnamed: 0,id,rank,country_full,country_abrv,total_points,previous_points,rank_change,confederation,rank_date
0,43818,82,Iraq,IRQ,15,0,0,AFC,12/31/1992
1,43873,107,Mozambique,MOZ,9,0,0,CAF,12/31/1992
2,43816,108,Indonesia,IDN,9,0,0,AFC,12/31/1992
3,1882218,109,Antigua and Barbuda,ATG,8,0,0,CONCACAF,12/31/1992
4,43820,110,Jordan,JOR,8,0,0,AFC,12/31/1992


In [14]:
past_rankings = past_rankings[['rank', 'country_full', 'rank_date']]
past_rankings.head()

Unnamed: 0,rank,country_full,rank_date
0,82,Iraq,12/31/1992
1,107,Mozambique,12/31/1992
2,108,Indonesia,12/31/1992
3,109,Antigua and Barbuda,12/31/1992
4,110,Jordan,12/31/1992


In [15]:
past_rankings.shape

(63055, 3)

In [16]:
recent_rankings = pd.read_csv('./data/fifa_ranking_2021-11-19.csv')
recent_rankings.head()
# data from FIFA:  https://www.fifa.com/fifa-world-ranking/

Unnamed: 0,rank,country_full,rank_date
0,1.0,Belgium,2021-08-12
1,2.0,Brazil,2021-08-12
2,3.0,France,2021-08-12
3,4.0,England,2021-08-12
4,5.0,Italy,2021-08-12


In [17]:
recent_rankings.shape

(1678, 3)

In [18]:
# Stack the historical and the recent ranking tables into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it both
# inputs keep their own labels (each starting at 0), so the combined frame has
# duplicate labels and any later label-based assignment such as
# rankings.loc[<labels>, 'country_full'] = ... also overwrites unrelated rows
# that happen to share a label.
rankings = pd.concat([past_rankings, recent_rankings], ignore_index=True)
rankings.shape

(64733, 3)

---

### Combining and Cleaning the Data

In [19]:
rankings.dropna(inplace=True)

In [20]:
results['home_rank'] = 0

In [21]:
results['away_rank'] = 0

In [22]:
# Match dates appear to be uniformly ISO formatted (YYYY-MM-DD), so one
# vectorised parse is enough here.
results['datetime'] = pd.to_datetime(results['date'])

# rank_date mixes two layouts: the historical file uses MM/DD/YYYY while the
# recent file uses YYYY-MM-DD. A single vectorised pd.to_datetime call infers
# one format from the first value and, on pandas >= 2.0, raises on the rows
# written in the other layout. Parsing each distinct string on its own works on
# every pandas version and costs only one parse per unique ranking date.
rank_date_lookup = {raw: pd.to_datetime(raw) for raw in rankings['rank_date'].unique()}
rankings['datetime'] = rankings['rank_date'].map(rank_date_lookup)

In [23]:
results.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,home_rank,away_rank,datetime
0,1993-01-01,Ghana,Mali,1.0,1.0,Friendly,Libreville,Gabon,True,0,0,1993-01-01
1,1993-01-02,Gabon,Burkina Faso,1.0,1.0,Friendly,Libreville,Gabon,False,0,0,1993-01-02
2,1993-01-02,Kuwait,Lebanon,2.0,0.0,Friendly,Kuwait City,Kuwait,False,0,0,1993-01-02
3,1993-01-03,Burkina Faso,Mali,1.0,0.0,Friendly,Libreville,Gabon,True,0,0,1993-01-03
4,1993-01-03,Gabon,Ghana,2.0,3.0,Friendly,Libreville,Gabon,False,0,0,1993-01-03


In [24]:
results.shape

(25556, 12)

In [577]:
# Remove matches whose home team has no FIFA ranking to look up, so that the
# home-rank lookup performed later always finds a value.
#
# Three kinds of rule are applied, all of them on `home_team` only:
#   1. teams that do have rankings, but only from a given date onwards
#      -> drop their home matches played before that date;
#   2. teams that are not FIFA members, matched by exact name;
#   3. teams that are not FIFA members, matched by substring (regex) search.
#
# NOTE(review): this cell runs before the name-normalisation cell, so names that
# are rewritten there (e.g. 'Serbia' -> 'Serbia and Montenegro',
# 'United States Virgin Islands' -> 'US Virgin Islands') are not re-checked
# against these cut-offs on a fresh top-to-bottom run -- confirm the intended
# cell order.

# Rule 1: team -> first date with a ranking in the rankings dataframe.
# Trailing comments keep the number of rows the original author recorded as
# dropped for each team.
FIRST_RANK_DATE = {
    'United Arab Emirates': '1993-08-08',    # 8 rows
    'Netherlands': '1993-08-08',             # 3 rows
    'Guyana': '1993-08-08',                  # 3 rows
    'Cayman Islands': '1993-08-08',          # 1 row
    'Slovakia': '1993-11-19',                # 2 rows
    'Thailand': '1993-08-08',                # 5 rows
    'Anguilla': '1997-05-14',                # 3 rows
    'Sri Lanka': '1993-08-08',               # 4 rows
    'Vietnam': '1993-08-08',                 # 4 rows
    'Latvia': '1993-08-08',                  # 5 rows
    'Dominica': '1995-02-20',                # 3 rows
    'Czech Republic': '1994-03-15',          # 3 rows
    'Macau': '1993-08-08',                   # 3 rows
    'Azerbaijan': '1994-06-14',              # 3 rows
    'Turkmenistan': '1994-10-25',            # 1 row
    'Bahrain': '1993-08-08',                 # 4 rows
    'Algeria': '1993-08-08',                 # 4 rows
    'Gibraltar': '2016-09-15',               # 42 rows (not in FIFA rankings until then)
    'Georgia': '1994-03-15',                 # 2 rows
    'North Macedonia': '1994-05-17',         # 4 rows
    'Kazakhstan': '1994-11-22',              # 2 rows
    'Uzbekistan': '1994-10-25',              # 4 rows
    'Tajikistan': '1994-11-22',              # 1 row
    'Papua New Guinea': '1996-09-25',        # 5 rows
    'Samoa': '1996-11-20',                   # 4 rows
    'Tonga': '1996-11-20',                   # 3 rows
    'Montserrat': '1999-03-24',              # 1 row
    'Montenegro': '2007-06-13',              # 1 row
    'Serbia and Montenegro': '2002-12-18',   # 32 rows (not in FIFA rankings until then)
    'Curaçao': '2007-06-13',                 # 19 rows (not in FIFA rankings until then)
    'Cook Islands': '1996-11-20',            # 3 rows
    'Guam': '1996-08-28',                    # 2 rows
    'New Caledonia': '2004-06-09',           # 12 rows (not in FIFA rankings until then)
    'Dominican Republic': '1993-08-08',      # 1 row
    'Bosnia and Herzegovina': '1996-05-22',  # 1 row
    'Andorra': '1996-11-20',                 # 1 row
    'British Virgin Islands': '1997-05-14',  # 1 row
    'Palestine': '1998-12-23',               # 2 rows
    'US Virgin Islands': '1999-03-24',       # 2 rows
    'São Tomé and Príncipe': '1998-08-19',   # 1 row
    'Bhutan': '2000-08-09',                  # 1 row
    'Afghanistan': '2003-01-15',             # 2 rows
    'Kosovo': '2016-07-14',                  # 7 rows
    'South Sudan': '2012-08-08',             # 1 row
}

# Rule 2: not FIFA members (no rankings at all) -- exact name match.
NON_FIFA_TEAMS = [
    'Martinique', 'French Guiana', 'Guadeloupe', 'Guernsey', 'Ynys Môn',
    'Isle of Wight', 'Åland Islands', 'Jersey', 'Isle of Man', 'Greenland',
    'Shetland', 'Basque Country', 'Sint Maarten',
]

# Rule 3: not FIFA members (no rankings at all) -- substring match. These are
# passed to str.contains and therefore interpreted as regular expressions
# (the '.' in 'Republic of St. Pauli' matches any character).
NON_FIFA_PATTERNS = [
    'Zanzibar', 'Canary Islands', 'Frøya', 'Hitra', 'Catalonia', 'Corsica',
    'Brittany', 'Palau', 'Gotland', 'Saare County', 'Rhodes', 'Micronesia',
    'Andalusia', 'Kernow', 'Saint Martin', 'Orkney', 'Monaco', 'Tuvalu',
    'Sark', 'Alderney', 'Kiribati', 'Mayotte', 'Western Isles',
    'Falkland Islands', 'Réunion', 'Galicia', 'Northern Cyprus',
    'Republic of St. Pauli', 'Găgăuzia', 'Tibet', 'Occitania', 'Sápmi',
    'Silesia', 'Northern Mariana Islands', 'Menorca', 'Provence',
    'Arameans Suryoye', 'Padania', 'Iraqi Kurdistan', 'Gozo', 'Bonaire',
    'Western Sahara', 'Raetia', 'Darfur', 'Tamil Eelam', 'Abkhazia',
    'Saint Pierre and Miquelon', 'Artsakh', 'Madrid', 'Vatican City',
    'Ellan Vannin', 'South Ossetia', 'County of Nice', 'Székely Land',
    'Romani people', 'Felvidék', 'Chagos Islands', 'United Koreans in Japan',
    'Somaliland', 'Western Armenia', 'Barawa', 'Kárpátalja', 'Yorkshire',
    'Panjab', 'Cascadia', 'Matabeleland', 'Kabylia', 'Parishes of Jersey',
    'Saint Helena', 'Chameria',
]

home_names = results['home_team']
match_dates = results['datetime']

# Build one boolean mask covering every rule. The rules only read columns that
# are never modified here, so one combined drop removes exactly the rows that
# the individual sequential drops would have removed.
unranked_home = home_names.isin(NON_FIFA_TEAMS)
# na=False: a missing team name simply does not match instead of breaking the mask.
unranked_home |= home_names.str.contains('|'.join(NON_FIFA_PATTERNS), na=False)
for team, first_ranked in FIRST_RANK_DATE.items():
    unranked_home |= (home_names == team) & (match_dates < pd.Timestamp(first_ranked))

# Kept as an in-place drop so `results` remains the same object for later cells.
results.drop(index=results.index[unranked_home], inplace=True)

In [578]:
# Ensuring country name matches in rankings and results dataframes


def _rename_matching(frame, column, pattern, new_name):
    """Set `column` to `new_name` on every row whose current value contains `pattern`.

    `pattern` is handed to str.contains and is therefore a regular expression.
    The assignment goes through a positional boolean mask rather than through
    index labels: `frame.loc[frame[mask].index, column] = ...` would also rewrite
    non-matching rows whenever the frame carries duplicate index labels (which
    is the case for a frame built with pd.concat without ignore_index=True).
    """
    hits = frame[column].str.contains(pattern, na=False).to_numpy()
    frame.loc[hits, column] = new_name


# (pattern, canonical name) pairs, applied in this order.
# NOTE(review): the 'Congo' pattern renames every name containing 'Congo' to
# plain 'Congo'; if the data holds two distinct Congo teams they are merged
# into one -- confirm this is intended.
RANKINGS_RENAMES = [
    ('Congo', 'Congo'),
    ('Swaziland', 'Eswatini'),
    ('Eswatini', 'Eswatini'),
    ('Korea DPR', 'North Korea'),
    ('USA', 'United States'),
    ('Ivoire', 'Ivory Coast'),
    ('Kitts and Nevis', 'St. Kitts and Nevis'),
    ('Korea Republic', 'South Korea'),
    ('St. Vin', 'St. Vincent and the Grenadines'),
    ('Iran', 'Iran'),
    ('Kyrgyz', 'Kyrgyzstan'),
    ('Lucia', 'St. Lucia'),
    ('Netherlands Antilles', 'Curaçao'),
    ('Verde', 'Cape Verde'),
    ('Brunei', 'Brunei'),
    ('Timor-Leste', 'East Timor'),
]

RESULTS_RENAMES = [
    ('Congo', 'Congo'),
    ('Eswatini', 'Eswatini'),
    ('North Korea', 'North Korea'),
    ('USA', 'United States'),
    ('Ivoire', 'Ivory Coast'),
    ('Kitts and Nevis', 'St. Kitts and Nevis'),
    ('Korea Republic', 'South Korea'),
    ('Vincent', 'St. Vincent and the Grenadines'),
    ('Iran', 'Iran'),
    ('Kyrgyz', 'Kyrgyzstan'),
    ('Lucia', 'St. Lucia'),
    ('United States Virgin Islands', 'US Virgin Islands'),
    ('Taiwan', 'Chinese Taipei'),
    ('Timor-Leste', 'East Timor'),
]

for pattern, canonical in RANKINGS_RENAMES:
    _rename_matching(rankings, 'country_full', pattern, canonical)

# Only the home side is normalised in this cell.
for pattern, canonical in RESULTS_RENAMES:
    _rename_matching(results, 'home_team', pattern, canonical)

# Home matches listed under 'Serbia' before 2006-07-12 are relabelled as
# 'Serbia and Montenegro'. None of the patterns above match either name, so
# running this after the loop gives the same result as the original ordering.
serbia_before_split = (
    (results['home_team'] == 'Serbia')
    & (results['datetime'] < pd.Timestamp('2006-07-12'))
).to_numpy()
results.loc[serbia_before_split, 'home_team'] = 'Serbia and Montenegro'

In [579]:
results_dict = results.to_dict('records')
results_dict[0]

{'date': '1993-01-01',
 'home_team': 'Ghana',
 'away_team': 'Mali',
 'home_score': 1.0,
 'away_score': 1.0,
 'tournament': 'Friendly',
 'city': 'Libreville',
 'country': 'Gabon',
 'neutral': True,
 'home_rank': 39.0,
 'away_rank': 0,
 'datetime': Timestamp('1993-01-01 00:00:00')}

In [580]:
rankings_dict = rankings.to_dict('records')
rankings_dict[0:2]

[{'rank': 82.0,
  'country_full': 'Iraq',
  'rank_date': '12/31/1992',
  'datetime': Timestamp('1992-12-31 00:00:00')},
 {'rank': 107.0,
  'country_full': 'Mozambique',
  'rank_date': '12/31/1992',
  'datetime': Timestamp('1992-12-31 00:00:00')}]

In [581]:
def home_rank(row, rankings=None):
    """Return the home team's most recent FIFA rank on or before match day.

    Parameters
    ----------
    row : mapping
        One match; must provide 'home_team' and 'datetime' (for example a row
        handed over by ``results.apply(home_rank, axis=1)``).
    rankings : iterable of dict, optional
        Ranking records with 'country_full', 'datetime' and 'rank' keys.
        Defaults to the notebook-level ``rankings_dict``.

    Returns
    -------
    The 'rank' of the latest ranking record for the home team dated on or
    before the match, or -1 when no such record exists (the team and date are
    printed so the gap can be investigated).

    Notes
    -----
    Every call scans all ranking records, so applying this to the whole
    results frame is slow; it is kept simple on purpose.
    """
    if rankings is None:
        rankings = rankings_dict

    # A missing key is a malformed row and raises KeyError instead of being
    # silently turned into "no ranking".
    hometeam = row['home_team']
    matchday = row['datetime']

    latest = None
    for record in rankings:
        if record['country_full'] != hometeam:
            continue
        if not (record['datetime'] <= matchday):
            continue
        # Choose by date, not by position in the list, so the answer does not
        # depend on the records being sorted. '>=' lets the later of two
        # equally dated records win, matching "last element of a sorted list".
        if latest is None or record['datetime'] >= latest['datetime']:
            latest = record

    if latest is None:
        print(hometeam, matchday)
        return -1
    return latest['rank']

In [582]:
results['home_rank'] = results.apply(home_rank, axis=1)

In [583]:
results[results['home_rank'] == -1].shape

(0, 12)

In [569]:
rankings[rankings['country_full'].str.contains('Helena')]

Unnamed: 0,rank,country_full,rank_date,datetime


In [526]:
results[(results['home_team'] == 'South Sudan') & (results['datetime'] < '2012-08-08 00:00:00')]

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,home_rank,away_rank,datetime
16891,2012-07-10,South Sudan,Uganda,2.0,2.0,Friendly,Juba,South Sudan,False,-1.0,0,2012-07-10


In [476]:
results[results['home_team'].str.contains('Taiwan')]

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,home_rank,away_rank,datetime
12714,2008-04-04,Taiwan,Guam,4.0,1.0,AFC Challenge Cup qualification,Taipei,Taiwan,False,0,0,2008-04-04
12717,2008-04-06,Taiwan,Sri Lanka,2.0,2.0,AFC Challenge Cup qualification,Taipei,Taiwan,False,0,0,2008-04-06
20987,2016-10-08,Taiwan,East Timor,2.0,1.0,AFC Asian Cup qualification,Kaohsiung,Taiwan,False,0,0,2016-10-08
21898,2017-10-05,Taiwan,Mongolia,4.0,2.0,Friendly,Taipei,Taiwan,False,0,0,2017-10-05
21986,2017-10-10,Taiwan,Bahrain,2.0,1.0,AFC Asian Cup qualification,Taipei,Taiwan,False,0,0,2017-10-10
22105,2017-12-03,Taiwan,Philippines,3.0,0.0,Friendly,Taipei,Taiwan,False,0,0,2017-12-03
22108,2017-12-04,Taiwan,East Timor,3.0,1.0,Friendly,Taipei,Taiwan,False,0,0,2017-12-04
22113,2017-12-05,Taiwan,Laos,2.0,0.0,Friendly,Taipei,Taiwan,False,0,0,2017-12-05
22338,2018-03-27,Taiwan,Singapore,1.0,0.0,AFC Asian Cup qualification,Taipei,Taiwan,False,0,0,2018-03-27
22467,2018-06-05,Taiwan,New Zealand,0.0,1.0,Intercontinental Cup,Mumbai,India,True,0,0,2018-06-05
