In [1]:
# !pip install thefuzz

In [2]:
import pandas as pd
import sys
import numpy as np
import re
from thefuzz import process

In [3]:
bundesliga = pd.read_csv('1-bundesliga.csv')
eredivisie = pd.read_csv('eredivisie.csv')
liganos = pd.read_csv('liga-nos.csv')
ligue1 = pd.read_csv('ligue-1.csv')
premier = pd.read_csv('premier-league.csv')
premierliga = pd.read_csv('premier-liga.csv')
liga = pd.read_csv('primera-division.csv')
seriea = pd.read_csv('serie-a.csv')

In [4]:
df = pd.concat([bundesliga, eredivisie, liganos, ligue1, premier, premierliga, liga, seriea], ignore_index = True)
df.head()

Unnamed: 0,club_name,player_name,age,position,club_involved_name,fee,transfer_movement,transfer_period,fee_cleaned,league_name,year,season,country
0,1.FC Saarbrücken,Thomas Stratos,25.0,Defensive Midfield,Hamburger SV,€150Th.,in,Summer,0.15,1 Bundesliga,1992,1992/1993,Germany
1,1.FC Saarbrücken,Matthias Lust,22.0,Left Midfield,Waldh. Mannheim,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
2,1.FC Saarbrücken,Henning Bürger,22.0,Left Midfield,FC Schalke 04,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
3,1.FC Saarbrücken,Michael Hennig,20.0,midfield,FC Berlin,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
4,1.FC Saarbrücken,Malik Badji,24.0,defence,CS Sedan,?,in,Summer,,1 Bundesliga,1992,1992/1993,Germany


In [5]:
len(df)

153171

In [6]:
# Remove duplicated rows
df = df.drop_duplicates()
len(df)

153146

In [7]:
# CLEANING

# Remove 'Career break', 'Unknown', 'Ban', 'Retired' and 'Without Club'
df = df[df['club_involved_name'] != 'Career break']
df = df[df['club_involved_name'] != 'Unknown']
df = df[df['club_involved_name'] != 'Ban']
df = df[df['club_involved_name'] != 'Retired']
df = df[df['club_involved_name'] != 'Without Club']

# Remove df[df['fee'] == '0'] - Only one observation
df = df[df['fee'] != '0']

# Remove transfers involving seocond teams/youth sectors
remove = ['II', 'U19', 'U20', ' B', ' C', '-D', 'Youth', 'U21', 'U17', 'U23', 'U18', 
          'Sub-23', ' 2', 'Res.', 'Juve Next Gen', 'Arsenal-2 Tula', 'FShM Torpedo 19', 
          'US Palermo Yout', 'Cremonese Giov.', 'Amkar-Junior', 'Blackburn Acad.', 'Reggina Primaver', 
          'Real Oviedo You', 'Tom-2 Tomsk', 'DYuSSh Spartak', 'CA Osasuna Prom', 'Y19', 
          'Yth.', 'ACR Messina You', 'Akademia KSS', 'Valladolid Prom', 'Southampton Aca', 
          'Siena Junior', 'Leeds Reserves', 'Anzhi-Yunior', 'Barça Atlètic', 'Akademia Ufa']
remove_escaped = [re.escape(x) for x in remove]
pattern = r'\b(?:' + '|'.join(remove_escaped) + r')(?!\w)'
df = df[~df['club_involved_name'].str.contains(pattern, regex = True)]
        
len(df)

134279

In [8]:
# Add 'id' column
df['id'] = range(0, len(df))

In [9]:
#Reorder columns
new_order = ['id', 'club_name', 'player_name', 'age', 'position', 
             'club_involved_name', 'fee', 'transfer_movement', 
             'transfer_period', 'fee_cleaned', 'league_name', 
             'year', 'season', 'country']

df = df[new_order]

In [10]:
df.reset_index(drop = True, inplace = True)
df.head()

Unnamed: 0,id,club_name,player_name,age,position,club_involved_name,fee,transfer_movement,transfer_period,fee_cleaned,league_name,year,season,country
0,0,1.FC Saarbrücken,Thomas Stratos,25.0,Defensive Midfield,Hamburger SV,€150Th.,in,Summer,0.15,1 Bundesliga,1992,1992/1993,Germany
1,1,1.FC Saarbrücken,Matthias Lust,22.0,Left Midfield,Waldh. Mannheim,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
2,2,1.FC Saarbrücken,Henning Bürger,22.0,Left Midfield,FC Schalke 04,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
3,3,1.FC Saarbrücken,Michael Hennig,20.0,midfield,FC Berlin,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany
4,4,1.FC Saarbrücken,Malik Badji,24.0,defence,CS Sedan,?,in,Summer,,1 Bundesliga,1992,1992/1993,Germany


In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 134279 entries, 0 to 134278
Data columns (total 14 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   id                  134279 non-null  int64  
 1   club_name           134279 non-null  object 
 2   player_name         134279 non-null  object 
 3   age                 134245 non-null  float64
 4   position            134279 non-null  object 
 5   club_involved_name  134279 non-null  object 
 6   fee                 134184 non-null  object 
 7   transfer_movement   134279 non-null  object 
 8   transfer_period     134279 non-null  object 
 9   fee_cleaned         57480 non-null   float64
 10  league_name         134279 non-null  object 
 11  year                134279 non-null  int64  
 12  season              134279 non-null  object 
 13  country             134279 non-null  object 
dtypes: float64(2), int64(2), object(10)
memory usage: 14.3+ MB


In [12]:
def match_name(name, standard_names):
    return process.extractOne(name, standard_names)

df['transfer_id'] = None

datain = df[df['transfer_movement'] == 'in']
dataout = df[df['transfer_movement'] == 'out']

k = 0

CLEAR_LINE = '\033[K'

for i, transfer_in in datain.iterrows():
    index_in = transfer_in['id']
    if transfer_in['transfer_id'] is not None:
        continue
    search = dataout[dataout['season'] == transfer_in['season']]
    search = search[search['transfer_period'] == transfer_in['transfer_period']]
    search = search[search['player_name'] == transfer_in['player_name']]
    search = search[search['age'] == transfer_in['age']]
    search = search[search['position'] == transfer_in['position']]
    search = search[search['fee'] == transfer_in['fee']]
    search = search[search['transfer_id'].isnull()]
    if len(search) > 1:
        teams = list(search['club_involved_name'])
        target = match_name(transfer_in['club_name'], teams)[0]
        search = search[search['club_involved_name'] == target]
    for j, transfer_out in search.iterrows():
        index_out = transfer_out['id']
        datain.loc[datain['id'] == index_in, 'transfer_id'] = k
        dataout.loc[dataout['id'] == index_out, 'transfer_id'] = k
        k += 1

    message = f"Iteration {i}, {np.round(i*100/len(df), 2)}%"
    sys.stdout.write('\r' + CLEAR_LINE + message)
    sys.stdout.flush()

[KIteration 134273, 100.0%

In [13]:
in_notnull = datain[datain['transfer_id'].notnull()].copy()
in_notnull['country2'] = None
out_notnull = dataout[dataout['transfer_id'].notnull()].copy()

for index, row_in in in_notnull.iterrows():
    id = row_in['transfer_id']
    row_out = out_notnull[out_notnull['transfer_id'] == id]
    team_out = row_out['club_name'].values[0]
    contryout = row_out['country'].values[0]
    in_notnull.loc[index, 'club_involved_name'] = team_out
    in_notnull.loc[index, 'country2'] = contryout
    
in_notnull.head() 

Unnamed: 0,id,club_name,player_name,age,position,club_involved_name,fee,transfer_movement,transfer_period,fee_cleaned,league_name,year,season,country,transfer_id,country2
0,0,1.FC Saarbrücken,Thomas Stratos,25.0,Defensive Midfield,Hamburger SV,€150Th.,in,Summer,0.15,1 Bundesliga,1992,1992/1993,Germany,0,Germany
2,2,1.FC Saarbrücken,Henning Bürger,22.0,Left Midfield,FC Schalke 04,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany,1,Germany
8,8,1.FC Saarbrücken,Arno Glesius,26.0,Centre-Forward,Karlsruher SC,?,in,Summer,,1 Bundesliga,1992,1992/1993,Germany,2,Germany
9,9,1.FC Saarbrücken,Wolfram Wuttke,30.0,Second Striker,RCD Espanyol Barcelona,?,in,Summer,,1 Bundesliga,1992,1992/1993,Germany,3,Spain
23,23,SG Wattenscheid 09,Marek Lesniak,28.0,Centre-Forward,Bayer 04 Leverkusen,free transfer,in,Summer,0.0,1 Bundesliga,1992,1992/1993,Germany,4,Germany


In [14]:
network_data = in_notnull[['club_name', 'country', 'club_involved_name', 'country2', 'fee', 'fee_cleaned', 'season', 'transfer_period']].copy()
network_data.rename(columns = {'club_name': 'target', 'country': 'target_country', 'club_involved_name': 'source', 'country2': 'source_country'}, inplace = True)
network_data.reset_index(drop = True, inplace = True)
network_data.head()

Unnamed: 0,target,target_country,source,source_country,fee,fee_cleaned,season,transfer_period
0,1.FC Saarbrücken,Germany,Hamburger SV,Germany,€150Th.,0.15,1992/1993,Summer
1,1.FC Saarbrücken,Germany,FC Schalke 04,Germany,free transfer,0.0,1992/1993,Summer
2,1.FC Saarbrücken,Germany,Karlsruher SC,Germany,?,,1992/1993,Summer
3,1.FC Saarbrücken,Germany,RCD Espanyol Barcelona,Spain,?,,1992/1993,Summer
4,SG Wattenscheid 09,Germany,Bayer 04 Leverkusen,Germany,free transfer,0.0,1992/1993,Summer


In [15]:
len(network_data)

25721

In [16]:
network_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25721 entries, 0 to 25720
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   target           25721 non-null  object 
 1   target_country   25721 non-null  object 
 2   source           25721 non-null  object 
 3   source_country   25721 non-null  object 
 4   fee              25721 non-null  object 
 5   fee_cleaned      13147 non-null  float64
 6   season           25721 non-null  object 
 7   transfer_period  25721 non-null  object 
dtypes: float64(1), object(7)
memory usage: 1.6+ MB


# Team name cleaning


In [17]:
len(network_data['source'].unique()), len(network_data['target'].unique())

(383, 395)

## Serie A

In [18]:
#check for different names for the same club
italian_source_teams = network_data[network_data['source_country'] == 'Italy']['source'].unique()
italian_target_teams = network_data[network_data['target_country'] == 'Italy']['target'].unique()

In [19]:
sorted_italian_source_teams = sorted(italian_source_teams)
print("Italian Source Teams in Alphabetical Order:")
for team in sorted_italian_source_teams:
    print(team)

sorted_italian_target_teams = sorted(italian_target_teams)
print("\nItalian Target Teams in Alphabetical Order:")
for team in sorted_italian_target_teams:
    print(team)

Italian Source Teams in Alphabetical Order:
AC Cesena
AC Fiorentina
AC Milan
AC Monza
AC Parma
AC Perugia
AC Reggiana
AC Siena
AC Venezia 1907
ACF Fiorentina
AS Bari
AS Livorno
AS Roma
Ancona Calcio
Ascoli Calcio 1898
Atalanta BC
Benevento Calcio
Bologna FC 1909
Brescia Calcio
Cagliari Calcio
Calcio Padova
Carpi FC 1909
Catania SSD
Chievo Verona
Como Calcio
Delfino Pescara 1936
Empoli FC
FC Crotone
FC Empoli
FC Internazionale
FC Messina Peloro
Foggia Calcio
Frosinone Calcio
Genoa 1893
Genoa CFC
Hellas Verona
Inter Milan
Juventus FC
Milan AC
Modena FC
Novara Calcio 1908
Parma Calcio 1913
Parma FC
Piacenza FC
Reggina Calcio
SPAL
SS Lazio
SSC Napoli
Spezia Calcio
Torino Calcio
Torino FC
Treviso FBC 1993
UC Sampdoria
US Cremonese
US Lecce
US Palermo
US Salernitana
US Salernitana 1919
US Sassuolo
Udinese Calcio
Venezia FC
Vicenza Calcio

Italian Target Teams in Alphabetical Order:
AC Cesena
AC Fiorentina
AC Milan
AC Monza
AC Parma
AC Perugia
AC Reggiana
AC Siena
AC Venezia 1907
ACF Fiorenti

In [20]:
#source teams
network_data['source'] = network_data['source'].replace('Empoli FC', 'FC Empoli')
network_data['source'] = network_data['source'].replace('FC Internazionale', 'Inter Milan')
network_data['source'] = network_data['source'].replace('ACF Fiorentina', 'AC Fiorentina')
network_data['source'] = network_data['source'].replace('Milan AC', 'AC Milan')
network_data['source'] = network_data['source'].replace('Parma Calcio 1913', 'Parma FC')
network_data['source'] = network_data['source'].replace('AC Parma', 'Parma FC')
network_data['source'] = network_data['source'].replace('AC Venezia 1907', 'Venezia FC')
network_data['source'] = network_data['source'].replace('Torino Calcio', 'Torino FC')
network_data['source'] = network_data['source'].replace('US Salernitana', 'US Salernitana 1919')
network_data['source'] = network_data['source'].replace('Genoa CFC', 'Genoa 1893')


#target team
network_data['target'] = network_data['target'].replace('Empoli FC', 'FC Empoli')
network_data['target'] = network_data['target'].replace('FC Internazionale', 'Inter Milan')
network_data['target'] = network_data['target'].replace('ACF Fiorentina', 'AC Fiorentina')
network_data['target'] = network_data['target'].replace('Milan AC', 'AC Milan')
network_data['target'] = network_data['target'].replace('Parma Calcio 1913', 'Parma FC')
network_data['target'] = network_data['target'].replace('AC Parma', 'Parma FC')
network_data['target'] = network_data['target'].replace('AC Venezia 1907', 'Venezia FC')
network_data['target'] = network_data['target'].replace('Torino Calcio', 'Torino FC')
network_data['target'] = network_data['target'].replace('US Salernitana', 'US Salernitana 1919')
network_data['target'] = network_data['target'].replace('Genoa CFC', 'Genoa 1893')
network_data['target'] = network_data['target'].replace('SPAL 2013', 'SPAL')

## Premier League

In [21]:
english_source_teams = network_data[network_data['source_country'] == 'England']['source'].unique()
english_target_teams = network_data[network_data['target_country'] == 'England']['target'].unique()

In [22]:
sorted_english_source_teams = sorted(english_source_teams)
print("English Source Teams in Alphabetical Order:")
for team in sorted_english_source_teams:
    print(team)

sorted_english_target_teams = sorted(english_target_teams)
print("\nEnglish Target Teams in Alphabetical Order:")
for team in sorted_english_target_teams:
    print(team)

English Source Teams in Alphabetical Order:
AFC Bournemouth
Arsenal FC
Aston Villa
Birmingham City
Blackburn Rovers
Blackpool FC
Bolton Wanderers
Bradford City
Brentford FC
Brighton & Hove Albion
Burnley FC
Cardiff City
Charlton Athletic
Chelsea FC
Coventry City
Crystal Palace
Derby County
Everton FC
Fulham FC
Huddersfield Town
Hull City
Ipswich Town
Leeds United
Leicester City
Liverpool FC
Manchester City
Manchester United
Middlesbrough FC
Newcastle United
Norwich City
Nottingham Forest
Oldham Athletic
Portsmouth FC
Queens Park Rangers
Reading FC
Sheffield United
Sheffield Wednesday
Southampton FC
Stoke City
Sunderland AFC
Swansea City
Swindon Town
Tottenham Hotspur
Watford FC
West Bromwich Albion
West Ham United
Wigan Athletic
Wimbledon FC
Wolverhampton Wanderers

English Target Teams in Alphabetical Order:
AFC Bournemouth
Arsenal FC
Aston Villa
Barnsley FC
Birmingham City
Blackburn Rovers
Blackpool FC
Bolton Wanderers
Bradford City
Brentford FC
Brighton & Hove Albion
Burnley FC
Card

## Ligue 1

In [23]:
french_source_teams = network_data[network_data['source_country'] == 'France']['source'].unique()
french_target_teams = network_data[network_data['target_country'] == 'France']['target'].unique()

In [24]:
sorted_french_source_teams = sorted(french_source_teams)
print("French Source Teams in Alphabetical Order:")
for team in sorted_french_source_teams:
    print(team)

sorted_french_target_teams = sorted(french_target_teams)
print("\nFrench Target Teams in Alphabetical Order:")
for team in sorted_french_target_teams:
    print(team)

French Source Teams in Alphabetical Order:
AC Ajaccio
AC Le Havre
AJ Auxerre
AS Cannes
AS Monaco
AS Nancy-Lorraine
AS Saint-Étienne
Amiens SC
Angers SCO
Association Troyes Aube Champagne
Athlétic Club Arlésien
CS Sedan-Ardennes
Clermont Foot 63
Dijon FCO
EA Guingamp
ESTAC Troyes
FC Girondins Bordeaux
FC Gueugnon
FC Istres Ouest Provence
FC Lorient
FC Martigues
FC Metz
FC Nantes
FC Sochaux-Montbéliard
FC Toulouse
FC Évian Thonon Gaillard
GFC Ajaccio
Grenoble Foot 38
LB Châteauroux
LOSC Lille
Le Mans Union Club 72
Montpellier HSC
Nîmes Olympique
OGC Nice
Olympique Lyon
Olympique Marseille
Paris Saint-Germain
RC Lens
RC Strasbourg Alsace
SC Bastia
SM Caen
Sporting Club de Toulon et du Var
Stade Brestois 29
Stade Reims
Stade Rennais FC
US Boulogne
Valenciennes FC

French Target Teams in Alphabetical Order:
AC Ajaccio
AC Le Havre
AJ Auxerre
AS Cannes
AS Monaco
AS Nancy-Lorraine
AS Saint-Étienne
Amiens SC
Angers SCO
Association Troyes Aube Champagne
Athlétic Club Arlésien
CS Sedan-Ardennes
C

In [25]:
#source teams
network_data['source'] = network_data['source'].replace('GFC Ajaccio', 'AC Ajaccio')

#target teams
network_data['target'] = network_data['target'].replace('GFC Ajaccio', 'AC Ajaccio')

## LaLiga

In [26]:
spanish_source_teams = network_data[network_data['source_country'] == 'Spain']['source'].unique()
spanish_target_teams = network_data[network_data['target_country'] == 'Spain']['target'].unique()

In [27]:
sorted_spanish_source_teams = sorted(spanish_source_teams)
print("Spanish Source Teams in Alphabetical Order:")
for team in sorted_spanish_source_teams:
    print(team)

sorted_spanish_target_teams = sorted(spanish_target_teams)
print("\nSpanish Target Teams in Alphabetical Order:")
for team in sorted_spanish_target_teams:
    print(team)

Spanish Source Teams in Alphabetical Order:
Albacete Balompié
Athletic Bilbao
Atlético de Madrid
CA Osasuna
CD Leganés
CD Logroñés
CD Numancia
CD Tenerife
CF Extremadura (- 2010)
CP Mérida
Celta de Vigo
Cádiz CF
Córdoba CF
Deportivo Alavés
Deportivo de La Coruña
Elche CF
FC Barcelona
Getafe CF
Girona FC
Granada CF
Hércules CF
Levante UD
Málaga CF
RCD Espanyol Barcelona
RCD Mallorca
Racing Santander
Rayo Vallecano
Real Betis Balompié
Real Madrid
Real Murcia CF
Real Oviedo
Real Sociedad
Real Valladolid CF
Real Zaragoza
Recreativo Huelva
SD Compostela
SD Eibar
SD Huesca
Sevilla FC
Sporting Gijón
UD Almería
UD Las Palmas
UD Salamanca
Valencia CF
Villarreal CF
Xerez CD

Spanish Target Teams in Alphabetical Order:
Albacete Balompié
Athletic Bilbao
Atlético de Madrid
CA Osasuna
CD Leganés
CD Logroñés
CD Numancia
CD Tenerife
CF Extremadura (- 2010)
CP Mérida
Celta de Vigo
Cádiz CF
Córdoba CF
Deportivo Alavés
Deportivo de La Coruña
Elche CF
FC Barcelona
Getafe CF
Gimnàstic de Tarragona
Girona F

In [28]:
#source teams
network_data['source'] = network_data['source'].replace('CF Extremadura (- 2010)', 'CF Extremadura')

#target teams
network_data['target'] = network_data['target'].replace('CF Extremadura (- 2010)', 'CF Extremadura')

## Bundesliga

In [29]:
german_source_teams = network_data[network_data['source_country'] == 'Germany']['source'].unique()
german_target_teams = network_data[network_data['target_country'] == 'Germany']['target'].unique()

In [30]:
sorted_german_source_teams = sorted(german_source_teams)
print("German Source Teams in Alphabetical Order:")
for team in sorted_german_source_teams:
    print(team)

sorted_german_target_teams = sorted(german_target_teams)
print("\nGerman Target Teams in Alphabetical Order:")
for team in sorted_german_target_teams:
    print(team)

German Source Teams in Alphabetical Order:
1. FC Köln
1.FC Kaiserslautern
1.FC Nuremberg
1.FC Union Berlin
1.FSV Mainz 05
Arminia Bielefeld
Bayer 04 Leverkusen
Bayer 05 Uerdingen
Bayern Munich
Borussia Dortmund
Borussia Mönchengladbach
Eintracht Frankfurt
FC Augsburg
FC Energie Cottbus
FC Hansa Rostock
FC Ingolstadt 04
FC Schalke 04
FC St. Pauli
Fortuna Düsseldorf
Hamburger SV
Hannover 96
Hertha BSC
KFC Uerdingen 05
Karlsruher SC
MSV Duisburg
RB Leipzig
SC Freiburg
SC Paderborn 07
SG Dynamo Dresden
SG Wattenscheid 09
SV Darmstadt 98
SV Werder Bremen
SpVgg Greuther Fürth
SpVgg Unterhaching
TSG 1899 Hoffenheim
TSV 1860 Munich
VfB Leipzig (- 2004)
VfB Stuttgart
VfL Bochum
VfL Wolfsburg

German Target Teams in Alphabetical Order:
1. FC Köln
1.FC Kaiserslautern
1.FC Nuremberg
1.FC Saarbrücken
1.FC Union Berlin
1.FSV Mainz 05
Alemannia Aachen
Arminia Bielefeld
Bayer 04 Leverkusen
Bayer 05 Uerdingen
Bayern Munich
Borussia Dortmund
Borussia Mönchengladbach
Eintracht Braunschweig
Eintracht Fran

In [31]:
#source teams
network_data['source'] = network_data['source'].replace('VfB Leipzig (- 2004)', 'VfB Leipzig')

#target teams
network_data['target'] = network_data['target'].replace('VfB Leipzig (- 2004)', 'VfB Leipzig')

## Liga Portugal 1

In [32]:
protuguese_source_teams = network_data[network_data['source_country'] == 'Portugal']['source'].unique()
protuguese_target_teams = network_data[network_data['target_country'] == 'Portugal']['target'].unique()

In [33]:
sorted_protuguese_source_teams = sorted(protuguese_source_teams)
print("Portuguese Source Teams in Alphabetical Order:")
for team in sorted_protuguese_source_teams:
    print(team)

sorted_protuguese_target_teams = sorted(protuguese_target_teams)
print("\nPortuguese Target Teams in Alphabetical Order:")
for team in sorted_protuguese_target_teams:
    print(team)

Portuguese Source Teams in Alphabetical Order:
Académica Coimbra
Belenenses SAD
Boavista FC
CD Feirense
CD Nacional
CD Santa Clara
CD Tondela
CD Trofense
CF Os Belenenses
CF União Madeira
CS Marítimo
Casa Pia AC
Clube Futebol Estrela da Amadora
Desportivo Aves (- 2020)
FC Alverca
FC Arouca
FC Famalicão
FC Paços de Ferreira
FC Penafiel
FC Porto
FC Tirsense
FC Vizela
GD Chaves
GD Estoril Praia
Gil Vicente FC
Leixões SC
Leça FC
Moreirense FC
Naval 1º de Maio
Portimonense SC
Rio Ave FC
SC Beira-Mar
SC Braga
SC Campomaiorense (- 2001)
SC Farense
SC Olhanense
SC Salgueiros
SL Benfica
Sporting CP
União de Leiria
Varzim SC
Vitória Guimarães SC
Vitória Setúbal FC

Portuguese Target Teams in Alphabetical Order:
Académica Coimbra
Belenenses SAD
Boavista FC
CA Felgueiras
CD Feirense
CD Nacional
CD Santa Clara
CD Tondela
CD Trofense
CF Os Belenenses
CF União Madeira
CS Marítimo
Casa Pia AC
Clube Futebol Estrela da Amadora
Desportivo Aves (- 2020)
FC Alverca
FC Arouca
FC Famalicão
FC Paços de Ferrei

In [34]:
#source teams
network_data['source'] = network_data['source'].replace('Desportivo Aves (- 2020)', 'Desportivo Aves')
network_data['source'] = network_data['source'].replace('SC Campomaiorense (- 2001)', 'SC Campomaiorense')

#target teams
network_data['target'] = network_data['target'].replace('Desportivo Aves (- 2020)', 'Desportivo Aves')
network_data['target'] = network_data['target'].replace('SC Campomaiorense (- 2001)', 'SC Campomaiorense')

## Prem'er-Liga

In [35]:
russian_source_teams = network_data[network_data['source_country'] == 'Russia']['source'].unique()
russian_target_teams = network_data[network_data['target_country'] == 'Russia']['target'].unique()

In [36]:
sorted_russian_source_teams = sorted(russian_source_teams)
print("Russian Source Teams in Alphabetical Order:")
for team in sorted_russian_source_teams:
    print(team)

sorted_russian_target_teams = sorted(russian_target_teams)
print("\nRussian Target Teams in Alphabetical Order:")
for team in sorted_russian_target_teams:
    print(team)

Russian Source Teams in Alphabetical Order:
Akhmat Grozny
Alania Vladikavkaz
Amkar Perm
Anzhi Makhachkala ( -2022)
Arsenal Tula
Baltika Kaliningrad
CSKA Moscow
Chernomorets Novorossijsk
Dinamo Moscow
Dinamo Stavropol
Dinamo-Gazovik Tyumen
Dynamo Moscow
Enisey Krasnoyarsk
FC Moscow
FC Nizhniy Novgorod
FC Pari Nizhniy Novgorod
FC Sochi
FC Tosno (-2018)
FK Khimki
FK Krasnodar
FK Orenburg
FK Rostov
FK Tambov
FK Tyumen
FK Ufa
Fakel Voronezh
Krylya Sovetov Samara
Kuban Krasnodar (-2018)
Lada-Togliatti-VAZ Togliatti
Lokomotiv Moscow
Lokomotiv Nizhniy Novgorod
Luch Vladivostok
Mordovia Saransk (-2020)
Ocean Nakhodka
PFK Tambov
Rostselmash Rostov
Rotor Volgograd
Rubin Kazan
SKA Khabarovsk
Saturn REN-TV Ramenskoe
Saturn Ramenskoe
Shinnik Yaroslavl
Sibir Novosibirsk (- 2019)
Sokol Saratov
Spartak Moscow
Spartak Nalchik
Spartak Vladikavkaz
Spartak-Alania Vladikavkaz
Terek Grozny
Tom Tomsk
Torpedo Moscow
Torpedo-Metallurg Moscow
Torpedo-ZiL Moscow
Ural Yekaterinburg
Uralmash Ekaterinburg
Volga Nizh

In [37]:
#source teams
network_data['source'] = network_data['source'].replace('Dynamo Moscow', 'Dinamo Moscow')
network_data['source'] = network_data['source'].replace('FC Nizhniy Novgorod', 'FC Pari Nizhniy Novgorod')
network_data['source'] = network_data['source'].replace('FC Tosno (-2018)', 'FC Tosno')
network_data['source'] = network_data['source'].replace('Kuban Krasnodar (-2018)', 'Kuban Krasnodar')
network_data['source'] = network_data['source'].replace('Mordovia Saransk (-2020)', 'Mordovia Saransk')
network_data['source'] = network_data['source'].replace('Saturn REN-TV Ramenskoe', 'Saturn Ramenskoe')
network_data['source'] = network_data['source'].replace('Sibir Novosibirsk (- 2019)', 'Sibir Novosibirsk')
network_data['source'] = network_data['source'].replace('Volga Nizhniy Novgorod (- 2016)', 'Volga Nizhniy Novgorod')
network_data['source'] = network_data['source'].replace('Spartak Vladikavkaz', 'Alania Vladikavkaz')
network_data['source'] = network_data['source'].replace('Spartak-Alania Vladikavkaz', 'Alania Vladikavkaz')
network_data['source'] = network_data['source'].replace('Torpedo-Metallurg Moscow', 'FC Moscow')
network_data['source'] = network_data['source'].replace('Torpedo-ZiL Moscow', 'FC Moscow')

#target teams
network_data['target'] = network_data['target'].replace('Dynamo Moscow', 'Dinamo Moscow')
network_data['target'] = network_data['target'].replace('FC Nizhniy Novgorod', 'FC Pari Nizhniy Novgorod')
network_data['target'] = network_data['target'].replace('FC Tosno (-2018)', 'FC Tosno')
network_data['target'] = network_data['target'].replace('Kuban Krasnodar (-2018)', 'Kuban Krasnodar')
network_data['target'] = network_data['target'].replace('Mordovia Saransk (-2020)', 'Mordovia Saransk')
network_data['target'] = network_data['target'].replace('Saturn REN-TV Ramenskoe', 'Saturn Ramenskoe')
network_data['target'] = network_data['target'].replace('Sibir Novosibirsk (- 2019)', 'Sibir Novosibirsk')
network_data['target'] = network_data['target'].replace('Volga Nizhniy Novgorod (- 2016)', 'Volga Nizhniy Novgorod')
network_data['target'] = network_data['target'].replace('Spartak Vladikavkaz', 'Alania Vladikavkaz')
network_data['target'] = network_data['target'].replace('Spartak-Alania Vladikavkaz', 'Alania Vladikavkaz')
network_data['target'] = network_data['target'].replace('Torpedo-Metallurg Moscow', 'FC Moscow')
network_data['target'] = network_data['target'].replace('Torpedo-ZiL Moscow', 'FC Moscow')

## Eredivisie

In [38]:
dutch_source_teams = network_data[network_data['source_country'] == 'Netherlands']['source'].unique()
dutch_target_teams = network_data[network_data['target_country'] == 'Netherlands']['target'].unique()

In [39]:
sorted_dutch_source_teams = sorted(dutch_source_teams)
print("Dutch Source Teams in Alphabetical Order:")
for team in sorted_dutch_source_teams:
    print(team)

sorted_dutch_source_teams = sorted(dutch_source_teams)
print("\nDutch Target Teams in Alphabetical Order:")
for team in sorted_dutch_source_teams:
    print(team)

Dutch Source Teams in Alphabetical Order:
ADO Den Haag
AZ Alkmaar
Ajax Amsterdam
Cambuur-Leeuwarden bvo
De Graafschap Doetinchem
Dordrecht'90
Excelsior Rotterdam
FC Den Bosch
FC Dordrecht
FC Emmen
FC Groningen
FC Utrecht
FC Volendam
FC Zwolle
Feyenoord Rotterdam
Fortuna Sittard
Go Ahead Eagles
Heracles Almelo
MVV Maastricht
NAC Breda
NEC Nijmegen
PEC Zwolle
PSV Eindhoven
RBC Roosendaal
RKC Waalwijk
Roda JC Kerkrade
SC Cambuur-Leeuwarden
SC Heerenveen
Sparta Rotterdam
Twente Enschede FC
VVV-Venlo
Vitesse Arnhem

Dutch Target Teams in Alphabetical Order:
ADO Den Haag
AZ Alkmaar
Ajax Amsterdam
Cambuur-Leeuwarden bvo
De Graafschap Doetinchem
Dordrecht'90
Excelsior Rotterdam
FC Den Bosch
FC Dordrecht
FC Emmen
FC Groningen
FC Utrecht
FC Volendam
FC Zwolle
Feyenoord Rotterdam
Fortuna Sittard
Go Ahead Eagles
Heracles Almelo
MVV Maastricht
NAC Breda
NEC Nijmegen
PEC Zwolle
PSV Eindhoven
RBC Roosendaal
RKC Waalwijk
Roda JC Kerkrade
SC Cambuur-Leeuwarden
SC Heerenveen
Sparta Rotterdam
Twente Ensc

In [40]:
#source teams
network_data['source'] = network_data['source'].replace("Dordrecht'90", 'FC Dordrecht')
network_data['source'] = network_data['source'].replace('FC Zwolle', 'PEC Zwolle')

#target teams
network_data['target'] = network_data['target'].replace("Dordrecht'90", 'FC Dordrecht')
network_data['target'] = network_data['target'].replace('FC Zwolle', 'PEC Zwolle')

In [41]:
len(network_data['source'].unique()), len(network_data['target'].unique())

(363, 374)

In [42]:
network_data.to_csv('network_data.csv')