In [15]:
import pandas as pd
from sqlalchemy import create_engine
from fuzzywuzzy import fuzz

In [16]:
connection_string = "postgres:postgres@localhost:5432/fifa_db"

engine = create_engine(f'postgresql://{connection_string}')

In [17]:
#confirm players table (18278 Rows)
fifa_names_df = pd.read_sql_query('select long_name, nationality from players', con=engine)
fifa_names_df

Unnamed: 0,long_name,nationality
0,Lionel Andrés Messi Cuccittini,Argentina
1,Cristiano Ronaldo dos Santos Aveiro,Portugal
2,Neymar da Silva Santos Junior,Brazil
3,Jan Oblak,Slovenia
4,Eden Hazard,Belgium
...,...,...
18273,邵帅,China PR
18274,Mingjie Xiao,China PR
18275,张威,China PR
18276,汪海健,China PR


In [18]:
#(569 rows)
fbref_names_df = pd.read_sql_query('select long_name from player_perf', con=engine)
fbref_names_df

Unnamed: 0,long_name
0,Virgil van Dijk
1,Trent Alexander-Arnold
2,Andrew Robertson
3,Georginio Wijnaldum
4,Roberto Firmino
...,...
564,Akin Famewo
565,Philip Heise
566,Timm Klose
567,Archie Mair


In [24]:
data = fbref_names_df.merge(fifa_names_df, 
                        left_on='long_name',
                        right_on='long_name',
                        how='outer')

fbref_names_df

Unnamed: 0,long_name
0,Virgil van Dijk
1,Trent Alexander-Arnold
2,Andrew Robertson
3,Georginio Wijnaldum
4,Roberto Firmino
...,...
564,Akin Famewo
565,Philip Heise
566,Timm Klose
567,Archie Mair


In [20]:
def match_name(name, list_names, min_score=0):
    # -1 score incase we don't get any matches
    max_score = -1
    # Returning empty name for no match as well
    max_name = ""
    # Iternating over all names in the other
    for name2 in list_names:
        #Finding fuzzy match score
        score = fuzz.ratio(name, name2)
        # Checking if we are above our threshold and have a better score
        if (score > min_score) & (score > max_score):
            max_name = name2
            max_score = score
    return (max_name, max_score)



In [23]:
# List for dicts for easy dataframe creation
dict_list = []

# iterating over our players without salaries found above
for name in fbref_names_df.long_name:
    # Use our method to find best match, we can set a threshold here
    match = match_name(name, fifa_names_df, 50)
    
    # New dict for storing data
    dict_ = {}
    dict_.update({"player_name" : name})
    dict_list.append(dict_)
    
merge_table = pd.DataFrame(dict_list)
# Display results
merge_table

Unnamed: 0,player_name
0,Virgil van Dijk
1,Trent Alexander-Arnold
2,Andrew Robertson
3,Georginio Wijnaldum
4,Roberto Firmino
...,...
564,Akin Famewo
565,Philip Heise
566,Timm Klose
567,Archie Mair


In [27]:
player_table = pd.read_sql_query('select * from players INNER JOIN player_perf ON players.long_name = player_perf.long_name', con=engine)
player_table

Unnamed: 0,sofifa_id,player_url,short_name,long_name,age,dob,height_cm,weight_kg,nationality,club,...,per_90_ga,long_name.1,per_90_assts,per_90_goals,perf_goals,exp_assists,exp_np_exp_goals,perf_crd_y,per_90_exp_goals,per_90_g_a_pk
0,192985,https://sofifa.com/player/192985/kevin-de-bruy...,K. De Bruyne,Kevin De Bruyne,28,1991-06-28,181,70,Belgium,Manchester City,...,1.09,Kevin De Bruyne,0.76,0.33,7.0,12.0,4.4,2.0,0.21,1.09
1,203376,https://sofifa.com/player/203376/virgil-van-di...,V. van Dijk,Virgil van Dijk,27,1991-07-08,193,92,Netherlands,Liverpool,...,0.17,Virgil van Dijk,0.00,0.17,4.0,0.5,1.4,1.0,0.06,0.17
2,202126,https://sofifa.com/player/202126/harry-kane/20...,H. Kane,Harry Kane,25,1993-07-28,188,89,England,Tottenham Hotspur,...,0.66,Harry Kane,0.10,0.56,11.0,1.8,5.4,2.0,0.35,0.56
3,215914,https://sofifa.com/player/215914/ngolo-kante/2...,N. Kanté,N'Golo Kanté,28,1991-03-29,168,72,France,Chelsea,...,0.21,N'Golo Kanté,0.00,0.21,3.0,1.9,1.5,3.0,0.10,0.21
4,195864,https://sofifa.com/player/195864/paul-pogba/20...,P. Pogba,Paul Pogba,26,1993-03-15,191,84,France,Manchester United,...,0.35,Paul Pogba,0.35,0.00,0.0,1.5,0.9,0.0,0.28,0.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
381,247741,https://sofifa.com/player/247741/anthony-drisc...,A. Driscoll-Glennon,Anthony Driscoll-Glennon,19,1999-11-26,175,72,England,Burnley,...,,Anthony Driscoll-Glennon,,,,,,,,
382,236529,https://sofifa.com/player/236529/steven-alzate...,S. Alzate,Steven Alzate,20,1998-09-08,180,65,England,Brighton & Hove Albion,...,0.00,Steven Alzate,0.00,0.00,0.0,0.7,1.1,1.0,0.09,0.00
383,232648,https://sofifa.com/player/232648/liam-gibson/2...,L. Gibson,Liam Gibson,22,1997-04-25,185,71,England,Grimsby Town,...,,Liam Gibson,,,,,,,,
384,244715,https://sofifa.com/player/244715/taylor-perry/...,T. Perry,Taylor Perry,17,2001-07-15,180,73,England,Wolverhampton Wanderers,...,,Taylor Perry,,,,,,,,
