In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
import string
import plotly.graph_objects as go

from sklearn.decomposition import PCA
from sklearn.neighbors import NearestNeighbors

import warnings
warnings.filterwarnings('ignore')

import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

pd.set_option('display.max_columns', None)
pd.set_option('max_rows',None)
pd.set_option('precision', 0)

%matplotlib inline
sns.set()

In [2]:
df = pd.read_csv('../data/df_rs3.csv')
df0 = pd.read_csv('../data/fifa19.csv')

In [3]:
features = ['Preferred Foot', 'Weak Foot', 'Skill Moves', 'Crossing', 
             'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys', 'Dribbling',
             'Curve', 'FKAccuracy', 'LongPassing', 'BallControl', 'Acceleration',
             'SprintSpeed', 'Agility', 'Reactions', 'Balance', 'ShotPower',
             'Jumping', 'Stamina', 'Strength', 'LongShots', 'Aggression',
             'Interceptions', 'Positioning', 'Vision', 'Penalties', 'Composure',
             'Marking', 'StandingTackle', 'SlidingTackle']

In [4]:
df[features].head()

Unnamed: 0,Preferred Foot,Weak Foot,Skill Moves,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle
0,0,4,4,84,95,70,90,86,97,93,94,87,96,91,86,91,95,95,85,68,72,59,94,48,22,94,94,75,96,33,28,26
1,1,4,5,84,94,89,81,87,88,81,76,77,94,89,91,87,96,70,95,95,88,79,93,63,29,95,82,85,95,28,31,23
2,1,5,5,79,87,62,84,84,96,88,87,78,95,94,90,96,94,84,80,61,81,49,82,56,36,89,87,81,94,27,24,33
3,1,3,1,17,13,21,50,13,18,21,19,51,42,57,58,60,90,43,31,67,43,64,12,38,30,12,68,40,68,15,21,13
4,1,5,4,93,82,55,92,82,86,85,83,91,91,78,76,79,91,77,91,63,90,75,91,76,61,87,94,79,88,68,58,51


In [5]:
# Combining ID, Name and features into a df
df2 = df[['ID','Name']+features]

# Create a dict with name and ID
id_name = df[['Name','ID']].set_index('ID')['Name'].to_dict()

In [6]:
id_name

{158023: 'L. Messi',
 20801: 'Cristiano Ronaldo',
 190871: 'Neymar Jr',
 193080: 'De Gea',
 192985: 'K. De Bruyne',
 183277: 'E. Hazard',
 177003: 'L. Modrić',
 176580: 'L. Suárez',
 155862: 'Sergio Ramos',
 200389: 'J. Oblak',
 188545: 'R. Lewandowski',
 182521: 'T. Kroos',
 182493: 'D. Godín',
 168542: 'David Silva',
 215914: 'N. Kanté',
 211110: 'P. Dybala',
 202126: 'H. Kane',
 194765: 'A. Griezmann',
 192448: 'M. ter Stegen',
 192119: 'T. Courtois',
 189511: 'Sergio Busquets',
 179813: 'E. Cavani',
 167495: 'M. Neuer',
 153079: 'S. Agüero',
 138956: 'G. Chiellini',
 231747: 'K. Mbappé',
 209331: 'M. Salah',
 200145: 'Casemiro',
 198710: 'J. Rodríguez',
 198219: 'L. Insigne',
 197781: 'Isco',
 190460: 'C. Eriksen',
 189242: 'Coutinho',
 188567: 'P. Aubameyang',
 178603: 'M. Hummels',
 176676: 'Marcelo',
 173731: 'G. Bale',
 167948: 'H. Lloris',
 167664: 'G. Higuaín',
 164240: 'Thiago Silva',
 162835: 'S. Handanovič',
 1179: 'G. Buffon',
 205600: 'S. Umtiti',
 201399: 'M. Icardi',
 

In [8]:
# setting number of recommendations = 10
n = 10

X = df2[features]
nbrs = NearestNeighbors(n_neighbors=n+1, algorithm='ball_tree').fit(X)
dist, rank = nbrs.kneighbors(X)

similar_df = pd.DataFrame(columns=[f'rank_{i}'for i in range(1,n+1)],
                          index=df2['ID'].values,
                          data=rank[:,1:])
dist_df = pd.DataFrame(columns=[f'rank_{i}'for i in range(1,n+1)],
                       index=df2['ID'].values,
                       data=dist[:,1:])


for cols in list(similar_df):
    tg_col = similar_df[cols]
    new_value = df2['ID'].iloc[tg_col].tolist()
    similar_df[cols] = new_value

In [9]:
def player_scouter(similar_df, dist_df, player_id):
    player_id = int(player_id)
    player_name = df2[df2['ID'] == player_id]['Name'].values[0]
    
    ## Bar chart
    Xaxis = 1/(1+dist_df.loc[player_id].values[::-1])
    Yaxis = similar_df.loc[player_id].map(id_name).values[::-1]

    fig = go.Figure(go.Bar(
                x=Xaxis,
                y=Yaxis,
                orientation='h'))
    
    fig.update_layout(title_text='Players similar to  " '+str(player_name)+' "')
    fig.show()

    ## Table display
    similar_player = similar_df.loc[player_id]
    display_col = ['Club', 'Name','Age','Nationality','Overall','Potential', 'Preferred Foot','Weak Foot', 
                'Wage','Value']
    
    display_df = pd.DataFrame({'ID':similar_player})\
    .merge(df[['ID']+display_col], how='left', on='ID')[display_col]

    display(display_df)

## Evaluate recommender performance
---

Now comes the fun part! Let's check out a few players to see if the recommender aligns with our intuition. In the cell below we'll do the following:
1. Create a search term
2. Use that to find all players matching the searched player
3. For each player, we'll list off the ten most similar players

In [15]:
player_scouter(similar_df, dist_df, 158023)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Paris Saint-Germain,Neymar Jr,26,Brazil,92,93,1,5,290000,100000000.0
1,Juventus,P. Dybala,24,Argentina,89,94,0,3,205000,90000000.0
2,Chelsea,E. Hazard,27,Belgium,91,91,1,4,340000,90000000.0
3,FC Bayern München,A. Robben,34,Netherlands,84,84,0,2,110000,20000000.0
4,Manchester City,S. Agüero,30,Argentina,89,89,1,4,300000,60000000.0
5,Napoli,L. Insigne,27,Italy,88,88,1,3,165000,60000000.0
6,Manchester City,R. Mahrez,27,Algeria,85,85,0,4,205000,40000000.0
7,Borussia Dortmund,M. Reus,29,Germany,86,86,1,4,100000,40000000.0
8,FC Barcelona,Malcom,21,Brazil,82,89,0,4,140000,30000000.0
9,Paris Saint-Germain,K. Mbappé,19,France,88,95,1,4,100000,80000000.0


In [11]:
player_scouter(similar_df, dist_df, 20801)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Manchester City,S. Agüero,30,Argentina,89,89,1,4,300000,60000000.0
1,FC Bayern München,R. Lewandowski,29,Poland,90,90,1,4,205000,80000000.0
2,Paris Saint-Germain,K. Mbappé,19,France,88,95,1,4,100000,80000000.0
3,Juventus,P. Dybala,24,Argentina,89,94,0,3,205000,90000000.0
4,Arsenal,P. Aubameyang,29,Gabon,88,88,1,4,265000,60000000.0
5,Olympique de Marseille,F. Thauvin,25,France,84,87,0,3,72000,40000000.0
6,Manchester United,A. Sánchez,29,Chile,85,85,1,3,215000,40000000.0
7,Atlético Madrid,A. Griezmann,27,France,89,90,0,3,145000,80000000.0
8,RC Celta,Iago Aspas,30,Spain,84,84,0,3,45000,30000000.0
9,Olympique Lyonnais,M. Depay,24,Netherlands,84,89,1,3,93000,40000000.0


In [16]:
player_scouter(similar_df, dist_df, 192985)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Sporting CP,Bruno Fernandes,23,Portugal,84,88,1,3,22000,40000000.0
1,FC Bayern München,J. Rodríguez,26,Colombia,88,89,0,3,315000,70000000.0
2,SL Benfica,Pizzi,28,Portugal,83,83,1,4,22000,30000000.0
3,Atlético Madrid,Koke,26,Spain,85,86,1,4,88000,40000000.0
4,Borussia Mönchengladbach,L. Stindl,29,Germany,81,81,1,4,43000,20000000.0
5,FC Barcelona,Coutinho,26,Brazil,88,89,1,4,340000,70000000.0
6,Milan,G. Bonaventura,28,Italy,82,82,1,3,115000,20000000.0
7,Manchester United,P. Pogba,25,France,87,91,1,4,210000,60000000.0
8,Guangzhou Evergrande Taobao FC,Anderson Talisca,24,Brazil,83,90,0,4,18000,40000000.0
9,Atlético Madrid,T. Lemar,22,France,83,89,0,2,64000,40000000.0


In [17]:
player_scouter(similar_df, dist_df, 203376)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Tottenham Hotspur,T. Alderweireld,29,Belgium,86,87,1,3,150000,40000000.0
1,Juventus,L. Bonucci,31,Italy,86,86,1,3,160000,30000000.0
2,Tottenham Hotspur,J. Vertonghen,31,Belgium,87,87,0,3,155000,30000000.0
3,Tottenham Hotspur,E. Dier,24,England,80,84,1,3,73000,20000000.0
4,Ajax,M. de Ligt,18,Netherlands,82,91,1,4,11000,30000000.0
5,Manchester City,V. Kompany,32,Belgium,85,85,1,3,170000,20000000.0
6,Paris Saint-Germain,Thiago Silva,33,Brazil,88,88,1,3,165000,20000000.0
7,Manchester United,V. Lindelöf,23,Sweden,79,85,1,3,91000,10000000.0
8,VfL Wolfsburg,J. Guilavogui,27,France,79,80,1,3,46000,10000000.0
9,Olympique de Marseille,Luiz Gustavo,30,Brazil,83,83,0,3,63000,20000000.0


In [18]:
player_scouter(similar_df, dist_df, 193080)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Everton,J. Pickford,24,England,83,88,0,4,78000,20000000.0
1,Leicester City,K. Schmeichel,31,Denmark,84,84,1,3,78000,20000000.0
2,FC Bayern München,M. Neuer,32,Germany,89,89,1,4,130000,40000000.0
3,Manchester City,Ederson,24,Brazil,86,90,0,3,125000,40000000.0
4,Feyenoord,K. Vermeer,32,Netherlands,75,75,1,4,10000,4000000.0
5,Brighton & Hove Albion,M. Ryan,26,Australia,79,81,1,4,41000,10000000.0
6,Fulham,Fabricio,30,Spain,79,79,1,4,47000,8000000.0
7,Manchester City,C. Bravo,35,Chile,78,78,1,3,50000,2000000.0
8,PSV,J. Zoet,27,Netherlands,80,83,1,4,16000,10000000.0
9,FC Barcelona,J. Cillessen,29,Netherlands,82,84,1,3,135000,20000000.0


### Finding a replacement for an outgoing player

In [13]:
#df[df.Club == 'Liverpool']
df[df.Club == 'Liverpool']

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Position,Contract Expiration,Height,Weight,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Main Position
26,209331,M. Salah,26,Egypt,88,89,Liverpool,70000000.0,255000,0,3,3,4,High,Normal,RM,2023,5'9,157lbs,78,90,59,82,73,89,83,60,72,88,94,91,91,91,88,77,68,84,70,83,63,55,90,82,61,91,38,43,41,14,14,9,11,14,WING
58,208722,S. Mané,26,Senegal,86,87,Liverpool,50000000.0,195000,1,3,4,4,High,Lean,LM,2023,5'9,152lbs,73,84,62,79,71,87,74,64,71,86,95,93,91,86,86,82,75,84,67,74,73,35,87,82,71,80,42,42,38,10,10,15,7,14,WING
59,203376,V. van Dijk,26,Netherlands,86,88,Liverpool,40000000.0,165000,1,3,3,2,Medium,Normal,LCB,2023,6'4,203lbs,53,52,82,76,45,70,60,70,78,73,74,77,61,85,49,81,85,71,92,64,81,86,41,59,62,83,88,89,84,13,10,13,11,11,CB
61,201942,Roberto Firmino,26,Brazil,86,87,Liverpool,50000000.0,195000,1,3,4,4,High,Lean,CAM,2023,5'11,168lbs,72,87,77,86,81,87,80,65,75,88,78,77,80,86,81,81,81,90,74,76,73,58,87,85,72,87,60,64,42,8,11,9,6,10,AM
81,212831,Alisson,25,Brazil,85,90,Liverpool,40000000.0,115000,1,3,3,1,Medium,Normal,GK,2024,6'3,201lbs,17,13,19,45,20,27,19,18,44,30,54,45,40,84,37,21,52,32,78,14,27,11,13,66,23,65,15,19,16,83,81,85,84,88,GK
118,209499,Fabinho,24,Brazil,84,88,Liverpool,30000000.0,120000,1,3,2,3,Medium,Lean,CDM,2023,6'2,172lbs,79,66,75,83,47,76,71,53,78,82,69,72,69,83,67,78,75,92,79,65,85,84,70,75,91,84,83,86,84,13,12,6,8,8,CDM
161,220971,N. Keïta,23,Guinea,83,88,Liverpool,30000000.0,120000,1,2,4,4,High,Lean,CM,2023,5'8,141lbs,62,74,42,88,71,88,64,70,78,88,78,60,89,82,90,76,53,82,58,73,78,75,74,81,58,79,68,62,61,7,14,15,14,9,CM
241,216267,A. Robertson,24,Scotland,82,87,Liverpool,20000000.0,98000,0,1,2,3,High,Lean,LB,2021,5'10,141lbs,86,57,63,76,32,76,74,30,68,78,81,86,78,81,74,67,59,89,66,60,78,80,66,74,55,73,80,81,78,11,8,10,7,14,FB
296,183711,J. Henderson,28,England,82,82,Liverpool,20000000.0,125000,1,2,3,3,Medium,Normal,RCM,2020,6'0,176lbs,81,67,65,84,72,73,80,74,84,80,67,66,70,80,70,77,79,91,72,71,77,80,75,83,64,78,77,79,76,7,13,7,9,13,CM
302,181291,G. Wijnaldum,27,Netherlands,82,82,Liverpool,20000000.0,130000,1,3,3,4,High,Lean,CAM,2021,5'9,152lbs,67,70,72,84,73,84,76,70,76,84,77,76,80,82,81,76,80,85,72,71,64,76,79,78,74,82,74,75,65,16,6,14,12,6,AM


In [19]:
player_scouter(similar_df, dist_df, 181291)

Unnamed: 0,Club,Name,Age,Nationality,Overall,Potential,Preferred Foot,Weak Foot,Wage,Value
0,Chelsea,V. Moses,27,Nigeria,78,78,1,4,105000,10000000.0
1,PFC CSKA Moscow,A. Dzagoev,28,Russia,79,79,1,3,1000,10000000.0
2,Olympique de Marseille,M. Sanson,23,France,80,85,1,4,47000,20000000.0
3,Paris Saint-Germain,A. Rabiot,23,France,83,87,0,3,91000,30000000.0
4,Milan,G. Bonaventura,28,Italy,82,82,1,3,115000,20000000.0
5,Manchester United,Fred,25,Brazil,82,84,0,4,140000,30000000.0
6,PAOK,Maurício,29,Brazil,76,76,1,4,1000,6000000.0
7,Torino,D. Baselli,26,Italy,79,82,1,3,49000,20000000.0
8,FC Schalke 04,D. Caligiuri,30,Italy,79,79,1,4,33000,9000000.0
9,FC Barcelona,Arthur,21,Brazil,82,90,1,3,125000,30000000.0


In [13]:
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity

In [20]:
df.head()

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Position,Contract Expiration,Height,Weight,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Main Position
0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,100000000.0,565000,0,5,4,4,Medium,Lean,RF,2021,5'7,159lbs,84,95,70,90,86,97,93,94,87,96,91,86,91,95,95,85,68,72,59,94,48,22,94,94,75,96,33,28,26,6,11,15,14,8,ST
1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,80000000.0,405000,1,5,4,5,High,Stocky,ST,2022,6'2,183lbs,84,94,89,81,87,88,81,76,77,94,89,91,87,96,70,95,95,88,79,93,63,29,95,82,85,95,28,31,23,7,11,15,14,11,ST
2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,100000000.0,290000,1,5,5,5,High,Lean,LW,2022,5'9,150lbs,79,87,62,84,84,96,88,87,78,95,94,90,96,94,84,80,61,81,49,82,56,36,89,87,81,94,27,24,33,9,9,15,15,11,WING
3,193080,De Gea,27,Spain,91,93,Manchester United,70000000.0,260000,1,4,3,1,Medium,Lean,GK,2020,6'4,168lbs,17,13,21,50,13,18,21,19,51,42,57,58,60,90,43,31,67,43,64,12,38,30,12,68,40,68,15,21,13,90,85,87,88,94,GK
4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,100000000.0,355000,1,4,5,4,High,Normal,RCM,2023,5'11,154lbs,93,82,55,92,82,86,85,83,91,91,78,76,79,91,77,91,63,90,75,91,76,61,87,94,79,88,68,58,51,15,13,5,10,13,CM


In [21]:
#df.loc[df.Name, 'M. Salah']
df[df.Name == 'L. Messi']

Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Position,Contract Expiration,Height,Weight,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Main Position
0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,100000000.0,565000,0,5,4,4,Medium,Lean,RF,2021,5'7,159lbs,84,95,70,90,86,97,93,94,87,96,91,86,91,95,95,85,68,72,59,94,48,22,94,94,75,96,33,28,26,6,11,15,14,8,ST


In [22]:
messi_x = df[df.Name == 'L. Messi'].values.reshape(1,-1)
ronaldo_x = df[df.Name == 'Cristiano Ronaldo'].values.reshape(1,-1)

In [23]:
print('Messi vs. Ronaldo = ', cosine_similarity(messi_x, ronaldo_x))

ValueError: could not convert string to float: 'L. Messi'

In [None]:
from scipy import spatialdef Similarity(movieId1, movieId2):
    a = movies.iloc[movieId1]
    b = movies.iloc[movieId2]
    
    genresA = a['genres_bin']
    genresB = b['genres_bin']
    
    genreDistance = spatial.distance.cosine(genresA, genresB)
    
    scoreA = a['cast_bin']
    scoreB = b['cast_bin']
    scoreDistance = spatial.distance.cosine(scoreA, scoreB)
    
    directA = a['director_bin']
    directB = b['director_bin']
    directDistance = spatial.distance.cosine(directA, directB)
    
    wordsA = a['words_bin']
    wordsB = b['words_bin']
    wordsDistance = spatial.distance.cosine(directA, directB)
    return genreDistance + directDistance + scoreDistance + wordsDistance

In [None]:
Similarity(3,160)

In [None]:
import operatordef predict_score():
    name = input('Enter a movie title: ')
    new_movie = movies[movies['original_title'].str.contains(name)].iloc[0].to_frame().T
    print('Selected Movie: ',new_movie.original_title.values[0])
    def getNeighbors(baseMovie, K):
        distances = []
    
        for index, movie in movies.iterrows():
            if movie['new_id'] != baseMovie['new_id'].values[0]:
                dist = Similarity(baseMovie['new_id'].values[0], movie['new_id'])
                distances.append((movie['new_id'], dist))
    
        distances.sort(key=operator.itemgetter(1))
        neighbors = []
    
        for x in range(K):
            neighbors.append(distances[x])
        return neighbors
    
    K = 10
    avgRating = 0
    neighbors = getNeighbors(new_movie, K)print('\nRecommended Movies: \n')
    for neighbor in neighbors:
        avgRating = avgRating+movies.iloc[neighbor[0]][2]  
        print( movies.iloc[neighbor[0]][0]+" | Genres: "+str(movies.iloc[neighbor[0]][1]).strip('[]').replace(' ','')+" | Rating: "+str(movies.iloc[neighbor[0]][2]))
    
    print('\n')
    avgRating = avgRating/K
    print('The predicted rating for %s is: %f' %(new_movie['original_title'].values[0],avgRating))
    print('The actual rating for %s is %f' %(new_movie['original_title'].values[0],new_movie['vote_average']))