In [1]:
import pandas as pd
import folium
import geopandas as gpd 

In [2]:
# Base map that the per-country circles are added to later in the notebook.
# The name `map` shadows Python's built-in `map`; it is kept because later
# cells refer to the map object by this name.
# NOTE(review): 'Stamen Toner' may no longer be offered as a built-in tile set
# in recent folium releases - confirm against the installed version.
map = folium.Map(
    # Numeric latitude/longitude (previously passed as strings); this is the
    # same point that is used for Ivory Coast further down.
    location=[7.54, -5.5471],
    tiles='Stamen Toner',
    zoom_start=3
)
map

In [3]:
# Load the cleaned player dataset and preview its first rows.
players_csv = 'cleaned_data.csv'
df = pd.read_csv(players_csv)
df.head()

Unnamed: 0.1,Unnamed: 0,No,ID,Name,Age,Nationality,Overall,Potential,Club,Value,...,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Feet,Inches
0,0,0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,110.5,...,33,28,26,6,11,15,14,8,5,7
1,1,1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,77.0,...,28,31,23,7,11,15,14,11,6,2
2,2,2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,118.5,...,27,24,33,9,9,15,15,11,5,9
3,3,3,193080,De Gea,27,Spain,91,93,Manchester United,72.0,...,15,21,13,90,85,87,88,94,6,4
4,4,4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,102.0,...,68,58,51,15,13,5,10,13,5,11


In [4]:
# Count players per nationality; as_index=False keeps 'Nationality' as a
# regular column, and the count lands in the 'Name' column.
grouped_by_country = (
    df
    .groupby('Nationality', as_index=False)['Name']
    .count()
)
grouped_by_country.head()

Unnamed: 0,Nationality,Name
0,Albania,2
1,Algeria,3
2,Argentina,27
3,Armenia,1
4,Austria,3


In [5]:
# Give the count column a descriptive label, then preview the result.
# Rebinding the name (instead of mutating in place) yields the same frame.
grouped_by_country = grouped_by_country.rename(columns={'Name': 'Player Count'})
grouped_by_country.head()

Unnamed: 0,Nationality,Player Count
0,Albania,2
1,Algeria,3
2,Argentina,27
3,Armenia,1
4,Austria,3


In [6]:
#url = 'https://raw.githubusercontent.com/johan/world.geo.json/master/countries.geo.json'

In [7]:
# Lookup table of country code, latitude, longitude and country name.
# The file is decoded explicitly as cp1252.
country_lat_long = pd.read_csv(
    'country_lat_long.csv',
    encoding='cp1252',
)

In [8]:
# Preview the first rows of the coordinate lookup table.
country_lat_long.head()

Unnamed: 0,country,latitude,longitude,name
0,AD,42.546245,1.601554,Andorra
1,AE,23.424076,53.847818,United Arab Emirates
2,AF,33.93911,67.709953,Afghanistan
3,AG,17.060816,-61.796428,Antigua and Barbuda
4,AI,18.220554,-63.068615,Anguilla


In [9]:
# Open the CSV with Python's default text settings and print the file object;
# its repr shows which encoding was picked by default.
with open('country_lat_long.csv') as csv_handle:
    print(csv_handle)

<_io.TextIOWrapper name='country_lat_long.csv' mode='r' encoding='cp1252'>


In [10]:
# (rows, columns) of the per-nationality counts, recorded before the merge.
grouped_by_country.shape

(59, 2)

In [11]:
# Attach coordinates to each nationality. A left join keeps every nationality
# even when its name has no exact match in the lookup table.
merged_df = grouped_by_country.merge(
    country_lat_long,
    how='left',
    left_on='Nationality',
    right_on='name',
)

In [12]:
# Rows where the join found no matching country name in the lookup table.
has_no_match = merged_df['name'].isnull()
merged_df[has_no_match]

Unnamed: 0,Nationality,Player Count,country,latitude,longitude,name
6,Bosnia Herzegovina,3,,,,
9,Central African Rep.,1,,,,
15,DR Congo,1,,,,
20,England,22,,,,
31,Ivory Coast,4,,,,
34,Korea Republic,1,,,,
46,Scotland,1,,,,
58,Wales,3,,,,


In [13]:
# Manually supplied (latitude, longitude) pairs for the nationalities whose
# names had no exact match in the coordinate lookup table.
# NOTE(review): the England point looks like a whole-UK centroid rather than
# one specific to England - confirm whether that is intended.
manual_coordinates = {
    'Bosnia Herzegovina': (43.915886, 17.679076),
    'Central African Rep.': (6.611111, 20.939444),
    'DR Congo': (-4.038333, 21.758664),
    'England': (55.378051, -3.435973),
    'Ivory Coast': (7.54, -5.5471),
    'Korea Republic': (35.907757, 127.766922),
    'Scotland': (56.4907, -4.2026),
    'Wales': (52.1307, -3.7837),
}

for nationality, (lat, lon) in manual_coordinates.items():
    # Build the row mask once and reuse it for both coordinate columns.
    is_nationality = merged_df['Nationality'] == nationality
    merged_df.loc[is_nationality, 'latitude'] = lat
    merged_df.loc[is_nationality, 'longitude'] = lon

In [14]:
# Preview the merged frame after the manual coordinate fixes.
merged_df.head()

Unnamed: 0,Nationality,Player Count,country,latitude,longitude,name
0,Albania,2,AL,41.153332,20.168331,Albania
1,Algeria,3,DZ,28.033886,1.659626,Algeria
2,Argentina,27,AR,-38.416097,-63.616672,Argentina
3,Armenia,1,AM,40.069099,45.038189,Armenia
4,Austria,3,AT,47.516231,14.550072,Austria


In [15]:
# Store the player counts as 64-bit floats.
player_counts = merged_df['Player Count']
merged_df['Player Count'] = player_counts.astype('float64')

In [16]:
# Check dtypes and non-null counts; latitude/longitude should have no gaps
# before the rows are plotted.
merged_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59 entries, 0 to 58
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Nationality   59 non-null     object 
 1   Player Count  59 non-null     float64
 2   country       51 non-null     object 
 3   latitude      59 non-null     float64
 4   longitude     59 non-null     float64
 5   name          51 non-null     object 
dtypes: float64(3), object(3)
memory usage: 3.2+ KB


In [17]:
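# NOTE: disabled choropleth experiment, kept for reference only. As written it
# would not run: `url` is commented out in cell In [6], and the 'Name' column
# was renamed to 'Player Count' in cell In [5].
#map.choropleth(geo_data=url,
#                data=grouped_by_country,
#                columns=['Nationality', 'Name'],
#                key_on='feature.properties.name',
#                fill_color='BuPu')
#map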

In [18]:
# Circle radius (in metres) drawn per player; purely a visual scaling factor.
RADIUS_PER_PLAYER = 5000

# Draw one filled circle per nationality, sized by its player count.
# Iterating the columns in parallel avoids rebuilding a row Series with
# `.iloc[i]` for every single field.
# NOTE: re-running this cell adds another set of circles to the same map.
country_rows = zip(
    merged_df['Nationality'],
    merged_df['Player Count'],
    merged_df['latitude'],
    merged_df['longitude'],
)
for nationality, player_count, lat, lon in country_rows:
    folium.Circle(
        location=[lat, lon],
        popup=f"Country: {nationality}, Player Count: {int(player_count)}",
        radius=float(player_count * RADIUS_PER_PLAYER),
        color='crimson',
        fill=True,
        fill_color='crimson',
        fill_opacity=0.8
    ).add_to(map)
 


In [19]:
# Render the map, now including the per-country circles.
map