In [1]:
import pandas as pd
import networkx as nx
from datetime import datetime
from pyvis.network import Network
import networkx.algorithms.community as nx_comm
import pyvis
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the raw transfer records from the CSV that sits next to the notebook.
data = pd.read_csv("all_transfer.csv")

In [3]:
def prepare_data(dataframe, date_th='2012-01-01'):
    """Clean the raw transfer table and aggregate it into a club-to-club edge list.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Raw transfers with at least the columns ``Date``, ``From``, ``To``,
        ``Price`` and ``Age``. The input frame is left untouched.
    date_th : str or datetime-like, default '2012-01-01'
        Earliest transfer date (inclusive) that is kept.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(transfers, edges)``: the cleaned transfer rows, and one row per
        ``(From, To)`` pair with ``Count`` (number of transfers) and
        ``Price_Sum`` (summed integer price).

    Raises
    ------
    ValueError
        Raised by pandas when a date cannot be parsed or when a price that
        survives the date filter is missing or not numeric after cleaning.
    """
    # Drop rows without a date or with placeholder club names.
    is_placeholder = (
        dataframe["Date"].isna()
        | (dataframe["From"] == "x")
        | (dataframe["To"] == "x")
        | (dataframe["To"] == "Unknown")
    )
    # Work on explicit copies so column assignments never target a slice of
    # the caller's frame (the original only ran quietly because warnings
    # were silenced).
    transfers = dataframe.loc[~is_placeholder].copy()

    transfers["Date"] = pd.to_datetime(transfers["Date"])
    transfers = transfers.loc[transfers["Date"] >= date_th].copy()

    # Parse prices such as "€1.5M", "€ 750K" or "Free" into integers.
    # The conversion is done on a text copy so no integer is ever written
    # into a text column.
    price_text = transfers["Price"].astype(str)
    price_text = price_text.mask(price_text == "Free", "0")
    for symbol, replacement in (("€", ""), (" ", ""), ("M", "e+06"), ("K", "e+03")):
        price_text = price_text.str.replace(symbol, replacement, regex=False)
    transfers["Price"] = price_text.astype(float).astype(int)

    # Ages that cannot be parsed become NaN and are kept by the isna() branch
    # below, instead of leaving the column as text and failing on comparison.
    transfers["Age"] = pd.to_numeric(transfers["Age"], errors="coerce").astype(float)
    age_in_range = (transfers["Age"] >= 16) & (transfers["Age"] <= 40)
    transfers = transfers.loc[age_in_range | transfers["Age"].isna()].copy()

    # One row per directed club pair: number of transfers and total fee.
    edges = (
        transfers.groupby(["From", "To"])
        .agg({"Date": "count", "Price": "sum"})
        .reset_index()
        .rename(columns={"Date": "Count", "Price": "Price_Sum"})
    )

    return transfers, edges

In [4]:
# data1: cleaned transfer rows; data2: aggregated (From, To) edge list.
data1, data2 = prepare_data(data)

In [5]:
# Show the aggregated edge list: one row per (From, To) pair with Count and Price_Sum.
data2

Unnamed: 0,From,To,Count,Price_Sum
0,A. Bucaramanga,AD Pasto,1,0
1,A. Bucaramanga,Aguilas,2,0
2,A. Bucaramanga,Alianza,2,0
3,A. Bucaramanga,America,1,0
4,A. Bucaramanga,Atenas,1,0
...,...,...,...,...
72625,Zwolle,Valur,1,0
72626,Zwolle,Viborg,1,0
72627,Zwolle,Vitesse,1,0
72628,Zwolle,WHC Wezep,1,0


In [6]:
# Build an undirected multigraph of clubs; every (From, To) row becomes its
# own edge carrying the transfer count and the summed price.
G = nx.from_pandas_edgelist(
    data2,
    source="From",
    target="To",
    edge_attr=["Count", "Price_Sum"],
    create_using=nx.MultiGraph(),
)

In [7]:
# Louvain community detection; the fixed seed keeps the partition reproducible.
communities = nx_comm.louvain_communities(
    G,
    resolution=10,
    seed=42,
)

In [8]:
# Map 1-based community ids to their member sets.
dct = dict(enumerate(communities, start=1))

In [9]:
# Turn {community id -> members} into a long (CommunityID, Team) table.
# The transpose puts one community per column; melt stacks them and dropna
# removes the padding introduced by communities of unequal size.
df_com = (
    pd.DataFrame.from_dict(dct, orient='index')
    .T
    .melt(var_name='CommunityID', value_name='Team')
    .dropna(subset=['Team'])
)

In [10]:
# Show the community membership table (one row per team).
df_com

Unnamed: 0,CommunityID,Team
0,1,Lewis University
1,1,SV Muttenz
2,1,SV Allerheiligen
3,1,SV Wacker Nürnberg
4,1,ASV Draßburg
...,...,...
52437,91,Torreblanca CF
52438,91,Castilleja CF
52439,91,CD 26 de Febrero U19
52440,91,AD Malaka CF


In [11]:
# Number of teams in each community.
df_com.groupby("CommunityID")[["Team"]].count()

Unnamed: 0_level_0,Team
CommunityID,Unnamed: 1_level_1
1,259
2,88
3,468
4,94
5,29
...,...
87,24
88,208
89,123
90,79


In [12]:
# Look up which community Galatasaray was assigned to.
df_com.loc[df_com["Team"].eq("Galatasaray")]

Unnamed: 0,CommunityID,Team
19584,34,Galatasaray


In [13]:
# First 20 members of community 34 (the id shown for Galatasaray in the previous cell).
df_com.loc[df_com["CommunityID"].eq(34)].head(20)

Unnamed: 0,CommunityID,Team
19206,34,Bucaspor
19207,34,Ayazagaspor
19208,34,Zonguldak
19209,34,Beykozspor 1908
19210,34,Carstyle Kilicarslan Spor
19211,34,Silivrispor
19212,34,Sultanbeyli Belediye Spor
19213,34,Leventspor
19214,34,Manisaspor
19215,34,Cine Madranspor


In [14]:
# Attach the community id of both endpoints to every edge.
# First merge tags the selling club (suffix _x), second the buying club (suffix _y).
edges_with_from = data2.merge(df_com, left_on="From", right_on="Team", how="left")
edges_with_both = edges_with_from.merge(df_com, left_on="To", right_on="Team", how="left")

ordered_columns = ["CommunityID_x", "From", "CommunityID_y", "To", "Count", "Price_Sum"]
data3 = edges_with_both[ordered_columns].rename(
    columns={"CommunityID_x": "CommunityID_From", "CommunityID_y": "CommunityID_To"}
)

In [15]:
# Show the edge list annotated with the community id of both clubs.
data3

Unnamed: 0,CommunityID_From,From,CommunityID_To,To,Count,Price_Sum
0,2,A. Bucaramanga,2,AD Pasto,1,0
1,2,A. Bucaramanga,2,Aguilas,2,0
2,2,A. Bucaramanga,19,Alianza,2,0
3,2,A. Bucaramanga,2,America,1,0
4,2,A. Bucaramanga,27,Atenas,1,0
...,...,...,...,...,...,...
72625,3,Zwolle,82,Valur,1,0
72626,3,Zwolle,78,Viborg,1,0
72627,3,Zwolle,3,Vitesse,1,0
72628,3,Zwolle,3,WHC Wezep,1,0


In [16]:
# Keep only edges that touch one of the four selected Turkish clubs.
focus_teams = ["Galatasaray", "Besiktas", "Fenerbahce", "Trabzonspor"]
touches_focus_team = data3["From"].isin(focus_teams) | data3["To"].isin(focus_teams)
data4 = data3[touches_focus_team]

In [17]:
# Show the edges involving the four selected clubs.
data4

Unnamed: 0,CommunityID_From,From,CommunityID_To,To,Count,Price_Sum
531,10,AIK Solna,34,Besiktas,1,1000000
686,3,AZ Alkmaar,34,Trabzonspor,1,0
1785,37,Al-Hilal,34,Galatasaray,1,0
1851,34,Alanyaspor,34,Besiktas,3,3000000
1869,34,Alanyaspor,34,Fenerbahce,1,0
...,...,...,...,...,...,...
71578,34,Y. Malatyaspor,34,Besiktas,2,0
71597,34,Y. Malatyaspor,34,Fenerbahce,1,1300000
71599,34,Y. Malatyaspor,34,Galatasaray,2,0
72153,65,Zenit,34,Fenerbahce,2,12500000


In [18]:
# Interactive pyvis view of the transfers involving the selected clubs.
gs_net = Network(height='1500px', width='100%', bgcolor='white', font_color='black', notebook=True)

# set the physics layout of the network
gs_net.barnes_hut()

sources = data4['From']
targets = data4['To']
value = data4['Count']
weights = data4['Price_Sum']
grp1 = data4["CommunityID_From"]
grp2 = data4["CommunityID_To"]

edge_data = zip(sources, targets, value, weights, grp1, grp2)

for src, dst, v, w, g1, g2 in edge_data:
    # Each endpoint is coloured by its OWN community. The destination used to
    # receive the source's group (g1), so a club first seen as a buyer was
    # shown in the seller's community.
    gs_net.add_node(src, src, title=src, group=g1)
    gs_net.add_node(dst, dst, title=dst, group=g2)
    # Edge thickness follows the transfer count; the summed price is stored as weight.
    gs_net.add_edge(src, dst, value=v, weight=w)

neighbor_map = gs_net.get_adj_list()

# add neighbor data to node hover data
for node in gs_net.nodes:
    neighbors = neighbor_map[node['id']]
    node['title'] += ' Neighbors: |' + '|'.join(neighbors)
    # Node size is driven by the number of neighbours.
    node['value'] = len(neighbors)

gs_net.show_buttons(filter_=["nodes","physics"])
gs_net.toggle_physics(True)
gs_net.show('turkishcom.html')