## Packages

In [8]:
import os
import sys
import pickle

import pandas as pd
import altair as alt

## Import

In [9]:
# Locate the pickled data relative to the parent of the current working
# directory; os.path.join builds the path with the platform's separator.
current_path = os.getcwd()
google_data_path = os.path.join(
    os.path.dirname(current_path), 'data', 'external', 'google_data'
)

# NOTE(security): pickle.load can execute arbitrary code while deserializing.
# Only load this file if it comes from a trusted source.
with open(google_data_path, 'rb') as f:
    M = pickle.load(f)

# List the entries available in the loaded object
M.keys()

dict_keys(['Buts', 'Dividendes', 'Dividendes Dernière journée', 'PPF', 'Passe Dé', 'Penalty Reussi', 'CARTONS JAUNE', 'ARRETS GARDIEN', 'TITULARISATION', 'MATCH JOUÉ', 'PRIX', 'FBREF'])

In [10]:
# Build a DataFrame from the 'FBREF' entry of M
fbref_df = pd.DataFrame(M.get('FBREF'))

# The first row (position 0) holds the header labels: use it as column names
fbref_df.columns = fbref_df.iloc[0]
# Drop that header row. The explicit .copy() makes the result an independent
# frame, so assigning columns to it later cannot trigger SettingWithCopyWarning.
fbref_df = fbref_df.iloc[1:].copy()

fbref_df.head(1)

Unnamed: 0,Clt,NOM,Nation,Pos,Équipe,Âge,Naissance,MJ,Titulaire,Min,...,Buts/90,PD/90,B+PD/90,B-PénM/90,B+PD-PénM/90,xG/90,xAG/90,xG+xAG/90,npxG/90,npxG+xAG/90
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,35-330,1987,2,2,180,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Build a DataFrame from the 'PPF' entry of M
ppf_df = pd.DataFrame(M.get('PPF'))

# The first row (position 0) holds the header labels: use it as column names
ppf_df.columns = ppf_df.iloc[0]
# Drop that header row. The explicit .copy() makes the result an independent
# frame, so assigning columns to it later cannot trigger SettingWithCopyWarning.
ppf_df = ppf_df.iloc[1:].copy()

ppf_df.head(1)

Unnamed: 0,Numéro,NOM,PRIX,PPF,POSTE
1,48,Abakar SYLLA,4004600,63,Défenseur


In [12]:
# Build a DataFrame from the 'Dividendes' entry of M
div = pd.DataFrame(M.get('Dividendes'))

# The first row (position 0) holds the header labels: use it as column names
div.columns = div.iloc[0]
# Drop that header row. The explicit .copy() makes the result an independent
# frame, so assigning columns to it later cannot trigger SettingWithCopyWarning.
div = div.iloc[1:].copy()

div.head(1)

Unnamed: 0,NUMERO,NOM,PRIX,DIVIDENDE,POSTE
1,137,Abakar SYLLA,4004600,170000,Défenseur


## Manipulation

In [13]:
# Lower-case the player names in every table to make the join easier
for _table in (fbref_df, ppf_df, div):
    _table['NOM'] = _table['NOM'].str.lower()

In [24]:
# Left join on the player name: every PPF row is kept and the dividend is
# attached when a matching name exists in the dividend table
df = ppf_df[['NOM', 'PRIX', 'PPF', 'POSTE']].merge(
    div[['NOM', 'DIVIDENDE']],
    how='left',
    on='NOM',
)

# Join problem: rows that ended up without a dividend
df.loc[df['DIVIDENDE'].isna()]

Unnamed: 0,NOM,PRIX,PPF,POSTE,DIVIDENDE
390,yvon mvogo,4571800,70,Gardien,
391,zakaria aboukhlal,5469144,11,Attaquant,


In [27]:
# Display the merged table
df

Unnamed: 0,NOM,PRIX,PPF,POSTE,DIVIDENDE
0,abakar sylla,4004600,63,Défenseur,170000
1,abdoul kader bamba,2638333,9,Milieu,-12667
2,abdoulaye touré,3134778,49,Milieu,-50222
3,ablie jallow,2402000,15,Milieu,-400000
4,abou lô,2499666,4,Défenseur,-177334
...,...,...,...,...,...
387,youssouf fofana,7732300,3,Milieu,460000
388,youssouf ndayishimiye,5252000,18,Défenseur,240000
389,yunis abdelhamid,5480000,22,Défenseur,300000
390,yvon mvogo,4571800,70,Gardien,


In [32]:
# Convert the columns to numeric types
df['PRIX'] = df['PRIX'].astype(int)
df['PPF'] = df['PPF'].astype(int)

# Strip the euro sign only while the column is still text. Once DIVIDENDE is
# numeric the .str accessor is unavailable, so without this guard re-running
# the cell would raise AttributeError.
dividende_raw = df['DIVIDENDE']
if not pd.api.types.is_numeric_dtype(dividende_raw):
    # regex=False: '€' is a literal character, not a pattern
    dividende_raw = dividende_raw.str.replace('€', '', regex=False)
df['DIVIDENDE'] = dividende_raw.astype(float)


## Viz

Rapport prix dividende

In [35]:
# Rate of return: dividend divided by price, row by row
df['tx_rdt'] = df['DIVIDENDE'].div(df['PRIX'])
df.head(5)

Unnamed: 0,NOM,PRIX,PPF,POSTE,DIVIDENDE,tx_rdt
0,abakar sylla,4004600,63,Défenseur,170000.0,0.042451
1,abdoul kader bamba,2638333,9,Milieu,-12667.0,-0.004801
2,abdoulaye touré,3134778,49,Milieu,-50222.0,-0.016021
3,ablie jallow,2402000,15,Milieu,-400000.0,-0.166528
4,abou lô,2499666,4,Défenseur,-177334.0,-0.070943


In [47]:
# Scatter plot of dividend against price, coloured by position (POSTE)
base = (
    alt.Chart(df, title=["Taux de rendement"])
    .mark_point()
    .encode(
        x=alt.X('PRIX:Q', title='Prix'),
        y=alt.Y('DIVIDENDE:Q', title='Dividende'),
        color=alt.Color('POSTE:N', scale=alt.Scale(scheme='dark2')),
    )
    .properties(width=850, height=500)
)

# Horizontal reference line at y = 0
rule = (
    alt.Chart(pd.DataFrame({'y': [0]}))
    .mark_rule(color='black')
    .encode(y='y:Q')
)

# Overlay the reference line on the scatter plot
alt.layer(base, rule)