# Deeper look at La Liga 2008/9

I'm now going to use a crumpled old copy of Don Balón from June 2009 to validate the data in greater detail.

In [1]:
# Standard library
import os
import sys
import warnings

# Third-party
import numpy as np
import pandas as pd

# Show wide summary tables in full rather than wrapping or eliding columns.
pd.set_option('display.max_columns', 500)
pd.set_option('display.expand_frame_repr', False)

# NOTE(review): this suppresses every warning for the rest of the notebook;
# consider narrowing the filter to the specific category that is noisy.
warnings.filterwarnings('ignore')

# Put the project's source directory (relative to the working directory) at
# the front of the import path. The path is assembled with os.path.join
# because a hard-coded backslash separator is only valid on Windows.
sys.path.insert(0, os.path.join('..', 'src'))

# Project-local modules, resolved through the sys.path entry added above.
import config
import utilities
import clubs

In [2]:
# Fetch the "fulldata" master via the project's utilities module. It is used
# below as the `df` argument of clubs.get_summary and is filtered on its Date
# column, so it is presumably a pandas DataFrame of match rows — TODO confirm.
fulldata = utilities.get_master("fulldata")

## Clasificación

In [3]:
# League table for La Liga (Div "SP1"), season 2008-2009, grouped by team.
group_key = "Team"

# Restrict the master data to one season of one division.
base_filters = dict(Season=["2008-2009"], Div=["SP1"])

# Metrics requested from get_summary, grouped by theme.
output_metrics = (
    ["Points", "NumberOfMatches"]
    + ["Win", "Draw", "Loss"]
    + ["Goals", "GoalsOpp", "GoalsDiff"]
)

laliga = clubs.get_summary(
    group_key,
    df=fulldata,
    base_filters=base_filters,
    output_metrics=output_metrics,
    agg_method=sum,
)

# Display with the highest points total first.
laliga.sort_values(by="Points", ascending=False)

Unnamed: 0,Points,NumberOfMatches,Win,Draw,Loss,Goals,GoalsOpp,GoalsDiff
Barcelona,87,38,27,6,5,105.0,35.0,70.0
Real Madrid,78,38,25,3,10,83.0,52.0,31.0
Sevilla,70,38,21,7,10,54.0,39.0,15.0
Ath Madrid,67,38,20,7,11,80.0,57.0,23.0
Villarreal,65,38,18,11,9,61.0,54.0,7.0
Valencia,62,38,18,8,12,68.0,54.0,14.0
La Coruna,58,38,16,10,12,48.0,47.0,1.0
Malaga,55,38,15,10,13,55.0,59.0,-4.0
Mallorca,51,38,14,9,15,53.0,60.0,-7.0
Espanol,47,38,12,11,15,46.0,49.0,-3.0


All looks good. The ordering is slightly different because I haven't encoded the tie-breakers used in Spain when teams are level on points.

## Juego Limpio

In [4]:
# Fair-play table: card totals per team.
# Reuses group_key and base_filters from the Clasificación cell above.
output_metrics = ["YellowCards", "RedCards"]

juego = clubs.get_summary(
    group_key,
    df=fulldata,
    base_filters=base_filters,
    output_metrics=output_metrics,
    agg_method=sum,
)

# Card score: each yellow counts 1 point, each red counts 3.
juego["CardPoints"] = juego["RedCards"] * 3 + juego["YellowCards"]

# Lowest card score first.
juego.sort_values(by="CardPoints")

Unnamed: 0,YellowCards,RedCards,CardPoints
Barcelona,71.0,6.0,89.0
La Coruna,81.0,4.0,93.0
Valladolid,76.0,10.0,106.0
Villarreal,92.0,5.0,107.0
Recreativo,91.0,6.0,109.0
Malaga,96.0,7.0,117.0
Espanol,100.0,6.0,118.0
Sevilla,96.0,8.0,120.0
Betis,96.0,9.0,123.0
Numancia,97.0,9.0,124.0


Broadly fine. Red cards are spot on apart from Athletic, who have an extra red here. I couldn't split out straight reds from second yellows, so I couldn't check all the details.

## Efectividad

In [5]:
# Shooting table per team.
# Reuses group_key and base_filters from the Clasificación cell above.
output_metrics = ["Shots", "ShotsOnTarget", "Goals"]

efect = clubs.get_summary(
    group_key,
    df=fulldata,
    base_filters=base_filters,
    output_metrics=output_metrics,
    agg_method=sum,
)

# "% P" is shots on target divided by shots; "EFFECT." is goals divided by shots.
efect["% P"] = efect["ShotsOnTarget"].div(efect["Shots"])
efect["EFFECT."] = efect["Goals"].div(efect["Shots"])

# Place each ratio after the counts it is derived from; highest goals-per-shot first.
efect[
    ["Shots", "ShotsOnTarget", "% P", "Goals", "EFFECT."]
].sort_values(by="EFFECT.", ascending=False)

Unnamed: 0,Shots,ShotsOnTarget,% P,Goals,EFFECT.
Barcelona,707.0,276.0,0.390382,105.0,0.148515
Ath Madrid,560.0,209.0,0.373214,80.0,0.142857
Real Madrid,626.0,246.0,0.392971,83.0,0.132588
Valencia,530.0,182.0,0.343396,68.0,0.128302
Villarreal,487.0,213.0,0.437372,61.0,0.125257
Malaga,443.0,178.0,0.401806,55.0,0.124153
Santander,442.0,137.0,0.309955,49.0,0.11086
Espanol,422.0,158.0,0.374408,46.0,0.109005
Almeria,416.0,145.0,0.348558,45.0,0.108173
Ath Bilbao,472.0,176.0,0.372881,47.0,0.099576


The numbers are completely different from the magazine's!?!?

## Clasificación (Segunda División A)

In [6]:
# League table for the Segunda División (Div "SP2"), season 2008-2009.
group_key = "Team"
base_filters = dict(Season=["2008-2009"], Div=["SP2"])

# GoalsDiff is not requested for this table.
output_metrics = [
    "Points", "NumberOfMatches",
    "Win", "Draw", "Loss",
    "Goals", "GoalsOpp",
]

# Keep only rows dated on or before 2009-06-01.
# NOTE(review): presumably this matches the standings at the time the magazine
# went to print — confirm.
todate = fulldata.loc[fulldata["Date"] <= "2009-06-01"]

segunda = clubs.get_summary(
    group_key,
    df=todate,
    base_filters=base_filters,
    output_metrics=output_metrics,
    agg_method=sum,
)

# Display with the highest points total first.
segunda.sort_values(by="Points", ascending=False)

Unnamed: 0,Points,NumberOfMatches,Win,Draw,Loss,Goals,GoalsOpp
Xerez,78,39,23,9,7,70.0,38.0
Tenerife,75,39,22,9,8,75.0,45.0
Zaragoza,74,39,21,11,7,71.0,38.0
Hercules,69,39,18,15,6,71.0,40.0
Vallecano,65,39,17,14,8,49.0,36.0
Sociedad,63,39,16,15,8,44.0,32.0
Salamanca,60,39,16,12,11,56.0,40.0
Levante,59,39,17,8,14,54.0,56.0
Castellon,56,39,14,14,11,48.0,41.0
Gimnastic,54,39,13,15,11,58.0,48.0


Spot on!