In [1]:
# Widen the notebook container to 85% of the page so wide DataFrame outputs fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:85% !important; }</style>"))

In [2]:
import pandas as pd
%matplotlib inline

# 1. Lectura de tablas

In [4]:
# Read the three CSV exports in one pass; the relative paths are resolved
# against the current working directory.
CMP_BALLPOSSESSION, CMP_TEAMSTATS, CMP_TRACKING = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/CMP_BALLPOSSESSION.csv',
        'data/CMP_TEAMSTATS.csv',
        'data/CMP_TRACKING.csv',
    )
)


* La tabla `CMP_BALLPOSSESSION` contiene informaciones acerca de las posesiones de los equipos: Frames de las posesiones por equipos, tiempo de posesión, pases correctos, regates, etc.

In [5]:
# Preview the first rows of the possession table (bare expression -> rich display).
CMP_BALLPOSSESSION.head()

Unnamed: 0,IdTeamOwning,IdBallPossession,IdGame,IdFrameStart,IdFrameStop,VlPossessionBallDistance,VlPossessionSeconds,QtPassCorrect,QtPassFail,QtTakeOnCorrect,QtTakeOnFail,QtPassTotal,QtTakeOnTotal
0,175,1596184-1597397,943075,1596184,1597397,67.88,48.52,14,2,0,0,16,0
1,176,1465906-1466307,943075,1465906,1466307,14.78,16.04,0,0,0,0,0,0
2,176,1595681-1596183,943075,1595681,1596183,16.32,20.08,3,2,0,0,5,0
3,176,1599625-1601322,943075,1599625,1601322,76.28,67.88,16,3,0,0,19,0
4,175,1466308-1466360,943075,1466308,1466360,7.54,2.08,0,0,0,0,0,0


* La tabla `CMP_TEAMSTATS` contiene medidas relacionadas con posiciones en cada frame para cada equipo: Valor de las coordenadas máxima y mínima, distancia del equipo, área del convex hull, etc.

In [9]:
# Display the whole team-stats frame; the recorded rendering elides the middle rows.
# NOTE(review): `.head()` would match how the other two tables are previewed.
CMP_TEAMSTATS

Unnamed: 0,IdGame,IdTeam,IdFrame,IdEventType,HsGoal,QtGoals,DsMatchStatus,VlRealXMax,VlRealXMin,VlRealXAvg,VlRealYMax,VlRealYMin,VlRealYAvg,VlTeamsDistance,VlConvexHullArea
0,943075,175,1463268,,0,0,draw,66.55,31.41,51.309,58.38,20.69,36.819,10.311881,936.96695
1,943075,175,1463399,,0,0,draw,59.38,11.57,39.036,54.08,7.95,25.039,6.548679,1354.42225
2,943075,175,1463440,,0,0,draw,58.92,13.42,39.051,49.32,-0.15,19.629,7.013697,1303.95515
3,943075,175,1463457,,0,0,draw,59.22,14.84,39.906,47.52,-1.62,18.339,7.492541,1271.37535
4,943075,175,1463461,,0,0,draw,59.35,15.22,40.143,47.11,-1.75,18.122,7.620882,1261.37490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139038,943075,176,1553998,,0,0,loosing,72.97,52.57,62.884,47.53,16.49,32.509,17.422943,515.93515
139039,943075,176,1554029,,0,0,loosing,72.89,52.57,62.812,48.04,16.89,32.780,17.200142,523.08470
139040,943075,176,1554058,,0,0,loosing,73.05,52.56,62.797,48.46,16.89,32.822,17.235028,537.03800
139041,943075,176,1554092,,0,0,loosing,73.01,52.57,62.754,48.47,16.91,32.937,17.179426,535.54050


* La tabla `CMP_TRACKING` contiene informaciones acerca de las posiciones de todos los jugadores y de la pelota en cada frame, y medidas calculadas tales como distancia entre jugadores, velocidad, aceleración, etc.

In [12]:
# Print the table dimensions as (rows, columns), then preview the first rows.
print(CMP_TRACKING.shape)
CMP_TRACKING.head()

(1500000, 34)


Unnamed: 0,IdGame,IdPlayer,IdFrame,IdTeam,VlTrackingX,VlTrackingY,VlTrackingBallX,VlTrackingBallY,VlTrackingBallZ,VlPlayerSpeed,...,VlPlayerDistance,DsMovementCuadrant,DsMovementType,VlPlayerAcceleration,VlRealX_n,VlBallDistance,VlPitchSlide,VlPitchDirection,VlMovementDirection,IdBallPossession
0,943075,17871,1463126,175,870,278,-1,-64,0,0.09,...,0.659469,Bottom-Right,Diagonal,0.003412,61.7,9.357377,8,-1.0,1,
1,943075,76508,1463126,176,-1800,-1998,-1,-64,0,0.45,...,0.898721,Top-Right,Diagonal,0.069043,35.24,26.413551,5,1.0,-1,
2,943075,77390,1463126,175,181,962,-1,-64,0,0.2,...,26.276851,Bottom-Right,Horizontal,0.000607,54.91,10.420173,9,-1.0,1,
3,943075,100263,1463126,176,-1026,-756,-1,-64,0,0.16,...,0.072111,Top-Right,Diagonal,-0.405625,42.3,12.367251,7,1.0,-1,
4,943075,89335,1463126,175,859,-466,-1,-64,0,0.09,...,0.361386,Top-Left,Horizontal,0.009962,61.0,9.493176,8,-1.0,-1,


## Todas estas tablas se manipulan a través de SQL

In [13]:
import sqlite3
# Open a SQLite database that lives only in memory.
conn = sqlite3.connect(':memory:')

# Copy every DataFrame into a SQL table of the same name, leaving out the
# pandas index column.
for table_name, frame in (
    ('CMP_TRACKING', CMP_TRACKING),
    ('CMP_TEAMSTATS', CMP_TEAMSTATS),
    ('CMP_BALLPOSSESSION', CMP_BALLPOSSESSION),
):
    frame.to_sql(table_name, conn, index=False)

# 2. Query que calcula el tiempo en que un equipo se repliega al perder la posesión
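La query devuelve las posesiones en las que el equipo seleccionado perdió el balón (posesiones del rival) y que comienzan con el área del convex hull del equipo seleccionado mayor que la del rival, junto con el tiempo en segundos que tardó en replegarse. El repliegue se mide como el número de frames transcurridos hasta que el área del equipo seleccionado pasa a ser menor que la del rival, dividido entre 25 para convertirlo en segundos.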

In [32]:
# Allow pandas to render up to 100 rows before truncating a displayed frame.
pd.options.display.max_rows = 100

In [37]:
# Game and team under analysis. Both values are interpolated into the SQL text
# below through str.format ({0} = IdGame, {1} = IdTeam).
IdGame = 943075
IdTeam = 175

# Query outline, innermost subquery first:
#   T1    - pairs every distinct (IdFrame, IdBallPossession) found in
#           CMP_TRACKING with the CMP_TEAMSTATS rows of the same frame,
#           restricted to possessions whose IdTeamOwning differs from IdTeam.
#           The selected team's VlConvexHullArea is negated, so summing per
#           frame gives (other rows' area - selected team's area); with one row
#           per team per frame that is rival area minus own area.
#   T2    - per possession and frame: Diff (that sum) and RN, the row number
#           of the frame inside its possession ordered by IdFrame.
#   IN    - keeps the possessions whose first row (RN = 1) has Diff < 0, i.e.
#           the selected team's hull area is the larger one at that point.
#   T3 and the outer query - recompute Diff and the row number (RN2) for the
#           kept possessions, keep the rows with Diff > 0 and return
#           MIN(RN2) / 25.0 per possession as `Segundos`.
# NOTE(review): dividing by 25.0 presumably reflects a 25 frames-per-second
# feed, and RN2 counts rows rather than frame-id differences - confirm both
# against the data source.
qry = '''
SELECT IdBallPossession,MIN(RN2)/25.0 AS Segundos
FROM(
SELECT IdBallPossession, IdFrame, SUM(VlConvexHullArea2) as Diff,
       ROW_NUMBER() OVER win2 as RN2
FROM(

    SELECT TRA.*, TEA.IdTeam,
           CASE WHEN IdTeam = {1} THEN -VlConvexHullArea else VlConvexHullArea end as VlConvexHullArea2
    FROM (SELECT DISTINCT IdFrame, IdBallPossession FROM CMP_TRACKING WHERE IdGame = {0}) TRA
    JOIN (SELECT * FROM CMP_TEAMSTATS WHERE IdGame = {0}) TEA
    ON TRA.IdFrame = TEA.IdFrame
    WHERE TRA.IdBallPossession IN

        (
        SELECT DISTINCT IdBallPossession
        FROM(
            SELECT IdBallPossession,IdFrame,SUM(VlConvexHullArea2) AS Diff,
                   ROW_NUMBER() OVER win1 AS RN
            FROM(
                SELECT TRA.*, TEA.IdTeam,
                       CASE WHEN IdTeam = {1} THEN -VlConvexHullArea ELSE VlConvexHullArea END AS VlConvexHullArea2
                FROM
                    (SELECT DISTINCT IdFrame, IdBallPossession 
                        FROM CMP_TRACKING
                        WHERE IdGame = {0}
                        ) TRA
                    JOIN (
                    SELECT * FROM CMP_TEAMSTATS 
                    WHERE IdGame = {0}
                        )TEA
                    ON TRA.IdFrame = TEA.IdFrame
                    WHERE TRA.IdBallPossession IN (SELECT IdBallPossession
                                                    FROM CMP_BALLPOSSESSION
                                                    WHERE IdTeamOwning <> {1}
                                                    AND IdGame = {0})
                    ORDER BY TRA.IdFrame,IdTeam
                )T1
                GROUP BY IdBallPossession,IdFrame
                WINDOW win1 as (PARTITION BY IdBallPossession ORDER BY IdFrame)
            )T2
            WHERE Diff<0 AND RN=1
        )
    ORDER BY TRA.IdFrame, IdTeam
    )T3
    GROUP BY IdBallPossession,IdFrame
    WINDOW win2 AS (PARTITION BY IdBallPossession ORDER BY IdFrame)
    )
    WHERE Diff>0
    GROUP BY IdBallPossession
    ORDER BY IdBallPossession
'''.format(IdGame,IdTeam)

# Run the query on `conn` and display at most the first 100 result rows.
df = pd.read_sql_query(qry,conn)
df.iloc[0:100,]

Unnamed: 0,IdBallPossession,Segundos
0,1463895-1463925,0.4
1,1464232-1464721,2.12
2,1465906-1466307,4.4
3,1468043-1468092,1.68
4,1468217-1468648,0.64
5,1469778-1470893,1.36
6,1481407-1482918,1.72
7,1484608-1484684,2.08
8,1490758-1491795,5.76
9,1501179-1502224,6.32


# 3. Visualización 

In [None]:
def ploting_area_team(IdGame,IdTeam,IdFrame,h_pitch,w_pitch,save=False):
    """Plot both teams' convex hulls, the players and the ball for one frame.

    Outfield players (the query excludes goalkeepers) are drawn over the pitch
    image: the selected team as red markers with a black hull outline, all
    other players as blue markers with a yellow hull outline. Jersey numbers,
    each side's hull area and the match clock are written on top.

    Args:
        IdGame (int): Identifier of the game.
        IdTeam (int): Identifier of the team treated as the "own" team; rows
            with any other IdTeam are treated as the rival.
        IdFrame (int): Identifier of the frame to draw.
        h_pitch (int): Extent of the pitch along the x axis of the plot.
        w_pitch (int): Extent of the pitch along the y axis of the plot.
        save (bool): When True, write the figure to
            ``convexhull_imgs/areas<IdFrame>.png`` (creating the directory if
            needed) and close it instead of leaving it open for display.

    Returns:
        None. The figure is either left open for inline display or saved.

    Raises:
        ValueError: If the tracking query returns no rows for the frame.
    """
    import os

    from scipy.spatial import ConvexHull
    import matplotlib.pyplot as plt
    import numpy as np

    # NOTE(review): this query reads OPT_Player and OPT_Game, and the clock
    # query below reads CMP_TIME; confirm those tables are loaded into `conn`
    # before calling this function.
    df = pd.read_sql_query('''
        SELECT T.IdGame, T.IdTeam, P.NuJersey, T.IdFrame, T.IdPlayer, T.VlRealX, T.VlRealY,VlRealBallX,VlRealBallY,
                VlConvexHullArea
        FROM (select * from CMP_Tracking where IdGame={0}) T
        JOIN(
            SELECT a.NuJersey, b.IdSeason, b.IdGame, a.IdPlayer, a.NaPosition
            FROM OPT_Player a
            JOIN OPT_Game b
              ON b.idSeason = a.IdSeason
            WHERE NaPosition != 'Goalkeeper'
             AND b.IdGame = {0}
            ) AS P
        ON T.IdPlayer = P.IdPlayer
        JOIN cmp_teamstats STA ON T.IdGame=STA.IdGame and T.IdFrame=STA.IdFrame and T.IdTeam=STA.IdTeam
        WHERE T.IdGame = P.IdGame
        AND T.IdFrame = {1}
        '''.format(IdGame,IdFrame),conn)

    # Fail early with a readable message, before any figure is created,
    # instead of an obscure hull/index error further down.
    if df.empty:
        raise ValueError(
            'No tracking rows found for IdGame={0}, IdFrame={1}'.format(IdGame, IdFrame))

    minseg = pd.read_sql_query('''select IdMinute,IdSecond from CMP_TIME
                        where IdGame={0} and IdFrame ={1}
                        '''.format(IdGame,IdFrame),conn)

    # Split the player positions into own team / everyone else and build both
    # hulls up front, so a degenerate hull also fails before a figure exists.
    own_rows = df['IdTeam'] == IdTeam
    points1 = np.array(df.loc[own_rows, ['VlRealX','VlRealY']])
    points2 = np.array(df.loc[~own_rows, ['VlRealX','VlRealY']])
    hull1 = ConvexHull(points1)
    hull2 = ConvexHull(points2)

    # Match clock with zero-padded seconds ("12:05" rather than "12:5").
    minute = minseg.IdMinute.min()
    second = minseg.IdSecond.min()
    try:
        clock = '{0}:{1:02d}'.format(int(minute), int(second))
    except (TypeError, ValueError):
        # No usable clock row for this frame (e.g. NaN): show the raw values.
        clock = str(minute)+':'+str(second)

    pitch = plt.imread('statszone_football_pitch.png')
    fig, ax = plt.subplots(figsize=(15, 10))

    # Hull outline first, then the player markers, for each side.
    for points, hull, outline_style, marker_style in (
        (points1, hull1, 'k', 'ro'),
        (points2, hull2, 'y', 'bo'),
    ):
        for simplex in hull.simplices:
            ax.plot(points[simplex, 0], points[simplex, 1], outline_style, linewidth=5)
        ax.plot(points[:,0], points[:,1], marker_style, markersize=30)

    # Position of the ball (taken from the first row of the frame).
    ball_x, ball_y = df[['VlRealBallX','VlRealBallY']].values.tolist()[0]
    ax.plot(ball_x, ball_y, marker='o', linestyle='-', color='w', markersize=12,
            markeredgecolor='k', zorder=10)

    # Pitch image goes underneath everything else (zorder=0).
    ax.imshow(pitch, zorder=0, extent=[0, h_pitch, 0, w_pitch], aspect='auto')

    # Jersey numbers, nudged so the text sits roughly centred on each marker.
    for jersey, x, y in zip(df.NuJersey.tolist(), df.VlRealX.tolist(), df.VlRealY.tolist()):
        ax.annotate(jersey, (x, y), xytext=(x - 0.6, y - 0.3), weight='bold', color='w', fontsize=12)

    # Text overlays are placed at fixed data coordinates.
    # NOTE(review): the positions look tuned for a pitch of about 105 x 68 - confirm.
    own_area = int(df.loc[own_rows, 'VlConvexHullArea'].min())
    rival_area = int(df.loc[~own_rows, 'VlConvexHullArea'].min())
    ax.annotate('Área propia = '+str(own_area), (10,10), xytext=(5, 65), weight='bold', color='w', fontsize=15)
    ax.annotate('Área rival = '+str(rival_area), (10,10), xytext=(5, 62), weight='bold', color='w', fontsize=15)
    ax.annotate(clock, (10,10), xytext=(35, 65), weight='bold', color='w', fontsize=25)

    ax.axis([0, h_pitch, 0, w_pitch])
    ax.tick_params(axis='both', labelsize=20)

    if save:
        # Create the output directory on first use so savefig does not fail.
        os.makedirs('convexhull_imgs', exist_ok=True)
        fig.savefig('convexhull_imgs/areas'+str(IdFrame)+'.png')
        plt.close(fig)

In [None]:
# Frame to visualise: game, reference ("own") team and frame number.
IdGame, IdTeam, IdFrame = 943075, 175, 1524081

# Pitch extents handed to the plotting function (h_pitch spans the x axis,
# w_pitch the y axis).
h_pitch, w_pitch = 105, 68

In [None]:
# Draw the selected frame inline; `save` keeps its default (False), so no file
# is written.
ploting_area_team(
    IdGame=IdGame,
    IdTeam=IdTeam,
    IdFrame=IdFrame,
    h_pitch=h_pitch,
    w_pitch=w_pitch,
)

### Generación de animaciones a partir de imágenes