In [None]:
import pandas as pd
import requests
import json
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read events_England.json into the `events` DataFrame.
events = pd.read_json("events_England.json")

In [None]:
# Preview the first rows of `events`.
events.head()

In [None]:
# Column names, dtypes and non-null counts for `events`.
events.info()

In [None]:
# Read matches_England.json into the `matches` DataFrame.
matches = pd.read_json("matches_England.json")

In [None]:
# Preview the first rows of `matches`.
matches.head()

In [None]:
# Column names, dtypes and non-null counts for `matches`.
matches.info()

In [None]:
# Rename the match id column so it lines up with the `matchId` key used for the merge.
matches.rename(columns={"wyId": "matchId"}, inplace=True)

In [None]:
# Outer-join match metadata onto the event rows using the shared `matchId` column.
df = matches.merge(events, on="matchId", how="outer")

In [None]:
# Preview the merged matches + events frame.
df.head()

In [None]:
# Read teams.json into the `teams` DataFrame.
teams = pd.read_json("teams.json")

In [None]:
# Preview the first rows of `teams`.
teams.head()

In [None]:
# Remove the city, name, area and type columns from `teams` (in place).
unused_team_columns = ["city", "name", "area", "type"]
teams.drop(columns=unused_team_columns, inplace=True)

# The id column becomes `teamId`, the name used for the lookup built from this frame.
teams.rename(columns={"wyId": "teamId"}, inplace=True)

In [None]:
# Lookup table {teamId: officialName}, used to swap ids for names in the main frame.
teamName = teams.set_index("teamId")["officialName"].to_dict()

In [None]:
# Swap team ids for official names in both the `teamId` and `winner` columns.
# Values that are not keys of `teamName` are left as they are.
df.replace({"teamId": teamName, "winner": teamName}, inplace=True)

In [None]:
# Columns that are not used in the rest of the analysis.
unused_columns = [
    'matchId', 'roundId', 'seasonId', 'teamsData', 'status', 'tags', 'venue',
    'competitionId', 'referees', 'dateutc', 'date', 'duration',
]
df.drop(columns=unused_columns, inplace=True)

# After the id -> name replacement the column holds names, so call it `team`.
df.rename(columns={'teamId': 'team'}, inplace=True)

In [None]:
# Preview `df` after the column drop and rename.
df.head()

In [None]:
# Store a tuple version of each row's `positions` value in a new `location` column.
# One pass is enough: the previous second assignment recomputed the identical column.
df['location'] = df['positions'].apply(tuple)

In [None]:
# Count event names per distinct `positions` value; the column is cast to str to form the group key.
df.groupby(df.positions.astype(str))['eventName'].value_counts()

In [None]:
# Count event names per team.
df.groupby('team')['eventName'].value_counts()

In [None]:
# List the distinct values in the `team` column.
df.team.unique()

In [None]:
# create a dataframe with only Liverpool FC events
# (note: the variable is named `manCit`, but the filter below selects 'Liverpool FC')
manCit = df[df.team == 'Liverpool FC']

In [None]:
# Preview the single-team event frame.
manCit.head()

In [None]:
# now a dataframe containing just the team's gameweek-4 events
# (the `weekOneMC` name is kept because the following cells refer to it).
# .copy() makes this an independent frame: later cells add columns to it and drop rows
# in place, which should not operate on a slice of `manCit`.
weekOneMC = manCit[manCit.gameweek == 4].copy()

In [None]:
# Column names, dtypes and non-null counts for the single-gameweek frame.
weekOneMC.info()

The `positions` column holds the x, y coordinates on the field grid for the start and end of each action. To plot an action as a single point we need to keep just one of those pairs; I am going to use the first one, the starting position.

In [None]:
# Split the first entry of each `positions` value into separate x and y columns for plotting.
# Approach taken from:
# https://github.com/Friends-of-Tracking-Data-FoTD/mapping-match-events-in-Python/blob/master/data_exploration.ipynb

first_points = [pos[0] for pos in weekOneMC['positions']]
weekOneMC['x'] = [point['x'] for point in first_points]
weekOneMC['y'] = [point['y'] for point in first_points]

In [None]:
# x and y now carry the coordinates, so remove the source columns and the team column.
weekOneMC.drop(columns=['positions', 'team', 'location'], inplace=True)

In [None]:
# Preview the frame after adding x/y and dropping the unused columns.
weekOneMC.head()

In [None]:
# Distinct player ids that appear in this match's events.
weekOneMC.playerId.unique()

In [None]:
# Sum of the `eventSec` values per player.
# NOTE(review): this adds up event timestamps rather than measuring time on the pitch — confirm it is the intended proxy.
weekOneMC.groupby('playerId')['eventSec'].sum()

In [None]:
# For each player, add up the 11 largest `eventSec` values, then show the 4 smallest totals.
# The per-player total uses groupby(level=0) because the `level=` argument of Series.sum
# was deprecated in pandas 1.3 and removed in 2.0. sort=False keeps the groups in
# first-seen order, as the old `sum(level=0)` did.
(
    weekOneMC.groupby('playerId')['eventSec']
    .nlargest(11)
    .groupby(level=0, sort=False)
    .sum()
    .nsmallest(4)
)

In [None]:
# Read players.json into the `players` DataFrame.
players = pd.read_json("players.json")

In [None]:
# Preview the first rows of `players`.
players.head()

In [None]:
# Index `players` by `wyId` so rows can be looked up by player id with .loc.
# Note: this is in place, so re-running the cell fails once `wyId` is no longer a column.
players.set_index("wyId", inplace=True)

In [None]:
# Keep only the name columns; the index is whatever `players` is indexed by at this point.
playerName= players[['firstName','lastName','shortName']]

In [None]:
# Look up the names stored under index label 340386.
playerName.loc[340386]

In [None]:
# Hard-coded playerIds to exclude (described in the notebook as the players with the fewest minutes).
bottom = [0, 340386, 71654, 8325]

# Remove every event row that belongs to one of those ids, in place.
excluded_rows = weekOneMC.index[weekOneMC['playerId'].isin(bottom)]
weekOneMC.drop(excluded_rows, inplace=True)


In [None]:
# Distinct player ids remaining after the exclusion above.
weekOneMC.playerId.unique()

In [None]:
# Summary statistics for the numeric columns.
weekOneMC.describe()

In [None]:
# Rescale x and y from a 0-100 range onto a 120 x 80 grid.
# Note: the columns are overwritten, so running this cell twice rescales them twice.
weekOneMC['x'] = weekOneMC['x'].div(100).mul(120)
weekOneMC['y'] = weekOneMC['y'].div(100).mul(80)

In [None]:
import matplotlib.pyplot as plt

# Plain scatter of every event's start position, coloured by playerId.
plt.scatter(x=weekOneMC['x'], y=weekOneMC['y'], c=weekOneMC['playerId'])

In [None]:
# Draw the event start positions on an mplsoccer pitch, coloured by playerId.
from mplsoccer.pitch import Pitch
# NOTE(review): `matplotsoccer` is not referenced in the visible cells — confirm this import is still needed.
import matplotsoccer
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# NOTE(review): figsize is passed to the Pitch constructor; some mplsoccer releases may expect it
# on pitch.draw() instead — confirm against the installed version.
pitch = Pitch(pitch_color='grass', line_color='white', stripe=True, figsize=(10, 8))

fig, ax = pitch.draw()

# `c` must be numeric here, so this cell has to run before playerId is replaced by names.
sc = pitch.scatter(weekOneMC.x, weekOneMC.y,
                   c=weekOneMC.playerId, cmap='hsv',
                   s=10, label='scatter', ax=ax)



In [None]:
# Lookup table {index label of `playerName`: lastName}, used to swap playerIds for last names.
playerDict = playerName['lastName'].to_dict()

In [None]:
# use the dictionary to remap playerId values to last names (in place);
# ids that are not keys of the dictionary are left unchanged
weekOneMC.replace({'playerId':playerDict}, inplace=True)

In [None]:
# function to find the centroid for each player
def centroid(xVal, yVal, player):
    """Return the mean position of one player's events.

    Parameters
    ----------
    xVal, yVal : sized collections of numbers (e.g. pandas Series or lists)
        The x and y coordinates of the player's events.
    player : any
        Label stored alongside the coordinates (the caller passes the player name or id).

    Returns
    -------
    list
        A new ``[x_mean, y_mean, player]`` list on every call.

    Notes
    -----
    The result is built locally instead of being appended to a module-level
    ``center`` list, so the function no longer raises NameError when that
    global has not been created by the caller. Empty inputs are not
    special-cased: the division by ``len(...)`` is performed as-is.
    """
    xCent = np.sum(xVal) / len(xVal)
    yCent = np.sum(yVal) / len(yVal)
    return [xCent, yCent, player]

In [None]:
# Snapshot of the distinct playerId values currently in weekOneMC.
# Note: this is taken at this point in the notebook; rows dropped from weekOneMC afterwards do not update it.
playerList=weekOneMC.playerId.unique()
print(playerList)

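Starting lineup (note: this list appears to be left over from an earlier Manchester City version of the analysis and is incomplete; the data above is filtered to Liverpool FC): Moraes, Danilo, Stones, Otamendi, Mendy, Walker, De Bruyne, da Silva, de Jesus, ...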

In [None]:
# Hard-coded playerId values to exclude: four last names plus the value 0
# (0 presumably marks events with no mapped player — TODO confirm).
bottom = ['Milner', 'Oxlade-Chamberlain', 'Solanke', 0, 'Mignolet']

# Remove every event row that belongs to one of those values, in place.
excluded_rows = weekOneMC.index[weekOneMC['playerId'].isin(bottom)]
weekOneMC.drop(excluded_rows, inplace=True)


In [None]:
# Keep only the rows whose matchPeriod is '1H'.
first_half_mask = weekOneMC['matchPeriod'].eq('1H')
halfOne = weekOneMC.loc[first_half_mask]

In [None]:
from pandas import DataFrame

# Build one [x, y, name] row per player from the first-half events.
# Iterate over the players that actually have rows in `halfOne`: the earlier `playerList`
# snapshot was taken before the last exclusion step, so it can contain players with no
# remaining events, and those would yield NaN centroids.
form = []
for i in halfOne.playerId.unique():
    # fresh module-level list per player, kept for centroid implementations that accumulate into `center`
    center = []
    role = halfOne[halfOne.playerId == i]
    form.append(centroid(role.x, role.y, i))
formation = DataFrame(form, columns=['x', 'y', 'name'])

In [None]:
# Show up to the first 11 rows of the centroid table.
formation.head(11)

In [None]:
# Plot each player's centroid on an mplsoccer pitch and label it with the player's name.
pitch = Pitch(pitch_color='grass', line_color='white', stripe=True)
fig, ax = pitch.draw()

sc = pitch.scatter(formation.x, formation.y, cmap='hsv', s=50, label='scatter', ax=ax)

# Walk the three columns together so each label lands on its own point.
for txt, x_pos, y_pos in zip(formation.name, formation.x, formation.y):
    ax.annotate(txt, (x_pos, y_pos))


In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Arc, Rectangle, ConnectionPatch
from matplotlib.offsetbox import  OffsetImage

from functools import reduce

In [None]:
def draw_pitch(ax):
    """Add the outline and markings of a full 120 x 80 pitch to ``ax``.

    The markings are matplotlib patches created in data coordinates: the pitch
    outline, both penalty areas, the halfway line, both six-yard boxes, the
    centre circle and spot, both penalty spots and both penalty arcs. They are
    added with ``ax.add_patch`` in that order (right penalty spot before left).

    Parameters
    ----------
    ax : matplotlib Axes
        The axes that receives the patches. Axis limits are not changed here.

    Returns
    -------
    None
    """
    markings = [
        # pitch outline
        Rectangle([0, 0], width=120, height=80, fill=False),
        # left and right penalty areas
        Rectangle([0, 22.3], width=14.6, height=35.3, fill=False),
        Rectangle([105.4, 22.3], width=14.6, height=35.3, fill=False),
        # halfway line
        ConnectionPatch([60, 0], [60, 80], "data", "data"),
        # left and right six-yard boxes
        Rectangle([0, 32], width=4.9, height=16, fill=False),
        Rectangle([115.1, 32], width=4.9, height=16, fill=False),
        # centre circle and centre spot
        plt.Circle((60, 40), 8.1, color="black", fill=False),
        plt.Circle((60, 40), 0.71, color="black"),
        # penalty spots (right, then left)
        plt.Circle((110.3, 40), 0.71, color="black"),
        plt.Circle((9.7, 40), 0.71, color="black"),
        # arcs outside the left and right penalty areas
        Arc((9.7, 40), height=16.2, width=16.2, angle=0, theta1=310, theta2=50, color="black"),
        Arc((110.3, 40), height=16.2, width=16.2, angle=0, theta1=130, theta2=230, color="black"),
    ]
    for marking in markings:
        ax.add_patch(marking)

In [None]:
# Draw the hand-built pitch and overlay each player's average position.
fig = plt.figure()
fig.set_size_inches(7, 5)
ax = fig.add_subplot(1, 1, 1)
draw_pitch(ax)  # overlay the pitch markings

# Leave a small margin around the 120 x 80 pitch and hide the axes frame.
ax.set_xlim(-2, 122)
ax.set_ylim(-2, 82)
ax.axis('off')

# x runs along the 120-unit pitch length (horizontal axis) and y along the 80-unit width.
# Passing them the other way round puts points with x > 82 outside the y-limits.
ax.scatter(formation.x, formation.y)

plt.show()

In [None]:
# Bare scatter of the player centroids, without a pitch.
# NOTE(review): formation.y is passed as the horizontal coordinate and formation.x as the vertical one — confirm this orientation is intended.
plt.scatter(formation.y, formation.x)