# PCA of pass patterns

Notebook to reproduce the pass-pattern PCA shown in lecture 5.

#### Functions

In [27]:
def all_teams():
    """Return the set of team names used throughout the notebook.

    The names are spelled exactly as listed here (mixed case, with
    underscores); callers upper-case them where they need to.
    A new set is built on every call.
    """
    names = (
        "Austria Belgium Croatia Czech_Republic Denmark England "
        "Finland France Germany Hungary Italy Netherlands "
        "MACEDONIA_REPUBLIC_OF Poland Portugal Russia Scotland Slovakia "
        "Spain Sweden Switzerland Turkey Ukraine Wales"
    )
    # No name contains whitespace, so splitting on it recovers each entry.
    return set(names.split())
    

In [23]:
def all_patterns():
    """Return the list of four-letter pass patterns, in a fixed order.

    Each letter stands for a player; a repeated letter means the same
    player appears again in the sequence.
    """
    return "ABAB ABAC ABCB ABCA ABCD".split()

In [24]:
def cypherify(string, team = None):
    """Translate a pass pattern such as ``"ABAC"`` into a Cypher counting query.

    Each letter of ``string`` becomes a node variable and each consecutive
    pair of letters becomes a ``PASS`` relationship ``p0``, ``p1``, ...
    The generated WHERE clause requires that

    * consecutive passes have consecutive ``order`` values,
    * consecutive passes share the same ``possession`` and ``match_id``,
    * distinct letters refer to nodes with distinct ``name`` values.

    Parameters
    ----------
    string : str
        The pattern; must contain at least one letter.
    team : str, optional
        If given (and non-empty), every node in the path is restricted to
        this label.

    Returns
    -------
    str
        A query of the form ``MATCH p=... [WHERE ...] RETURN COUNT(p)``.
        The return expression is deliberately left without an alias.

    Raises
    ------
    ValueError
        If ``string`` is empty.
    """
    letters = list(string)
    if not letters:
        raise ValueError("pattern must contain at least one letter")

    # NOTE: `team` is spliced into the query text as a node label. Only pass
    # trusted values here.
    label = ":" + team if team else ""
    n_passes = len(letters) - 1

    # Always bind the path to `p`: the RETURN clause counts `p`, so the
    # team-less form needs the binding as much as the team form does.
    query = "MATCH p=(" + letters[0] + label + ")"
    for i in range(n_passes):
        query += "-[p" + str(i) + ":PASS]->(" + letters[i + 1] + label + ")"

    # Collect all predicates first and join once, so the WHERE clause never
    # starts with a dangling " and " when a group of predicates is empty.
    consecutive = [(i, i + 1) for i in range(n_passes - 1)]
    conditions = []
    # correct order
    conditions += ["p%d.order + 1 = p%d.order" % pair for pair in consecutive]
    # same possession
    conditions += ["p%d.possession = p%d.possession" % pair for pair in consecutive]
    # same match
    conditions += ["p%d.match_id = p%d.match_id" % pair for pair in consecutive]
    # different players: one inequality per unordered pair of distinct
    # letters, sorted so the generated text is the same on every run
    players = sorted(set(letters))
    conditions += [
        x + ".name <> " + y + ".name"
        for idx, x in enumerate(players)
        for y in players[idx + 1:]
    ]

    if conditions:
        query += "\nWHERE " + " and ".join(conditions)

    query += "\nRETURN COUNT(p)"
    return query

In [25]:
from neo4j import GraphDatabase
import logging
from neo4j.exceptions import ServiceUnavailable
import streamlit as st
class App:
    """Small helper that owns a Neo4j driver and runs counting queries."""

    def __init__(self, uri, user, password):
        """Open a driver for ``uri`` authenticated as ``user``/``password``."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver; call this once you are done with it."""
        self.driver.close()

    def find_pattern(self, query_string):
        """Run ``query_string`` in a read transaction and return its count.

        The first returned row is printed and its ``"cnt"`` field is
        returned. If the query yields no rows, nothing is printed and
        ``None`` is returned.
        """
        with self.driver.session() as session:
            rows = session.read_transaction(self._find_and_return_pattern, query_string)
            first_row = next(iter(rows), None)
            if first_row is None:
                return None
            print(first_row)
            return first_row["cnt"]

    @staticmethod
    def _find_and_return_pattern(tx, query_string):
        """Execute the query on ``tx`` and materialise every row into a list.

        ``" as cnt"`` is appended to the query text, so ``query_string``
        must end with an un-aliased return expression.
        """
        aliased_query = query_string + " as cnt"
        return list(tx.run(aliased_query))

#### Pattern matching

In [73]:
import os
from getpass import getpass

# Connection settings are read from the environment so that the database
# password is not stored in the notebook. NEO4J_URI and NEO4J_USER fall back
# to the values this notebook has always used; the password has no fallback
# and is prompted for when NEO4J_PASSWORD is unset or empty.
uri = os.environ.get("NEO4J_URI", "bolt://162.55.245.102")
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
app = App(uri, user, password)

# res[TEAM][pattern] -> number of matches of `pattern` for TEAM.
# Keys are the upper-cased team names, which are also used as node labels.
res = {team.upper(): {} for team in all_teams()}

for team in all_teams():
    team = team.upper()
    print("Matching "+team+"...")
    for pattern in all_patterns():
        query = cypherify(pattern, team)
        cnt = app.find_pattern(query)
        res[team][pattern] = cnt
    

Matching FINLAND...
<Record cnt=31>
<Record cnt=98>
<Record cnt=105>
<Record cnt=54>
<Record cnt=329>
Matching CZECH_REPUBLIC...
<Record cnt=39>
<Record cnt=181>
<Record cnt=147>
<Record cnt=91>
<Record cnt=610>
Matching AUSTRIA...
<Record cnt=35>
<Record cnt=182>
<Record cnt=163>
<Record cnt=99>
<Record cnt=769>
Matching MACEDONIA_REPUBLIC_OF...
<Record cnt=26>
<Record cnt=90>
<Record cnt=77>
<Record cnt=44>
<Record cnt=359>
Matching BELGIUM...
<Record cnt=57>
<Record cnt=269>
<Record cnt=256>
<Record cnt=156>
<Record cnt=1218>
Matching CROATIA...
<Record cnt=18>
<Record cnt=177>
<Record cnt=172>
<Record cnt=109>
<Record cnt=745>
Matching SWEDEN...
<Record cnt=18>
<Record cnt=109>
<Record cnt=103>
<Record cnt=58>
<Record cnt=505>
Matching SWITZERLAND...
<Record cnt=46>
<Record cnt=207>
<Record cnt=196>
<Record cnt=116>
<Record cnt=899>
Matching SPAIN...
<Record cnt=90>
<Record cnt=516>
<Record cnt=492>
<Record cnt=276>
<Record cnt=2246>
Matching TURKEY...
<Record cnt=25>
<Record cnt=1

In [61]:
# Games-played normalisation (not used).
# For each team, count the GAME nodes whose `teams` property contains the
# team name as spelled by all_teams(); results are stored under the
# upper-cased name.
# An alternative query that was tried and left disabled:
#   "MATCH (a:"+team+")-[:HAS_PLAYED]->(g:GAME) RETURN COUNT(DISTINCT(g))"
games_query_head = "\n    MATCH (g:GAME)\n    WITH g.teams as t, g\n    WHERE '"
games_query_tail = "' in t = true\n    RETURN count(g)"

games_played = {}
for team in all_teams():
    games_query = games_query_head + team + games_query_tail
    games_played[team.upper()] = app.find_pattern(games_query)

# Manual overrides for two teams.
# NOTE(review): presumably these are the teams for which the query above
# returned 0 (two such records appear in the cell output) - confirm.
games_played.update({"CZECH_REPUBLIC": 5, "MACEDONIA_REPUBLIC_OF": 3})

print(games_played)

<Record cnt=3>
<Record cnt=0>
<Record cnt=4>
<Record cnt=0>
<Record cnt=5>
<Record cnt=4>
<Record cnt=4>
<Record cnt=5>
<Record cnt=6>
<Record cnt=3>
<Record cnt=4>
<Record cnt=4>
<Record cnt=3>
<Record cnt=3>
<Record cnt=4>
<Record cnt=3>
<Record cnt=3>
<Record cnt=7>
<Record cnt=7>
<Record cnt=4>
<Record cnt=4>
<Record cnt=3>
<Record cnt=5>
<Record cnt=6>


In [72]:
# Per-game normalisation: divide each pattern count by the number of games
# recorded for the team, rounded to two decimals.
res_norm = {team.upper(): {} for team in all_teams()}
for team, n_games in games_played.items():
    for pattern, count in res[team].items():
        res_norm[team][pattern] = round(count / n_games, 2)

In [75]:
# Percentage normalisation: express each pattern count as a share of the
# team's total count over all patterns, rounded to two decimals.
percentage = {team.upper(): {} for team in all_teams()}

for team, counts in res.items():
    tot = sum(counts.values())
    for pattern, count in counts.items():
        percentage[team][pattern] = round(count / tot, 2)

In [76]:
# Display the rounded per-team pattern shares.
percentage

{'FINLAND': {'ABAB': 0.05,
  'ABAC': 0.16,
  'ABCB': 0.17,
  'ABCA': 0.09,
  'ABCD': 0.53},
 'CZECH_REPUBLIC': {'ABAB': 0.04,
  'ABAC': 0.17,
  'ABCB': 0.14,
  'ABCA': 0.09,
  'ABCD': 0.57},
 'AUSTRIA': {'ABAB': 0.03,
  'ABAC': 0.15,
  'ABCB': 0.13,
  'ABCA': 0.08,
  'ABCD': 0.62},
 'MACEDONIA_REPUBLIC_OF': {'ABAB': 0.04,
  'ABAC': 0.15,
  'ABCB': 0.13,
  'ABCA': 0.07,
  'ABCD': 0.6},
 'BELGIUM': {'ABAB': 0.03,
  'ABAC': 0.14,
  'ABCB': 0.13,
  'ABCA': 0.08,
  'ABCD': 0.62},
 'CROATIA': {'ABAB': 0.01,
  'ABAC': 0.14,
  'ABCB': 0.14,
  'ABCA': 0.09,
  'ABCD': 0.61},
 'SWEDEN': {'ABAB': 0.02,
  'ABAC': 0.14,
  'ABCB': 0.13,
  'ABCA': 0.07,
  'ABCD': 0.64},
 'SWITZERLAND': {'ABAB': 0.03,
  'ABAC': 0.14,
  'ABCB': 0.13,
  'ABCA': 0.08,
  'ABCD': 0.61},
 'SPAIN': {'ABAB': 0.02,
  'ABAC': 0.14,
  'ABCB': 0.14,
  'ABCA': 0.08,
  'ABCD': 0.62},
 'TURKEY': {'ABAB': 0.03,
  'ABAC': 0.15,
  'ABCB': 0.13,
  'ABCA': 0.09,
  'ABCD': 0.6},
 'WALES': {'ABAB': 0.02,
  'ABAC': 0.14,
  'ABCB': 0.12,
  'A

In [113]:
import json
# Persist the percentage table. The context manager closes the file handle
# as soon as the dump finishes instead of leaving it open.
with open("pca_table.json", "w") as table_file:
    json.dump(percentage, table_file)

#### Visualization

In [77]:
import pandas as pd

# Build the frame from the nested dict (outer keys become columns), then
# transpose so that each row is a team and each column a pattern.
by_team_columns = pd.DataFrame(percentage)
df = by_team_columns.transpose()
df

Unnamed: 0,ABAB,ABAC,ABCB,ABCA,ABCD
FINLAND,0.05,0.16,0.17,0.09,0.53
CZECH_REPUBLIC,0.04,0.17,0.14,0.09,0.57
AUSTRIA,0.03,0.15,0.13,0.08,0.62
MACEDONIA_REPUBLIC_OF,0.04,0.15,0.13,0.07,0.6
BELGIUM,0.03,0.14,0.13,0.08,0.62
CROATIA,0.01,0.14,0.14,0.09,0.61
SWEDEN,0.02,0.14,0.13,0.07,0.64
SWITZERLAND,0.03,0.14,0.13,0.08,0.61
SPAIN,0.02,0.14,0.14,0.08,0.62
TURKEY,0.03,0.15,0.13,0.09,0.6


In [78]:
import plotly.express as px

# Pairwise scatter plots of the pattern shares, coloured by team (the index).
features = all_patterns()

scatter_options = {"dimensions": features, "color": df.index}
fig = px.scatter_matrix(df, **scatter_options)
# Keep the panels on the diagonal visible.
fig.update_traces(diagonal_visible=True)
fig.show()

In [114]:
import plotly.express as px
from sklearn.decomposition import PCA

# Fit a PCA with the default number of components and project every team.
pca = PCA()
components = pca.fit_transform(df)

# Axis labels keyed by component index (as a string), each showing the
# percentage of variance that component explains.
variance_pct = pca.explained_variance_ratio_ * 100
labels = dict(
    (str(i), f"PC {i+1} ({var:.1f}%)")
    for i, var in enumerate(variance_pct)
)

# Scatter matrix over the first four components, coloured by team.
matrix_options = {
    "labels": labels,
    "dimensions": range(4),
    "color": df.index,
}
fig = px.scatter_matrix(components, **matrix_options)
fig.update_traces()
fig.show()

In [109]:
# Project the teams onto the first two principal components.
pca = PCA(n_components=2)
components = pca.fit_transform(df)

# One labelled point per team; the team name is both the colour key and the text.
fig = px.scatter(components, x=0, y=1, color=df.index, text=df.index)

# Text labels sit below each point; the legend is hidden.
label_style = {"textposition": 'bottom center', "showlegend": False, "textfont_size": 9}
fig.update_traces(**label_style)

# Larger markers with a dark outline, applied to traces whose mode is 'markers'.
# NOTE(review): traces created with `text=` may have a mode other than plain
# 'markers', in which case this selector matches nothing - confirm the
# styling actually shows up in the rendered figure.
marker_style = {"size": 12, "line": {"width": 2, "color": 'DarkSlateGrey'}}
fig.update_traces(marker=marker_style, selector={"mode": 'markers'})
fig.show()