# Rumble preprocessing notebook

This notebook preprocesses the data, converting it from JSON into the nodes and relationships of a Neo4j graph.

The original dataset can be found here: https://github.com/statsbomb/open-data/tree/master/data

Running this notebook requires RumbleDB; see https://rumbledb.org/ for installation details.

In [1]:
import requests
import json
import time
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def rumble(line, cell=None):
    """Line/cell magic: run a JSONiq query on the Rumble server and print the outcome.

    The query text is the cell body when used as a cell magic, otherwise the
    rest of the line. It is POSTed to the module-level ``server`` URL with a
    ``result-size`` of 1000.

    Prints the elapsed time, any ``warning`` field, and every item of
    ``values`` (one JSON document per item, each followed by a blank line).

    Returns:
        None when the response contains ``values``; the ``error-message``
        field when present; otherwise the decoded response itself.
    """
    query_text = cell if cell is not None else line

    started = time.time()
    reply = requests.post(server, data=query_text, params = {"result-size" : 1000})
    response = json.loads(reply.text)
    finished = time.time()
    print("Took: %s s" % (finished - started))

    if 'warning' in response:
        print(json.dumps(response['warning']))

    # Without a result set there is nothing to print: hand back the error or the raw response.
    if 'values' not in response:
        if 'error-message' in response:
            return response['error-message']
        return response

    for item in response['values']:
        print(json.dumps(item))
        print()

In [2]:
def rumble_return(line, cell=None):
    """Run a JSONiq query on the Rumble server and return the result instead of printing it.

    Args:
        line: Query text, used when ``cell`` is None.
        cell: Query text; takes precedence over ``line`` when given.

    Returns:
        A JSON string encoding the list in the response's ``values`` field
        (callers decode it with ``json.loads``); the ``error-message`` field
        when the server reported one; otherwise the decoded response itself.

    The query is POSTed to the module-level ``server`` URL with a
    ``result-size`` of 10000. A ``warning`` field, if present, is printed.
    """
    data = line if cell is None else cell

    response = json.loads(
        requests.post(server, data=data, params={"result-size": 10000}).text
    )

    if 'warning' in response:
        print(json.dumps(response['warning']))

    if 'values' in response:
        # The file dump that used to follow this return could never run and has been dropped.
        return json.dumps(response['values'])
    if 'error-message' in response:
        return response['error-message']
    return response

In [3]:
import os
import warnings

# Connection settings come from the environment so that no secret is stored in
# the notebook. The non-secret values keep their previous defaults.
server = os.environ.get("RUMBLE_SERVER", 'http://localhost:8001/jsoniq')
neo_uri = os.environ.get("NEO4J_URI", "neo4j+s://00e145e7.databases.neo4j.io:7687")
neo_user = os.environ.get("NEO4J_USER", "soccer_analytics")

# NOTE(review): the password used to be hard-coded here. It should be rotated,
# since it has been committed, and must now be supplied via NEO4J_PASSWORD.
neo_pass = os.environ.get("NEO4J_PASSWORD", "")
if not neo_pass:
    warnings.warn(
        "NEO4J_PASSWORD is not set; Neo4j queries will fail to authenticate "
        "until it is exported in the environment that starts the kernel."
    )

### Loading players

In [4]:
from neo4j import GraphDatabase
import logging
from neo4j.exceptions import ServiceUnavailable
import streamlit as st
class App:
    """Small wrapper around a Neo4j driver that runs the notebook's write queries."""

    def __init__(self, uri, user, password):
        """Create the driver for ``uri`` using basic auth with ``user``/``password``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        # Don't forget to close the driver connection when you are finished with it
        self.driver.close()

    def create_player(self, query):
        """Run ``query`` in a write transaction.

        Returns:
            A list with one ``{"": name}`` dict per returned row, where
            ``name`` is read from the row's ``x`` column.
        """
        with self.driver.session() as session:
            # Write transactions allow the driver to handle retries and transient errors
            return session.write_transaction(
                self._create_and_return_player, query)

    @staticmethod
    def _create_and_return_player(tx, query):
        """Transaction function: execute ``query`` and collect ``row["x"]["name"]`` per row."""
        # To learn more about the Cypher syntax, see https://neo4j.com/docs/cypher-manual/current/
        # The Reference Card is also a good resource for keywords https://neo4j.com/docs/cypher-refcard/current/
        try:
            # tx.run sits inside the try so a failure while sending the query is logged too.
            result = tx.run(query)
            return [{"": row["x"]["name"]}
                    for row in result]
        # Capture any errors along with the query and data for traceability
        except ServiceUnavailable as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query, exception=exception))
            raise

    def create_relationship(self, query):
        """Run ``query`` in a write transaction.

        Returns:
            A list with one ``{"row": row}`` dict per returned row.
        """
        with self.driver.session() as session:
            # Write transactions allow the driver to handle retries and transient errors
            return session.write_transaction(
                self._create_and_return_relationship, query)

    @staticmethod
    def _create_and_return_relationship(tx, query):
        """Transaction function: execute ``query`` and wrap every returned row.

        The parameter used to be called ``diz`` while the body read ``query``,
        so the method only worked when a global named ``query`` happened to
        hold the same text. It is only ever invoked positionally by
        ``create_relationship``.
        """
        try:
            result = tx.run(query)
            return [{"row": row}
                    for row in result]
        # Capture any errors along with the query and data for traceability
        except ServiceUnavailable as exception:
            logging.error("{query} raised an error: \n {exception}".format(
                query=query, exception=exception))
            raise

2022-04-24 21:35:24.833 INFO    numexpr.utils: NumExpr defaulting to 8 threads.


In [136]:
# Open the Neo4j connection used by every loading cell below, reusing the
# settings defined in the configuration cell.
uri, user, password = neo_uri, neo_user, neo_pass
app = App(uri, user, password)

In [6]:

# Collect every player found in the lineup file of each match id listed in
# matches_id.json, then group on (name, player_id, jersey_number, country) so
# that each distinct combination is returned once.
# `a` holds whatever rumble_return gives back for this query.
a = rumble_return("""let $a := for $matches in json-doc("matches_id.json")[]
for $match in $matches[]
for $doc in json-doc("lineups_cleaned/" || string($match) || ".json")
for $team in $doc[]
for $player in $team.lineup[]
return { "name" : $player.player_name, "player_id":$player.player_id, "jersey_number":$player.jersey_number, "country":$player.country.name}

for $player in $a
group by $player_name := $player.name, $player_id:= $player.player_id, $jersey_number := $player.jersey_number, $country := $player.country
return { "name" : $player_name, "player_id":$player_id, "jersey_number":$jersey_number, "country":$country}""")



In [103]:
def jsontoCypher(x):
    """Serialize a JSON-decoded Python value as a Cypher literal.

    Args:
        x: ``None``, a bool, a number, a string, a list/tuple, or a dict with
            string keys. Containers are serialized recursively.

    Returns:
        The Cypher text: ``null``, ``true``/``false``, the number's ``str()``,
        a double-quoted string with backslashes and double quotes escaped,
        ``[a, b]`` for sequences and ``{ key:value, ... }`` for dicts.

    Raises:
        TypeError: if ``x`` (or a nested value) is of any other type.
    """
    if x is None:
        return "null"

    # bool must be tested before int because bool is a subclass of int.
    if isinstance(x, bool):
        return "true" if x else "false"

    if isinstance(x, str):
        # Escape the backslash first so the escapes added for quotes survive.
        escaped = x.replace("\\", "\\\\").replace('"', '\\"')
        return '"' + escaped + '"'

    if isinstance(x, dict):
        pairs = [key + ":" + jsontoCypher(value) for key, value in x.items()]
        return "{ " + ", ".join(pairs) + " }"

    if isinstance(x, (list, tuple)):
        return "[" + ", ".join(jsontoCypher(item) for item in x) + "]"

    # The old test `type(x) is int or bool` was always true and silently
    # stringified anything; numbers are now the only values handled here.
    if isinstance(x, (int, float)):
        return str(x)

    raise TypeError("cannot convert %s to a Cypher literal" % type(x).__name__)
    

In [119]:
# Create one PLAYER node per record returned by the players query.
js = json.loads(a)

for x in js:
    # The country becomes a second label: upper-cased, spaces turned into
    # underscores and commas removed (e.g. "Czech Republic" -> CZECH_REPUBLIC).
    country_label = x["country"].upper().replace(" ", "_").replace(",", "")
    query = 'CREATE (x:PLAYER:' + country_label + ' ' + jsontoCypher(x) + ' ) RETURN x'
    app.create_player(query)

{name:"Michael Krmencik", player_id:"16037", jersey_number:"11", country:"Czech Republic"}
{ name:"Michael Krmencik", player_id:16037, jersey_number:11, country:"Czech Republic" }
CREATE (x:PLAYER:CZECH_REPUBLIC { name:"Michael Krmencik", player_id:16037, jersey_number:11, country:"Czech Republic" } ) RETURN x
Created player: {'': 'Michael Krmencik'}
{name:"Silvan Widmer", player_id:"7796", jersey_number:"3", country:"Switzerland"}
{ name:"Silvan Widmer", player_id:7796, jersey_number:3, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Silvan Widmer", player_id:7796, jersey_number:3, country:"Switzerland" } ) RETURN x
Created player: {'': 'Silvan Widmer'}
{name:"lukasz Fabianski", player_id:"3262", jersey_number:"22", country:"Poland"}
{ name:"lukasz Fabianski", player_id:3262, jersey_number:22, country:"Poland" }
CREATE (x:PLAYER:POLAND { name:"lukasz Fabianski", player_id:3262, jersey_number:22, country:"Poland" } ) RETURN x
Created player: {'': 'lukasz Fabianski'}
{name:

Created player: {'': 'Stefan Ilsanker'}
{name:"Hakan calhanoglu", player_id:"7039", jersey_number:"10", country:"Turkey"}
{ name:"Hakan calhanoglu", player_id:7039, jersey_number:10, country:"Turkey" }
CREATE (x:PLAYER:TURKEY { name:"Hakan calhanoglu", player_id:7039, jersey_number:10, country:"Turkey" } ) RETURN x
Created player: {'': 'Hakan calhanoglu'}
{name:"Admir Mehmedi", player_id:"8219", jersey_number:"18", country:"Switzerland"}
{ name:"Admir Mehmedi", player_id:8219, jersey_number:18, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Admir Mehmedi", player_id:8219, jersey_number:18, country:"Switzerland" } ) RETURN x
Created player: {'': 'Admir Mehmedi'}
{name:"Jan Bednarek", player_id:"4626", jersey_number:"5", country:"Poland"}
{ name:"Jan Bednarek", player_id:4626, jersey_number:5, country:"Poland" }
CREATE (x:PLAYER:POLAND { name:"Jan Bednarek", player_id:4626, jersey_number:5, country:"Poland" } ) RETURN x
Created player: {'': 'Jan Bednarek'}
{name:"Karol Line

Created player: {'': 'Thomas Mueller'}
{name:"Karim Onisiwo", player_id:"11278", jersey_number:"20", country:"Austria"}
{ name:"Karim Onisiwo", player_id:11278, jersey_number:20, country:"Austria" }
CREATE (x:PLAYER:AUSTRIA { name:"Karim Onisiwo", player_id:11278, jersey_number:20, country:"Austria" } ) RETURN x
Created player: {'': 'Karim Onisiwo'}
{name:"Glen Kamara", player_id:"13215", jersey_number:"6", country:"Finland"}
{ name:"Glen Kamara", player_id:13215, jersey_number:6, country:"Finland" }
CREATE (x:PLAYER:FINLAND { name:"Glen Kamara", player_id:13215, jersey_number:6, country:"Finland" } ) RETURN x
Created player: {'': 'Glen Kamara'}
{name:"Nathan Patterson", player_id:"37338", jersey_number:"22", country:"Scotland"}
{ name:"Nathan Patterson", player_id:37338, jersey_number:22, country:"Scotland" }
CREATE (x:PLAYER:SCOTLAND { name:"Nathan Patterson", player_id:37338, jersey_number:22, country:"Scotland" } ) RETURN x
Created player: {'': 'Nathan Patterson'}
{name:"Sergio Bus

Created player: {'': 'David Zima'}
{name:"Marek Rodak", player_id:"4152", jersey_number:"23", country:"Slovakia"}
{ name:"Marek Rodak", player_id:4152, jersey_number:23, country:"Slovakia" }
CREATE (x:PLAYER:SLOVAKIA { name:"Marek Rodak", player_id:4152, jersey_number:23, country:"Slovakia" } ) RETURN x
Created player: {'': 'Marek Rodak'}
{name:"Teemu Pukki", player_id:"9476", jersey_number:"10", country:"Finland"}
{ name:"Teemu Pukki", player_id:9476, jersey_number:10, country:"Finland" }
CREATE (x:PLAYER:FINLAND { name:"Teemu Pukki", player_id:9476, jersey_number:10, country:"Finland" } ) RETURN x
Created player: {'': 'Teemu Pukki'}
{name:"Joe Allen", player_id:"10931", jersey_number:"7", country:"Wales"}
{ name:"Joe Allen", player_id:10931, jersey_number:7, country:"Wales" }
CREATE (x:PLAYER:WALES { name:"Joe Allen", player_id:10931, jersey_number:7, country:"Wales" } ) RETURN x
Created player: {'': 'Joe Allen'}
{name:"Stanislav Lobotka", player_id:"6798", jersey_number:"22", countr

Created player: {'': 'Jason Denayer'}
{name:"Ilkay Guendogan", player_id:"10287", jersey_number:"21", country:"Germany"}
{ name:"Ilkay Guendogan", player_id:10287, jersey_number:21, country:"Germany" }
CREATE (x:PLAYER:GERMANY { name:"Ilkay Guendogan", player_id:10287, jersey_number:21, country:"Germany" } ) RETURN x
Created player: {'': 'Ilkay Guendogan'}
{name:"Kevin Volland", player_id:"8215", jersey_number:"9", country:"Germany"}
{ name:"Kevin Volland", player_id:8215, jersey_number:9, country:"Germany" }
CREATE (x:PLAYER:GERMANY { name:"Kevin Volland", player_id:8215, jersey_number:9, country:"Germany" } ) RETURN x
Created player: {'': 'Kevin Volland'}
{name:"Stefan Posch", player_id:"11821", jersey_number:"5", country:"Austria"}
{ name:"Stefan Posch", player_id:11821, jersey_number:5, country:"Austria" }
CREATE (x:PLAYER:AUSTRIA { name:"Stefan Posch", player_id:11821, jersey_number:5, country:"Austria" } ) RETURN x
Created player: {'': 'Stefan Posch'}
{name:"Robert Skov", player_

Created player: {'': 'Remo Freuler'}
{name:"Alessandro Bastoni", player_id:"7480", jersey_number:"23", country:"Italy"}
{ name:"Alessandro Bastoni", player_id:7480, jersey_number:23, country:"Italy" }
CREATE (x:PLAYER:ITALY { name:"Alessandro Bastoni", player_id:7480, jersey_number:23, country:"Italy" } ) RETURN x
Created player: {'': 'Alessandro Bastoni'}
{name:"Denys Popov", player_id:"32753", jersey_number:"25", country:"Ukraine"}
{ name:"Denys Popov", player_id:32753, jersey_number:25, country:"Ukraine" }
CREATE (x:PLAYER:UKRAINE { name:"Denys Popov", player_id:32753, jersey_number:25, country:"Ukraine" } ) RETURN x
Created player: {'': 'Denys Popov'}
{name:"Michal Duris", player_id:"10927", jersey_number:"21", country:"Slovakia"}
{ name:"Michal Duris", player_id:10927, jersey_number:21, country:"Slovakia" }
CREATE (x:PLAYER:SLOVAKIA { name:"Michal Duris", player_id:10927, jersey_number:21, country:"Slovakia" } ) RETURN x
Created player: {'': 'Michal Duris'}
{name:"Kristoffer Olsso

Created player: {'': 'William Silva de Carvalho'}
{name:"Leonardo Bonucci", player_id:"7173", jersey_number:"19", country:"Italy"}
{ name:"Leonardo Bonucci", player_id:7173, jersey_number:19, country:"Italy" }
CREATE (x:PLAYER:ITALY { name:"Leonardo Bonucci", player_id:7173, jersey_number:19, country:"Italy" } ) RETURN x
Created player: {'': 'Leonardo Bonucci'}
{name:"Paulus Arajuuri", player_id:"24455", jersey_number:"2", country:"Finland"}
{ name:"Paulus Arajuuri", player_id:24455, jersey_number:2, country:"Finland" }
CREATE (x:PLAYER:FINLAND { name:"Paulus Arajuuri", player_id:24455, jersey_number:2, country:"Finland" } ) RETURN x
Created player: {'': 'Paulus Arajuuri'}
{name:"Jules Kounde", player_id:"4445", jersey_number:"25", country:"France"}
{ name:"Jules Kounde", player_id:4445, jersey_number:25, country:"France" }
CREATE (x:PLAYER:FRANCE { name:"Jules Kounde", player_id:4445, jersey_number:25, country:"France" } ) RETURN x
Created player: {'': 'Jules Kounde'}
{name:"Emil Pete

Created player: {'': 'Jurrien Maduro'}
{name:"Robin Olsen", player_id:"5622", jersey_number:"1", country:"Sweden"}
{ name:"Robin Olsen", player_id:5622, jersey_number:1, country:"Sweden" }
CREATE (x:PLAYER:SWEDEN { name:"Robin Olsen", player_id:5622, jersey_number:1, country:"Sweden" } ) RETURN x
Created player: {'': 'Robin Olsen'}
{name:"Tomas Koubek", player_id:"3428", jersey_number:"23", country:"Czech Republic"}
{ name:"Tomas Koubek", player_id:3428, jersey_number:23, country:"Czech Republic" }
CREATE (x:PLAYER:CZECH_REPUBLIC { name:"Tomas Koubek", player_id:3428, jersey_number:23, country:"Czech Republic" } ) RETURN x
Created player: {'': 'Tomas Koubek'}
{name:"Anton Shunin", player_id:"38119", jersey_number:"1", country:"Russia"}
{ name:"Anton Shunin", player_id:38119, jersey_number:1, country:"Russia" }
CREATE (x:PLAYER:RUSSIA { name:"Anton Shunin", player_id:38119, jersey_number:1, country:"Russia" } ) RETURN x
Created player: {'': 'Anton Shunin'}
{name:"Bryan Cristante", playe

Created player: {'': 'Mathias Jattah-Njie Jorgensen'}
{name:"Marcus Andreas Danielsson", player_id:"26875", jersey_number:"24", country:"Sweden"}
{ name:"Marcus Andreas Danielsson", player_id:26875, jersey_number:24, country:"Sweden" }
CREATE (x:PLAYER:SWEDEN { name:"Marcus Andreas Danielsson", player_id:26875, jersey_number:24, country:"Sweden" } ) RETURN x
Created player: {'': 'Marcus Andreas Danielsson'}
{name:"Joni Ensio Kauko", player_id:"24303", jersey_number:"19", country:"Finland"}
{ name:"Joni Ensio Kauko", player_id:24303, jersey_number:19, country:"Finland" }
CREATE (x:PLAYER:FINLAND { name:"Joni Ensio Kauko", player_id:24303, jersey_number:19, country:"Finland" } ) RETURN x
Created player: {'': 'Joni Ensio Kauko'}
{name:"Artem Dovbyk", player_id:"25079", jersey_number:"26", country:"Ukraine"}
{ name:"Artem Dovbyk", player_id:25079, jersey_number:26, country:"Ukraine" }
CREATE (x:PLAYER:UKRAINE { name:"Artem Dovbyk", player_id:25079, jersey_number:26, country:"Ukraine" } ) R

Created player: {'': 'Simon Sluga'}
{name:"Ethan Ampadu", player_id:"4934", jersey_number:"15", country:"Wales"}
{ name:"Ethan Ampadu", player_id:4934, jersey_number:15, country:"Wales" }
CREATE (x:PLAYER:WALES { name:"Ethan Ampadu", player_id:4934, jersey_number:15, country:"Wales" } ) RETURN x
Created player: {'': 'Ethan Ampadu'}
{name:"Jose Luis Gaya Pena", player_id:"6596", jersey_number:"14", country:"Spain"}
{ name:"Jose Luis Gaya Pena", player_id:6596, jersey_number:14, country:"Spain" }
CREATE (x:PLAYER:SPAIN { name:"Jose Luis Gaya Pena", player_id:6596, jersey_number:14, country:"Spain" } ) RETURN x
Created player: {'': 'Jose Luis Gaya Pena'}
{name:"Kasper Schmeichel", player_id:"3815", jersey_number:"1", country:"Denmark"}
{ name:"Kasper Schmeichel", player_id:3815, jersey_number:1, country:"Denmark" }
CREATE (x:PLAYER:DENMARK { name:"Kasper Schmeichel", player_id:3815, jersey_number:1, country:"Denmark" } ) RETURN x
Created player: {'': 'Kasper Schmeichel'}
{name:"Nathan Ake

Created player: {'': 'Fehmi Mert Gunok'}
{name:"Robert Mak", player_id:"16307", jersey_number:"20", country:"Slovakia"}
{ name:"Robert Mak", player_id:16307, jersey_number:20, country:"Slovakia" }
CREATE (x:PLAYER:SLOVAKIA { name:"Robert Mak", player_id:16307, jersey_number:20, country:"Slovakia" } ) RETURN x
Created player: {'': 'Robert Mak'}
{name:"Marten de Roon", player_id:"6994", jersey_number:"15", country:"Netherlands"}
{ name:"Marten de Roon", player_id:6994, jersey_number:15, country:"Netherlands" }
CREATE (x:PLAYER:NETHERLANDS { name:"Marten de Roon", player_id:6994, jersey_number:15, country:"Netherlands" } ) RETURN x
Created player: {'': 'Marten de Roon'}
{name:"David Olatukunbo Alaba", player_id:"8220", jersey_number:"8", country:"Austria"}
{ name:"David Olatukunbo Alaba", player_id:8220, jersey_number:8, country:"Austria" }
CREATE (x:PLAYER:AUSTRIA { name:"David Olatukunbo Alaba", player_id:8220, jersey_number:8, country:"Austria" } ) RETURN x
Created player: {'': 'David 

Created player: {'': 'Joona Toivio'}
{name:"Albin Ekdal", player_id:"5624", jersey_number:"8", country:"Sweden"}
{ name:"Albin Ekdal", player_id:5624, jersey_number:8, country:"Sweden" }
CREATE (x:PLAYER:SWEDEN { name:"Albin Ekdal", player_id:5624, jersey_number:8, country:"Sweden" } ) RETURN x
Created player: {'': 'Albin Ekdal'}
{name:"Loic Nego", player_id:"16741", jersey_number:"7", country:"Hungary"}
{ name:"Loic Nego", player_id:16741, jersey_number:7, country:"Hungary" }
CREATE (x:PLAYER:HUNGARY { name:"Loic Nego", player_id:16741, jersey_number:7, country:"Hungary" } ) RETURN x
Created player: {'': 'Loic Nego'}
{name:"Thiago Alcantara do Nascimento", player_id:"5208", jersey_number:"10", country:"Spain"}
{ name:"Thiago Alcantara do Nascimento", player_id:5208, jersey_number:10, country:"Spain" }
CREATE (x:PLAYER:SPAIN { name:"Thiago Alcantara do Nascimento", player_id:5208, jersey_number:10, country:"Spain" } ) RETURN x
Created player: {'': 'Thiago Alcantara do Nascimento'}
{nam

Created player: {'': 'Dominik Livakovic'}
{name:"Marlos Romero Bonfim", player_id:"16811", jersey_number:"11", country:"Ukraine"}
{ name:"Marlos Romero Bonfim", player_id:16811, jersey_number:11, country:"Ukraine" }
CREATE (x:PLAYER:UKRAINE { name:"Marlos Romero Bonfim", player_id:16811, jersey_number:11, country:"Ukraine" } ) RETURN x
Created player: {'': 'Marlos Romero Bonfim'}
{name:"Merih Demiral", player_id:"23558", jersey_number:"3", country:"Turkey"}
{ name:"Merih Demiral", player_id:23558, jersey_number:3, country:"Turkey" }
CREATE (x:PLAYER:TURKEY { name:"Merih Demiral", player_id:23558, jersey_number:3, country:"Turkey" } ) RETURN x
Created player: {'': 'Merih Demiral'}
{name:"Egzon Bejtulai", player_id:"46915", jersey_number:"2", country:"Macedonia, Republic of"}
{ name:"Egzon Bejtulai", player_id:46915, jersey_number:2, country:"Macedonia, Republic of" }
CREATE (x:PLAYER:MACEDONIA_REPUBLIC_OF { name:"Egzon Bejtulai", player_id:46915, jersey_number:2, country:"Macedonia, Rep

Created player: {'': 'Irfan Can Kahveci'}
{name:"Tihomir Kostadinov", player_id:"44229", jersey_number:"15", country:"Macedonia, Republic of"}
{ name:"Tihomir Kostadinov", player_id:44229, jersey_number:15, country:"Macedonia, Republic of" }
CREATE (x:PLAYER:MACEDONIA_REPUBLIC_OF { name:"Tihomir Kostadinov", player_id:44229, jersey_number:15, country:"Macedonia, Republic of" } ) RETURN x
Created player: {'': 'Tihomir Kostadinov'}
{name:"adam Bogdan", player_id:"9985", jersey_number:"22", country:"Hungary"}
{ name:"adam Bogdan", player_id:9985, jersey_number:22, country:"Hungary" }
CREATE (x:PLAYER:HUNGARY { name:"adam Bogdan", player_id:9985, jersey_number:22, country:"Hungary" } ) RETURN x
Created player: {'': 'adam Bogdan'}
{name:"Gregor Kobel", player_id:"17974", jersey_number:"21", country:"Switzerland"}
{ name:"Gregor Kobel", player_id:17974, jersey_number:21, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Gregor Kobel", player_id:17974, jersey_number:21, country:"Sw

Created player: {'': 'Harry Wilson'}
{name:"Steven Zuber", player_id:"5548", jersey_number:"14", country:"Switzerland"}
{ name:"Steven Zuber", player_id:5548, jersey_number:14, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Steven Zuber", player_id:5548, jersey_number:14, country:"Switzerland" } ) RETURN x
Created player: {'': 'Steven Zuber'}
{name:"Ousmane Dembele", player_id:"5477", jersey_number:"11", country:"France"}
{ name:"Ousmane Dembele", player_id:5477, jersey_number:11, country:"France" }
CREATE (x:PLAYER:FRANCE { name:"Ousmane Dembele", player_id:5477, jersey_number:11, country:"France" } ) RETURN x
Created player: {'': 'Ousmane Dembele'}
{name:"Kacper Kozlowski", player_id:"26718", jersey_number:"6", country:"Poland"}
{ name:"Kacper Kozlowski", player_id:26718, jersey_number:6, country:"Poland" }
CREATE (x:PLAYER:POLAND { name:"Kacper Kozlowski", player_id:26718, jersey_number:6, country:"Poland" } ) RETURN x
Created player: {'': 'Kacper Kozlowski'}
{name:"La

Created player: {'': 'Mikel Oyarzabal Ugarte'}
{name:"Pierre Bengtsson", player_id:"20871", jersey_number:"5", country:"Sweden"}
{ name:"Pierre Bengtsson", player_id:20871, jersey_number:5, country:"Sweden" }
CREATE (x:PLAYER:SWEDEN { name:"Pierre Bengtsson", player_id:20871, jersey_number:5, country:"Sweden" } ) RETURN x
Created player: {'': 'Pierre Bengtsson'}
{name:"Daler Kuzyaev", player_id:"5179", jersey_number:"23", country:"Russia"}
{ name:"Daler Kuzyaev", player_id:5179, jersey_number:23, country:"Russia" }
CREATE (x:PLAYER:RUSSIA { name:"Daler Kuzyaev", player_id:5179, jersey_number:23, country:"Russia" } ) RETURN x
Created player: {'': 'Daler Kuzyaev'}
{name:"Ezgjan Alioski", player_id:"4713", jersey_number:"8", country:"Macedonia, Republic of"}
{ name:"Ezgjan Alioski", player_id:4713, jersey_number:8, country:"Macedonia, Republic of" }
CREATE (x:PLAYER:MACEDONIA_REPUBLIC_OF { name:"Ezgjan Alioski", player_id:4713, jersey_number:8, country:"Macedonia, Republic of" } ) RETURN 

Created player: {'': 'Kieffer Roberto Francisco Moore'}
{name:"Yvon Landry Mvogo", player_id:"11467", jersey_number:"12", country:"Switzerland"}
{ name:"Yvon Landry Mvogo", player_id:11467, jersey_number:12, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Yvon Landry Mvogo", player_id:11467, jersey_number:12, country:"Switzerland" } ) RETURN x
Created player: {'': 'Yvon Landry Mvogo'}
{name:"Eden Hazard", player_id:"3621", jersey_number:"10", country:"Belgium"}
{ name:"Eden Hazard", player_id:3621, jersey_number:10, country:"Belgium" }
CREATE (x:PLAYER:BELGIUM { name:"Eden Hazard", player_id:3621, jersey_number:10, country:"Belgium" } ) RETURN x
Created player: {'': 'Eden Hazard'}
{name:"Robin Koch", player_id:"13294", jersey_number:"24", country:"Germany"}
{ name:"Robin Koch", player_id:13294, jersey_number:24, country:"Germany" }
CREATE (x:PLAYER:GERMANY { name:"Robin Koch", player_id:13294, jersey_number:24, country:"Germany" } ) RETURN x
Created player: {'': 'Robin Koc

Created player: {'': 'Aaron Ramsey'}
{name:"Tomas Soucek", player_id:"4870", jersey_number:"15", country:"Czech Republic"}
{ name:"Tomas Soucek", player_id:4870, jersey_number:15, country:"Czech Republic" }
CREATE (x:PLAYER:CZECH_REPUBLIC { name:"Tomas Soucek", player_id:4870, jersey_number:15, country:"Czech Republic" } ) RETURN x
Created player: {'': 'Tomas Soucek'}
{name:"Mile skoric", player_id:"29345", jersey_number:"16", country:"Croatia"}
{ name:"Mile skoric", player_id:29345, jersey_number:16, country:"Croatia" }
CREATE (x:PLAYER:CROATIA { name:"Mile skoric", player_id:29345, jersey_number:16, country:"Croatia" } ) RETURN x
Created player: {'': 'Mile skoric'}
{name:"Bruno Petkovic", player_id:"7693", jersey_number:"20", country:"Croatia"}
{ name:"Bruno Petkovic", player_id:7693, jersey_number:20, country:"Croatia" }
CREATE (x:PLAYER:CROATIA { name:"Bruno Petkovic", player_id:7693, jersey_number:20, country:"Croatia" } ) RETURN x
Created player: {'': 'Bruno Petkovic'}
{name:"Chr

Created player: {'': 'Milan Badelj'}
{name:"Marcus Thuram", player_id:"2972", jersey_number:"26", country:"France"}
{ name:"Marcus Thuram", player_id:2972, jersey_number:26, country:"France" }
CREATE (x:PLAYER:FRANCE { name:"Marcus Thuram", player_id:2972, jersey_number:26, country:"France" } ) RETURN x
Created player: {'': 'Marcus Thuram'}
{name:"Stole Dimitrievski", player_id:"21397", jersey_number:"1", country:"Macedonia, Republic of"}
{ name:"Stole Dimitrievski", player_id:21397, jersey_number:1, country:"Macedonia, Republic of" }
CREATE (x:PLAYER:MACEDONIA_REPUBLIC_OF { name:"Stole Dimitrievski", player_id:21397, jersey_number:1, country:"Macedonia, Republic of" } ) RETURN x
Created player: {'': 'Stole Dimitrievski'}
{name:"Mathias Jensen", player_id:"18722", jersey_number:"24", country:"Denmark"}
{ name:"Mathias Jensen", player_id:18722, jersey_number:24, country:"Denmark" }
CREATE (x:PLAYER:DENMARK { name:"Mathias Jensen", player_id:18722, jersey_number:24, country:"Denmark" } )

Created player: {'': 'Fabian Lukas Schar'}
{name:"Jan Vertonghen", player_id:"3077", jersey_number:"5", country:"Belgium"}
{ name:"Jan Vertonghen", player_id:3077, jersey_number:5, country:"Belgium" }
CREATE (x:PLAYER:BELGIUM { name:"Jan Vertonghen", player_id:3077, jersey_number:5, country:"Belgium" } ) RETURN x
Created player: {'': 'Jan Vertonghen'}
{name:"Artem Besedin", player_id:"18881", jersey_number:"19", country:"Ukraine"}
{ name:"Artem Besedin", player_id:18881, jersey_number:19, country:"Ukraine" }
CREATE (x:PLAYER:UKRAINE { name:"Artem Besedin", player_id:18881, jersey_number:19, country:"Ukraine" } ) RETURN x
Created player: {'': 'Artem Besedin'}
{name:"Jakub Brabec", player_id:"27622", jersey_number:"4", country:"Czech Republic"}
{ name:"Jakub Brabec", player_id:27622, jersey_number:4, country:"Czech Republic" }
CREATE (x:PLAYER:CZECH_REPUBLIC { name:"Jakub Brabec", player_id:27622, jersey_number:4, country:"Czech Republic" } ) RETURN x
Created player: {'': 'Jakub Brabec'}

Created player: {'': 'Okay Yokuslu'}
{name:"Kieran Tierney", player_id:"10540", jersey_number:"6", country:"Scotland"}
{ name:"Kieran Tierney", player_id:10540, jersey_number:6, country:"Scotland" }
CREATE (x:PLAYER:SCOTLAND { name:"Kieran Tierney", player_id:10540, jersey_number:6, country:"Scotland" } ) RETURN x
Created player: {'': 'Kieran Tierney'}
{name:"Ricardo Ivan Rodriguez Araya", player_id:"5544", jersey_number:"13", country:"Switzerland"}
{ name:"Ricardo Ivan Rodriguez Araya", player_id:5544, jersey_number:13, country:"Switzerland" }
CREATE (x:PLAYER:SWITZERLAND { name:"Ricardo Ivan Rodriguez Araya", player_id:5544, jersey_number:13, country:"Switzerland" } ) RETURN x
Created player: {'': 'Ricardo Ivan Rodriguez Araya'}
{name:"Robin Quaison", player_id:"8552", jersey_number:"22", country:"Sweden"}
{ name:"Robin Quaison", player_id:8552, jersey_number:22, country:"Sweden" }
CREATE (x:PLAYER:SWEDEN { name:"Robin Quaison", player_id:8552, jersey_number:22, country:"Sweden" } ) 

Created player: {'': 'Jan Gregus'}
{name:"Domagoj Vida", player_id:"5468", jersey_number:"21", country:"Croatia"}
{ name:"Domagoj Vida", player_id:5468, jersey_number:21, country:"Croatia" }
CREATE (x:PLAYER:CROATIA { name:"Domagoj Vida", player_id:5468, jersey_number:21, country:"Croatia" } ) RETURN x
Created player: {'': 'Domagoj Vida'}
{name:"Aleksey Miranchuk", player_id:"6299", jersey_number:"15", country:"Russia"}
{ name:"Aleksey Miranchuk", player_id:6299, jersey_number:15, country:"Russia" }
CREATE (x:PLAYER:RUSSIA { name:"Aleksey Miranchuk", player_id:6299, jersey_number:15, country:"Russia" } ) RETURN x
Created player: {'': 'Aleksey Miranchuk'}
{name:"Joao Felix Sequeira", player_id:"12041", jersey_number:"23", country:"Portugal"}
{ name:"Joao Felix Sequeira", player_id:12041, jersey_number:23, country:"Portugal" }
CREATE (x:PLAYER:PORTUGAL { name:"Joao Felix Sequeira", player_id:12041, jersey_number:23, country:"Portugal" } ) RETURN x
Created player: {'': 'Joao Felix Sequeir

Created player: {'': 'James Forrest'}
{name:"Davy Klaassen", player_id:"4318", jersey_number:"14", country:"Netherlands"}
{ name:"Davy Klaassen", player_id:4318, jersey_number:14, country:"Netherlands" }
CREATE (x:PLAYER:NETHERLANDS { name:"Davy Klaassen", player_id:4318, jersey_number:14, country:"Netherlands" } ) RETURN x
Created player: {'': 'Davy Klaassen'}
{name:"Ben White", player_id:"22809", jersey_number:"22", country:"England"}
{ name:"Ben White", player_id:22809, jersey_number:22, country:"England" }
CREATE (x:PLAYER:ENGLAND { name:"Ben White", player_id:22809, jersey_number:22, country:"England" } ) RETURN x
Created player: {'': 'Ben White'}
{name:"Sam Johnstone", player_id:"9458", jersey_number:"23", country:"England"}
{ name:"Sam Johnstone", player_id:9458, jersey_number:23, country:"England" }
CREATE (x:PLAYER:ENGLAND { name:"Sam Johnstone", player_id:9458, jersey_number:23, country:"England" } ) RETURN x
Created player: {'': 'Sam Johnstone'}
{name:"Vladimir Darida", play

## Loading Matches

In [120]:
# Fetch every match object stored in matches.json.
# `a` holds whatever rumble_return gives back for this query.
a = rumble_return("""
for $match in json-doc("matches.json")[]
return $match """)

In [143]:
# Flatten each match object and create one GAME node from it.
js = json.loads(a)

for x in js:
    # Drop the fields that are not stored on the GAME node.
    for key in ("competition", "season", "match_status", "match_status_360",
                "last_updated", "last_updated_360", "metadata"):
        del x[key]

    home = x["home_team"]
    away = x["away_team"]

    x["teams"] = [home["home_team_name"], away["away_team_name"]]
    x["home_team_group"] = home["home_team_group"]
    x["away_team_group"] = away["away_team_group"]

    # Managers are optional: the key may be absent, empty, or null. Only those
    # cases are tolerated; any other error now surfaces instead of being swallowed.
    try:
        x["home_team_manager"] = home["managers"][0]["name"]
    except (KeyError, IndexError, TypeError):
        pass
    try:
        x["away_team_manager"] = away["managers"][0]["name"]
    except (KeyError, IndexError, TypeError):
        pass

    # Replace the nested objects with their display names.
    x["competition_stage"] = x["competition_stage"]["name"]
    x["stadium"] = x["stadium"]["name"]
    x["referee"] = x["referee"]["name"]
    x["away_team"] = away["away_team_name"]
    x["home_team"] = home["home_team_name"]
    x["score"] = str(x["home_score"]) + "-" + str(x["away_score"])

    # NOTE(review): this replace runs over the whole serialized text, so it
    # also rewrites the characters "None" inside string values.
    query = 'CREATE (x:GAME' + ' ' + jsontoCypher(x).replace("None", "null") + ' ) RETURN x'
    app.create_player(query)
    

Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
Created player: {'': None}
C

## Create player---HAS_PLAYED-->game relationships

In [138]:

# For each match id listed in matches_id.json, read its lineup file and return
# one record per player that has at least one entry in `positions`:
# "from" is the player name, "to" the match id, "properties" the full player object.
a = rumble_return("""
for $matches in json-doc("matches_id.json")[]
for $match in $matches[]
for $doc in json-doc("lineups_cleaned/" || string($match) || ".json")
for $team in $doc[]
for $player in $team.lineup[]
where size($player.positions) ge 1
return {"from": $player.player_name, "to": $match, "properties": $player}""")


In [139]:
# Create one HAS_PLAYED relationship per (player, match) record.
js = json.loads(a)

for y in js:
    x = y["properties"]

    # Drop the identity fields so that only per-match data stays on the relationship.
    for key in ("player_id", "player_name", "player_nickname", "jersey_number", "country"):
        del x[key]

    # The lineup query only returns players with at least one position entry.
    x["starting"] = x["positions"][0]["start_reason"] == "Starting XI"

    # Keep only the position / card names, skipping null entries.
    x["positions"] = [el["position"] for el in x["positions"] if el["position"] is not None]
    x["cards"] = [el["card_type"] for el in x["cards"] if el["card_type"] is not None]

    # Labels restrict the lookup to PLAYER and GAME nodes instead of pairing
    # every node in the graph with every other node. The player name goes
    # through jsontoCypher so it is quoted the same way as every other string.
    query = (
        'MATCH (a:PLAYER), (b:GAME) '
        'WHERE a.name = ' + jsontoCypher(y["from"]) + ' AND b.match_id = ' + str(y["to"]) +
        ' CREATE (a)-[r: HAS_PLAYED ' + jsontoCypher(x).replace("None", "null") + ']->(b) '
        'RETURN r'
    )
    app.create_relationship(query)

2022-04-03 15:32:21.850 ERROR   neo4j: Failed to write data to connection IPv4Address(('162.55.245.102', 7687)) (IPv4Address(('162.55.245.102', 7687)))


## Create Passages

In [57]:
def get_rumble_query(game, team):
    """Build the JSONiq query that lists one team's passes in one match.

    The query reads ``events_cleaned/<game>.json`` and keeps "Pass" events
    where ``team`` is both the acting team and the possession team and where a
    pass recipient is set. It returns one object per pass with the keys
    ``from``, ``to``, ``possession``, ``properties``, ``order`` and
    ``possession_length``. Passes are ranked by period and then timestamp;
    ``order`` is a pass's rank minus the lowest rank in its possession plus
    one, and ``possession_length`` is the highest rank minus the lowest rank
    in that possession plus one.

    Args:
        game: match id; converted with ``str`` to build the events file name.
        team: team name; wrapped in double quotes and inserted without any
            escaping.

    Returns:
        The query text as a ``str``.
    """
    team_literal = '"' + team + '"'

    # The query is assembled from fixed fragments around the three variable
    # parts (the game id and the two occurrences of the quoted team name).
    head = """
let $beg := for $doc in json-doc("events_cleaned/"""
    after_game = """.json")
    for $x in $doc[]
    count $c
    where $x."type".name eq "Pass" and $x.possession_team.name eq """
    between_teams = """ and $x.team.name eq """
    tail = """
    where $x.pass.recipient.name
    return { "from" : $x.player.name, "to":$x.pass.recipient.name, "properties":$x, "possession":$x.possession
    }


    let $grouped := for $e in $beg
    group by $p:=$e.possession
    order by $p
    return {"possession":$p, "values":$e}

let $int:=for $g in $grouped
for $el in $g.values[]
order by $el.properties.period, $el.properties.timestamp
count $c
return {"el": $el, "count":$c}

let $scale := for $el in $int
group by $p := $el.el.possession
return {"possession":$p, "min":min($el.count), "max":max($el.count)+1-min($el.count)}

for $el in $int
for $p in $scale
where $p.possession eq $el.el.possession
return { "from" : $el.el.from, "to":$el.el.to,  "possession":$p.possession, "properties":$el.el.properties,
        "order":$el.count +1 - $p.min, "possession_length" : $p.max
    }"""

    fragments = [head, str(game), after_game, team_literal, between_teams, team_literal, tail]
    return "".join(fragments)

In [71]:
games = json.loads(rumble_return("""for $matches in json-doc("matches_id.json")[] for $match in $matches[] return $match"""))

# Event keys dropped before the pass is stored on the PASS relationship.
PASS_DROPPED_KEYS = ("id", "type", "possession_team", "related_events", "player")

# (target key, path inside the event) pairs: the value found by following the
# path is copied to a flat top-level property. The tuple order is the order in
# which properties are written; "type" is re-created from the pass sub-object
# after the event-level "type" has been dropped.
PASS_FLATTENED_FIELDS = (
    ("team", ("team", "name")),
    ("play_pattern", ("play_pattern", "name")),
    ("position", ("position", "name")),
    ("length", ("pass", "length")),
    ("angle", ("pass", "angle")),
    ("height", ("pass", "height", "name")),
    ("end_location", ("pass", "end_location")),
    ("type", ("pass", "type", "name")),
    ("body_part", ("pass", "body_part", "name")),
)

# enumerate supplies the progress counter directly; games.index(game) rescanned
# the list on every iteration and reports the wrong number if an id repeats.
for game_number, game in enumerate(games, start=1):
    teams = json.loads(rumble_return("""for $match in json-doc("matches.json")[] 
    where $match.match_id eq """+str(game)+"""
    return ($match.home_team.home_team_name, $match.away_team.away_team_name)"""))

    print("Processing game "+ str(game_number)+" / "+str(len(games)))

    for team in teams:

        passes = json.loads(rumble_return(get_rumble_query(game, team)))
        print("Processing "+team+" ("+str(len(passes))+" passages)")
        for passage in passes:
            x = passage["properties"]
            x["order"] = passage["order"]
            x["possession_length"] = passage["possession_length"]
            x["match_id"] = game

            # pop(..., None) rather than del so an event missing one of these
            # keys does not abort the import.
            for key in PASS_DROPPED_KEYS:
                x.pop(key, None)

            # The data is not homogeneous: a nested field may be absent or
            # already flat. Only those two situations (KeyError / TypeError)
            # are tolerated; any other error still surfaces.
            for target, path in PASS_FLATTENED_FIELDS:
                value = x
                try:
                    for key in path:
                        value = value[key]
                except (KeyError, TypeError):
                    continue
                x[target] = value

            x.pop("pass", None)

            # Escape backslashes and double quotes so a name containing them
            # cannot terminate the Cypher string literal early.
            # NOTE(review): assumes player nodes store names verbatim - confirm.
            passer = passage["from"].replace("\\", "\\\\").replace('"', '\\"')
            recipient = passage["to"].replace("\\", "\\\\").replace('"', '\\"')

            query = \
           'MATCH (a), (b) \
            WHERE a.name = "' + passer + '" AND b.name = "'+recipient+ \
            '" CREATE (a)-[r: PASS '+jsontoCypher(x)+']->(b) \
            RETURN type(r)'

            app.create_relationship(query)
        
    
    

Processing game 1 / 51
Processing Finland (364 passages)
Processing Russia (548 passages)
Processing game 2 / 51
Processing Switzerland (457 passages)
Processing Turkey (442 passages)
Processing game 3 / 51
Processing Belgium (416 passages)
Processing Italy (515 passages)
Processing game 4 / 51
Processing England (716 passages)
Processing Denmark (487 passages)
Processing game 5 / 51
Processing Italy (842 passages)
Processing England (394 passages)
Processing game 6 / 51
Processing England (413 passages)
Processing Germany (519 passages)
Processing game 7 / 51
Processing Sweden (626 passages)
Processing Ukraine (748 passages)
Processing game 8 / 51
Processing Croatia (415 passages)
Processing Spain (853 passages)
Processing game 9 / 51
Processing Belgium (411 passages)
Processing Portugal (577 passages)
Processing game 10 / 51
Processing Italy (661 passages)
Processing Austria (567 passages)
Processing game 11 / 51
Processing Germany (682 passages)
Processing Hungary (188 passages)
Pro

## Ending Events

In [105]:
# Sentinel node that every possession-ending event points to.
# MERGE instead of CREATE: re-running this cell reuses the existing node
# rather than adding a second END node, which would make each later
# `MATCH (a), (b:END) ... CREATE` produce one relationship per duplicate.
query = """MERGE (X:SPECIAL:END {info:"dummy node to represent end of an action", type:"END"})"""

app.create_player(query)


In [106]:
def get_rumble_END(game):
    """Build the JSONiq query selecting the possession-ending events of a match.

    The query reads ``events_cleaned/<game>.json`` and keeps events whose team
    equals the possession team and whose type name is one of "Dispossessed",
    "Shot", "Foul Won", "Foul Committed" or "Miscontrol". Each result object
    holds the player name under ``from``, the constant ``"END"`` under ``to``,
    the whole event under ``properties`` and the type name under ``label``.

    Args:
        game: match id; converted with ``str`` to build the events file name.

    Returns:
        The query text as a ``str``.
    """
    events_doc = 'json-doc("events_cleaned/' + str(game) + '.json")'
    clauses = (
        "for $doc in " + events_doc,
        "    for $x in $doc[]",
        '    where $x.team.name eq $x.possession_team.name and $x."type".name = ("Dispossessed", "Shot", "Foul Won", "Foul Committed", "Miscontrol")',
        '    return {"from": $x.player.name, "to": "END", "properties": $x, "label": $x."type".name}',
    )
    return "\n".join(clauses)

In [107]:
games = json.loads(rumble_return("""for $matches in json-doc("matches_id.json")[] for $match in $matches[] return $match"""))

# Event keys that are never stored on the relationship. Removing them up front
# with pop(..., None) treats every key the same way, whether or not the event
# happens to carry it.
END_DROPPED_KEYS = (
    "id",
    "related_events",
    "team",
    "player",
    "position",
    "foul_won",
    "miscontrol",
)

# enumerate supplies the progress counter directly; games.index(game) rescanned
# the list on every iteration and reports the wrong number if an id repeats.
for game_number, game in enumerate(games, start=1):

    print("Processing game "+ str(game_number)+" / "+str(len(games)))
    events = json.loads(rumble_return(get_rumble_END(game)))
    for event in events:
        x = event["properties"]
        for key in END_DROPPED_KEYS:
            x.pop(key, None)

        # Replace nested {"name": ...} objects by the name itself.
        if "possession_team" in x:
            x["possession_team"] = x["possession_team"]["name"]
        if "play_pattern" in x:
            x["play_pattern"] = x["play_pattern"]["name"]

        event_type = x["type"]["name"]

        if event_type == "Shot":
            # The shot sub-object is removed as a whole; only the fields
            # copied below survive, so nothing inside it needs deleting first.
            shot = x.pop("shot")
            if "end_location" in shot:
                x["shot_end_location"] = shot["end_location"]
            if "type" in shot:
                x["shot_type"] = shot["type"]["name"]
            if "body_part" in shot:
                x["body_part"] = shot["body_part"]["name"]
            if "technique" in shot:
                x["technique"] = shot["technique"]["name"]
            x["outcome"] = shot["outcome"]["name"]

        elif event_type == "Foul Committed":
            foul = x.pop("foul_committed", None)
            if foul is not None and "card" in foul:
                x["card"] = foul["card"]["name"]

        x["type"] = event_type
        x["match_id"] = game

        # Escape backslashes and double quotes so a name containing them
        # cannot terminate the Cypher string literal early.
        # NOTE(review): assumes player nodes store names verbatim - confirm.
        player_name = event["from"].replace("\\", "\\\\").replace('"', '\\"')

        # The relationship type is the event label in upper snake case,
        # e.g. "Foul Won" becomes FOUL_WON.
        query = \
           'MATCH (a), (b:END) \
                WHERE a.name = "' + player_name + '" AND b.type = "'+str(event["to"])+ \
            '" CREATE (a)-[r:'+event["label"].replace(" ", "_").upper()+" "+jsontoCypher(x).replace("None", "null")+']->(b) \
                RETURN type(r)'

        app.create_relationship(query)
        
        
            

Processing game 1 / 51
Processing game 2 / 51
Processing game 3 / 51
Processing game 4 / 51
Processing game 5 / 51
Processing game 6 / 51
Processing game 7 / 51
Processing game 8 / 51
Processing game 9 / 51
Processing game 10 / 51
Processing game 11 / 51
Processing game 12 / 51
Processing game 13 / 51
Processing game 14 / 51
Processing game 15 / 51
Processing game 16 / 51
Processing game 17 / 51
Processing game 18 / 51
Processing game 19 / 51
Processing game 20 / 51
Processing game 21 / 51
Processing game 22 / 51
Processing game 23 / 51
Processing game 24 / 51
Processing game 25 / 51
Processing game 26 / 51
Processing game 27 / 51
Processing game 28 / 51
Processing game 29 / 51
Processing game 30 / 51
Processing game 31 / 51
Processing game 32 / 51
Processing game 33 / 51
Processing game 34 / 51
Processing game 35 / 51
Processing game 36 / 51
Processing game 37 / 51
Processing game 38 / 51
Processing game 39 / 51
Processing game 40 / 51
Processing game 41 / 51
Processing game 42 / 51
P

## Starting Events

In [117]:
def get_rumble_query_START(game):
    """Build the JSONiq query selecting the possession-starting events of a match.

    The query reads ``events_cleaned/<game>.json`` and keeps events whose team
    equals the possession team and whose type name is "Ball Recovery". Each
    result object holds the constant ``"START"`` under ``from``, the player
    name under ``to``, the whole event under ``properties`` and the type name
    under ``label``.

    Args:
        game: match id; converted with ``str`` to build the events file name.

    Returns:
        The query text as a ``str``.
    """
    events_doc = 'json-doc("events_cleaned/' + str(game) + '.json")'
    clauses = (
        "for $doc in " + events_doc,
        "    for $x in $doc[]",
        '    where $x.team.name eq $x.possession_team.name and $x."type".name = ("Ball Recovery")',
        '    return {"to": $x.player.name, "from": "START", "properties": $x, "label": $x."type".name}',
    )
    return "\n".join(clauses)

In [118]:
# Sentinel node that every possession-starting event originates from.
# MERGE instead of CREATE: re-running this cell reuses the existing node
# rather than adding a second START node, which would make each later
# `MATCH (a:START), (b) ... CREATE` produce one relationship per duplicate.
query = """MERGE (X:SPECIAL:START {info:"dummy node to represent the beginning of an action", type:"START"})"""

app.create_player(query)


In [122]:
games = json.loads(rumble_return("""for $matches in json-doc("matches_id.json")[] for $match in $matches[] return $match"""))

# Event keys that are never stored on the relationship. pop(..., None) treats
# all of them the same way, whether or not the event carries the key.
START_DROPPED_KEYS = (
    "id",
    "related_events",
    "team",
    "player",
    "position",
    "ball_recovery",
)

# enumerate supplies the progress counter directly; games.index(game) rescanned
# the list on every iteration and reports the wrong number if an id repeats.
for game_number, game in enumerate(games, start=1):

    print("Processing game "+ str(game_number)+" / "+str(len(games)))
    events = json.loads(rumble_return(get_rumble_query_START(game)))
    for event in events:

        x = event["properties"]
        for key in START_DROPPED_KEYS:
            x.pop(key, None)

        x["match_id"] = game
        # Replace nested {"name": ...} objects by the name itself.
        x["type"] = x["type"]["name"]
        x["possession_team"] = x["possession_team"]["name"]
        if "play_pattern" in x:
            x["play_pattern"] = x["play_pattern"]["name"]

        # Escape backslashes and double quotes so a name containing them
        # cannot terminate the Cypher string literal early.
        # NOTE(review): assumes player nodes store names verbatim - confirm.
        player_name = str(event["to"]).replace("\\", "\\\\").replace('"', '\\"')

        # The relationship type is the event label in upper snake case,
        # e.g. "Ball Recovery" becomes BALL_RECOVERY.
        query = \
           'MATCH (a:START), (b) \
                WHERE a.type = "' + event["from"] + '" AND b.name = "'+player_name+ \
            '" CREATE (a)-[r: '+event["label"].replace(" ", "_").replace("-","_").upper()+" "+jsontoCypher(x).replace("None", "null")+']->(b) \
                RETURN type(r)'

        app.create_relationship(query)
       
        
        

Processing game 1 / 51
Processing game 2 / 51
Processing game 3 / 51
Processing game 4 / 51
Processing game 5 / 51
Processing game 6 / 51
Processing game 7 / 51
Processing game 8 / 51
Processing game 9 / 51
Processing game 10 / 51
Processing game 11 / 51
Processing game 12 / 51
Processing game 13 / 51
Processing game 14 / 51
Processing game 15 / 51
Processing game 16 / 51
Processing game 17 / 51
Processing game 18 / 51
Processing game 19 / 51
Processing game 20 / 51
Processing game 21 / 51
Processing game 22 / 51
Processing game 23 / 51
Processing game 24 / 51
Processing game 25 / 51
Processing game 26 / 51
Processing game 27 / 51
Processing game 28 / 51
Processing game 29 / 51
Processing game 30 / 51
Processing game 31 / 51
Processing game 32 / 51
Processing game 33 / 51
Processing game 34 / 51
Processing game 35 / 51
Processing game 36 / 51
Processing game 37 / 51
Processing game 38 / 51
Processing game 39 / 51
Processing game 40 / 51
Processing game 41 / 51
Processing game 42 / 51
P