In [1]:
import numpy as np
from keras.layers import Dense, Embedding, LSTM
from keras.models import Sequential
from keras.preprocessing.text import Tokenizer
from keras.utils import set_random_seed
from tensorflow.keras.preprocessing.sequence import pad_sequences #from keras.preprocessing.sequence import pad_sequences

In [23]:
# Example dataset: one dict per match.
# Keys are German: "Heimteam" = home team, "Auswärtsteam" = away team,
# "Heimtore" = home goals, "Auswärtstore" = away goals.
data = [
    {
        "Heimteam": home,
        "Auswärtsteam": away,
        "Heimtore": home_score,
        "Auswärtstore": away_score,
    }
    for home, away, home_score, away_score in (
        ("Manchester United", "Liverpool", 2, 1),
        ("Chelsea", "Arsenal", 3, 0),
        ("Manchester City", "Tottenham Hotspur", 1, 1),
        ("Leicester City", "Everton", 2, 2),
        ("West Ham United", "Southampton", 0, 1),
    )
]

In [24]:
# Turn every match in the data set into one training example:
#   text  -> "<home team> vs. <away team>"
#   label -> 1 when the home side scored more goals, otherwise 0
#            (so draws and away wins share the label 0)
texts = [f"{match['Heimteam']} vs. {match['Auswärtsteam']}" for match in data]
labels = [int(match["Heimtore"] > match["Auswärtstore"]) for match in data]

In [25]:
# Sanity check: show the generated "<home team> vs. <away team>" strings
print(texts)

['Manchester United vs. Liverpool', 'Chelsea vs. Arsenal', 'Manchester City vs. Tottenham Hotspur', 'Leicester City vs. Everton', 'West Ham United vs. Southampton']


In [26]:
# Sanity check: show the targets (1 = home win, 0 = draw or away win)
print(labels)

[1, 1, 0, 0, 0]


In [27]:
# Tokenisation and sequencing of the texts
# num_words=1000 is the vocabulary limit handed to the Tokenizer; the Embedding
# layer defined further down has to be able to index every word id produced here.
tokenizer = Tokenizer(num_words=1000)
# Build the word index from the training texts
tokenizer.fit_on_texts(texts)
# Replace each text by its list of integer word ids; the lists can differ in
# length, which is why they are padded in the next step
sequences = tokenizer.texts_to_sequences(texts)

In [28]:
# Bring all sequences to one common length: the length of the longest
# tokenised text becomes the target length for padding.
max_sequence_length = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

In [29]:
# Convert the targets and the model inputs to numpy.ndarray objects
labels = np.array(labels)
padded_sequences = np.array(padded_sequences)

In [30]:
# Creation of the neural network
# Seed Python's `random`, NumPy and the Keras backend before any layer is
# created, so that weight initialisation and the shuffling done during training
# are repeatable on "Restart Kernel -> Run All". Without a seed the prediction
# at the end of the notebook can change from run to run.
# (Some hardware-specific ops may still be non-deterministic.)
set_random_seed(42)

model = Sequential()
# The embedding table needs one row per word id the tokenizer can emit, so its
# size is read from the tokenizer instead of repeating the literal 1000 here.
# Each word id is mapped to a 32-dimensional vector.
model.add(Embedding(tokenizer.num_words, 32, input_length=max_sequence_length))
# Recurrent layer with 64 units that reads the embedded token sequence
model.add(LSTM(64))
# Single sigmoid unit: one score between 0 and 1, trained against the
# binary labels (1 = home win, 0 = draw or away win)
model.add(Dense(1, activation='sigmoid'))

In [31]:
# Configure training (Adam optimiser, binary cross-entropy loss, accuracy as
# the reported metric), then fit on the padded sequences for 10 epochs.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(x=padded_sequences, y=labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1bf5e8ce2c0>

In [32]:
# Match to predict, written in the same "<home team> vs. <away team>" form
# that was used to build the training texts
test_text = "Chelsea vs. Manchester United"

In [33]:
# Tokenisation, sequencing and padding of the test text
# The tokenizer that was fitted on the training texts is reused (not re-fitted),
# so the test text is encoded with the same word index. The text is wrapped in
# a one-element list because texts_to_sequences is given a list of texts.
test_sequence = tokenizer.texts_to_sequences([test_text])
# Pad to the same length as the training sequences, i.e. the input length the
# model was declared with
padded_test_sequence = pad_sequences(test_sequence, maxlen=max_sequence_length)

In [34]:
# Score the test match. predict() is called on a single padded sequence and
# the one output value is pulled out with [0][0].
prediction = model.predict(padded_test_sequence)[0][0]

# Label 1 was "home win" during training, so a score above 0.5 is reported as
# a home win; everything else is reported as "away win or draw".
if prediction > 0.5:
    result = 'Heimteam gewinnt'
else:
    result = 'Auswärtsteam gewinnt oder Unentschieden'

print(f'Für das Spiel "{test_text}" lautet die Vorhersage: {result}.')

Für das Spiel "Chelsea vs. Manchester United" lautet die Vorhersage: Auswärtsteam gewinnt oder Unentschieden.
