## Prediction using the Logistic Regression

In [1]:
import pandas as pd 

In [2]:
laliga = pd.read_csv('merged_laliga_data.csv')

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Selecting relevant columns
data = laliga[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']]

# Dropping rows with missing values
data = data.dropna()

# Encoding categorical columns
label_encoder = LabelEncoder()
data['HomeTeam'] = label_encoder.fit_transform(data['HomeTeam'])
data['AwayTeam'] = label_encoder.fit_transform(data['AwayTeam'])
data['FTR'] = label_encoder.fit_transform(data['FTR'])

# Splitting data into features (X) and target (y)
X = data[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
y = data['FTR']

# Splitting into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the class weights based on the encoded class labels: 0 (H), 1 (D), 2 (A)
class_weights = {0: 1, 1: 1.5, 2: 1.5}

# Initialize Random Forest with the corrected class weights
clf = RandomForestClassifier(n_estimators=100, random_state=42, class_weight=class_weights)

# Train the classifier
clf.fit(X_train, y_train)

# Checking accuracy on the test set
accuracy = clf.score(X_test, y_test)
print(f"Accuracy: {accuracy:.2f}")


Accuracy: 1.00


In [4]:
# Check the distribution of match outcomes
print(laliga['FTR'].value_counts())

FTR
H    1014
A     635
D     631
Name: count, dtype: int64


In [5]:
unique_teams = pd.concat([laliga['HomeTeam'], laliga['AwayTeam']]).unique()
print(unique_teams)

['Betis' 'Girona' 'Barcelona' 'Celta' 'Villarreal' 'Eibar' 'Real Madrid'
 'Vallecano' 'Ath Bilbao' 'Valencia' 'Getafe' 'Leganes' 'Alaves'
 'Ath Madrid' 'Valladolid' 'Espanol' 'Sevilla' 'Levante' 'Huesca'
 'Sociedad' 'Mallorca' 'Granada' 'Osasuna' 'Cadiz' 'Elche' 'Almeria'
 'Las Palmas']


In [6]:
from sklearn.preprocessing import LabelEncoder

# Create a new LabelEncoder for team names
team_encoder = LabelEncoder()

# Fit the LabelEncoder on the unique team names from both HomeTeam and AwayTeam columns in your original dataset
all_teams = pd.concat([laliga['HomeTeam'], laliga['AwayTeam']]).unique()
team_encoder.fit(all_teams)

# Create a DataFrame with the upcoming matches, using '0' as placeholders for goals
upcoming_matches = pd.DataFrame({
    'HomeTeam': ['Leganes', 'Espanol', 'Getafe', 'Valladolid', 'Las Palmas', 
                 'Real Madrid', 'Girona', 'Alaves', 'Sevilla', 'Sociedad'],
    'AwayTeam': ['Valencia', 'Mallorca', 'Osasuna', 'Vallecano', 
                 'Celta', 'Villarreal', 'Ath Bilbao', 'Barcelona', 'Betis', 'Ath Madrid'],
    'FTHG': [0] * 10,  # Placeholder, future home goals are not known
    'FTAG': [0] * 10   # Placeholder, future away goals are not known
})

# Encode the teams using the newly trained team_encoder
upcoming_matches['HomeTeam'] = team_encoder.transform(upcoming_matches['HomeTeam'])
upcoming_matches['AwayTeam'] = team_encoder.transform(upcoming_matches['AwayTeam'])

# Now we include the goal placeholders as well
X_upcoming = upcoming_matches[['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]

# Predict the outcomes using the trained model
predictions = clf.predict(X_upcoming)

# Map predictions back to readable results
result_mapping = {0: 'Away Win', 1: 'Draw', 2: 'Home Win'}
predicted_outcomes = [result_mapping[pred] for pred in predictions]

# Display the predictions
for i, match in upcoming_matches.iterrows():
    home_team = team_encoder.inverse_transform([match['HomeTeam']])[0]
    away_team = team_encoder.inverse_transform([match['AwayTeam']])[0]
    print(f"Prediction for {home_team} vs {away_team}: {predicted_outcomes[i]}")

Prediction for Leganes vs Valencia: Draw
Prediction for Espanol vs Mallorca: Draw
Prediction for Getafe vs Osasuna: Draw
Prediction for Valladolid vs Vallecano: Draw
Prediction for Las Palmas vs Celta: Draw
Prediction for Real Madrid vs Villarreal: Draw
Prediction for Girona vs Ath Bilbao: Draw
Prediction for Alaves vs Barcelona: Draw
Prediction for Sevilla vs Betis: Draw
Prediction for Sociedad vs Ath Madrid: Draw
