# Football Outcome Predictions

In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import autoreload
import missingno as msno
import os
import csv
import sys
import requests

from bs4 import BeautifulSoup
from collections import defaultdict, Counter

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

%matplotlib inline

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 100)

sb.set_style('whitegrid')

In [2]:


import netrc

# Read the .netrc file
netrc_info = netrc.netrc("samples/sample.netrc")

# Get the login information for a specific host
login, account, password = netrc_info.authenticators("secret.fbi")

# Print the retrieved information
print("login:", login)
print("account:", account)
print("password:", password)



from espn_api.football import League
# Instantiate the League class
league = League(league_id=222, year=2022)

# Provide a valid team_id as an argument to get_team_data() method
team_id = "your_team_id_here"
team_data = league.get_team_data(team_id)

# Access the desired information from the team_data object
# For example, print the team name
print(team_data["team_name"])

FileNotFoundError: [Errno 2] No such file or directory: 'samples/sample.netrc'

In [3]:
import netrc

# Get the host name and login credentials from the .netrc file
auth = netrc.netrc().authenticators("https://api.football-data.org")

# Construct the headers dictionary with the X-Auth-Token value
headers = { 'X-Auth-Token': auth[2] }

# Make the API request with the headers dictionary
response = requests.get(url, headers=headers)

# Process the response data
for match in response.json()['matches']:
    print(match)

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\anton\\.netrc'

In [4]:
# Step 1: Scraping data from a website

url = 'https://sportsdata.io/scores-and-stats'
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')

# Extract relevant information from the HTML structure
team_names = [element.text for element in soup.find_all('div', class_='team-name')]
current_form = [element.text for element in soup.find_all('div', class_='form')]
injury_news = [element.text for element in soup.find_all('div', class_='injuries')]


# Create a DataFrame with the extracted data
data = pd.DataFrame({
    'team_name': team_names,
    'current_form': current_form,
    'injury_news': injury_news
})

# Clean the data
data['current_form'] = data['current_form'].str.extract('(\d+)', expand=False).astype(int)

# Handle missing values
data = data.fillna(0)

# Scale the data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data[['current_form']])

# Step 3: Train machine learning models

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(scaled_data, data['team_name'], test_size=0.2, random_state=42)

# Train logistic regression model
logreg_model = LogisticRegression(random_state=42)
logreg_model.fit(X_train, y_train)

# Train decision tree model
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Train random forest model
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Train neural network model
nn_model = MLPClassifier(random_state=42)
nn_model.fit(X_train, y_train)


# Evaluate logistic regression model
logreg_pred = logreg_model.predict(X_test)
logreg_accuracy = accuracy_score(y_test, logreg_pred)
logreg_precision = precision_score(y_test, logreg_pred, average='weighted')
logreg_recall = recall_score(y_test, logreg_pred, average='weighted')
logreg_f1_score = f1_score(y_test, logreg_pred, average='weighted')

# Evaluate decision tree model
dt_pred = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_pred)
dt_precision = precision_score(y_test, dt_pred, average='weighted')
dt_recall = recall_score(y_test, dt_pred, average='weighted')
dt_f1_score = f1_score(y_test, dt_pred, average='weighted')

# Evaluate random forest model
rf_pred = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_pred)
rf_precision = precision_score(y_test, rf_pred, average='weighted')
rf_recall = recall_score(y_test, rf_pred, average='weighted')
rf_f1_score = f1_score(y_test, rf_pred, average='weighted')

# Evaluate neural network model
nn_pred = nn_model.predict(X_test)
nn_accuracy = accuracy_score(y_test, nn_pred)
nn_precision = precision_score(y_test, nn_pred, average='weighted')
nn_recall = recall_score(y_test, nn_pred, average='weighted')
nn_f1_score = f1_score(y_test, nn_pred, average='weighted')

# Step 5: Select the best model
model_scores = {
    'logreg': logreg_accuracy,
    'dt': dt_accuracy,
    'rf': rf_accuracy,
    'nn': nn_accuracy
}

best_model = max(model_scores, key=model_scores.get)
print('Best model:', best_model)

# Step 6: Make predictions
# You can use the best model to make predictions on new data
# For example:
new_data = pd.DataFrame({
    'team_name': ['Team A', 'Team B'],
    'current_form': [3, 2],
    'injury_news': ['No injuries', '1 player out']
})

# Clean and transform the new data
new_data['current_form'] = new_data['current_form'].astype(int)
new_data = new_data.fillna(0)
new_scaled_data = scaler.transform(new_data[['current_form']])

# Make predictions using the best model
if best_model == 'logreg':
    prediction = logreg_model.predict(new_scaled_data)
elif best_model == 'dt':
    prediction = dt_model.predict(new_scaled_data)
elif best_model == 'rf':
    prediction = rf_model.predict(new_scaled_data)
elif best_model == 'nn':
    prediction = nn_model.predict(new_scaled_data)

print('Prediction:', prediction)



AttributeError: Can only use .str accessor with string values!