Get the upcoming match list to predict

In [1]:
import requests
import pandas as pd
from datetime import datetime

# Download the fixture list for one season and save the pairings
# (season, round, week, home/away abbreviations) to a CSV file.

# Sample base URL and season ID
base_url = "https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/"
season_id = "2800029"

# Fetch the fixture data
fixture_url = f"{base_url}{season_id}"
# A timeout keeps the cell from blocking forever if the feed stops answering.
response = requests.get(fixture_url, timeout=30)
if response.status_code != 200:
    # Raise instead of calling exit(): inside a notebook exit() tears down the
    # kernel, whereas an exception stops this cell and leaves a traceback.
    raise RuntimeError(f"Failed to fetch fixture data. Status code: {response.status_code}")
fixture_data = response.json()

# Build one record per match in the fixture data.
# Keys are indexed directly, so a match missing any of them raises KeyError.
# Additional features can be added here based on your specific requirements.
matches_dataset = [
    {
        'season_id': season_id,
        'round': match['round'],
        'week': match['week'],
        'home_team_abbr': match['teams']['home']['abbr'],
        'away_team_abbr': match['teams']['away']['abbr'],
    }
    for match in fixture_data['doc'][0]['data']['matches']
]

# Create a DataFrame from the extracted data
df_matches = pd.DataFrame(matches_dataset)

# Save the dataset to a CSV file
df_matches.to_csv('season_matches_dataset.csv', index=False)


Previous Season Data

In [2]:
import requests
import json
import pandas as pd

# Download the fixtures/results of several past seasons and flatten every
# match into one row of 'matches_dataset.csv'.

# Define the base URL and the list of season IDs to download
base_url = "https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/"
numbers = [
    "2798481", "2798377", "2798407", "2798431", "2798458",
    "2798508", "2798532", "2798557", "2798588", "2798615",
    "2798638", "2798664", "2798690", "2798713",
    "2798742", "2798768", "2798794", "2798818", "2798843",
    "2798868", "2798893"
]

matches_dataset = []
# Seasons that could not be downloaded, so gaps in the CSV are visible.
failed_seasons = []

# Iterate over the season IDs, make the API calls, and collect the matches
for number in numbers:
    url = base_url + number
    print(url)
    try:
        # A timeout keeps one unresponsive request from stalling the whole loop.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch season {number}: {exc}")
        failed_seasons.append(number)
        continue

    if response.status_code != 200:
        # Previously these seasons were dropped without any message.
        print(f"Failed to fetch season {number}. Status code: {response.status_code}")
        failed_seasons.append(number)
        continue

    json_data = response.json()

    # Extracting the 'matches' array from each response
    matches_data = json_data['doc'][0]['data']['matches']

    # Keys are indexed directly, so a match missing any of them raises KeyError.
    for match in matches_data:
        match_info = {
            'season_id': number,
            'week': match['week'],
            'round': match['round'],
            'home_team': match['teams']['home']['name'],
            'away_team': match['teams']['away']['name'],
            'home_team_abbr': match['teams']['home']['abbr'],
            'away_team_abbr': match['teams']['away']['abbr'],
            '_id': match['_id'],
            '_sid': match['_sid'],
            '_rcid': match['_rcid'],
            '_tid': match['_tid'],
            '_utid': match['_utid'],
            'time': match['time']['time'],
            'date': match['time']['date'],
            'home_score': match['result']['home'],
            'away_score': match['result']['away'],
            'periods_p1_home': match['periods']['p1']['home'],
            'periods_p1_away': match['periods']['p1']['away'],
            'periods_ft_home': match['periods']['ft']['home'],
            'periods_ft_away': match['periods']['ft']['away'],
            'neutralground': match['neutralground'],
            'inlivescore': match['inlivescore'],
            'winner': match['result']['winner'],
            'comment': match['comment'],
            'status': match['status'],
            'tobeannounced': match['tobeannounced'],
            'postponed': match['postponed'],
            'canceled': match['canceled'],
            'stadiumid': match['stadiumid'],
            'bestof': match['bestof'],
            'walkover': match['walkover'],
            'retired': match['retired'],
            'disqualified': match['disqualified'],
        }
        matches_dataset.append(match_info)

if failed_seasons:
    print(f"Seasons missing from the dataset: {failed_seasons}")

# An empty CSV would only fail later, when it is read back for training.
if not matches_dataset:
    raise RuntimeError("No match data could be downloaded; 'matches_dataset.csv' was not written.")

# Create DataFrame from the extracted data
df_matches = pd.DataFrame(matches_dataset)

# Save the dataset to a CSV file
df_matches.to_csv('matches_dataset.csv', index=False)


https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798481
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798377
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798407
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798431
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798458
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798508
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798532
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798557
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dhaka/gismo/stats_season_fixtures2/2798588
https://vgls.betradar.com/vfl/feeds/?/bet9javirtuals/en/Asia:Dha

In [17]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score

# Train one random-forest regressor per score (home / away) on past results,
# using only the two team abbreviations as features, then predict the scores
# of the fixtures in 'season_matches_dataset.csv'.

# Load the datasets
matches_data = pd.read_csv('matches_dataset.csv')
season_matches_data = pd.read_csv('season_matches_dataset.csv')

# Feature selection
features = ['home_team_abbr', 'away_team_abbr']

# Rows without a score cannot be used as regression targets (the regressor
# rejects NaN targets), so they are excluded from training/evaluation.
played_matches = matches_data.dropna(subset=['home_score', 'away_score'])

# Creating a feature matrix X and target variables y_home and y_away
X = played_matches[features]
y_home = played_matches['home_score']
y_away = played_matches['away_score']

# Split the data into training and testing sets
X_train, X_test, y_home_train, y_home_test, y_away_train, y_away_test = train_test_split(
    X, y_home, y_away, test_size=0.2, random_state=42
)


def make_preprocessor():
    """Return a new, unfitted one-hot encoder for the team-abbreviation columns.

    handle_unknown='ignore' encodes a team that was absent from the training
    split as all zeros instead of raising at predict time.
    """
    return ColumnTransformer(
        transformers=[
            ('encoder', OneHotEncoder(handle_unknown='ignore'), features)
        ])


# Define preprocessing steps (each pipeline gets its own transformer instance
# rather than sharing one object that both fits would overwrite).
preprocessor = make_preprocessor()

# Combine preprocessing steps with the model.
# random_state makes the forests, and therefore the predictions and R2
# scores, reproducible from run to run.
model_home = Pipeline(steps=[('preprocessor', preprocessor),
                             ('regressor', RandomForestRegressor(random_state=42))])

model_away = Pipeline(steps=[('preprocessor', make_preprocessor()),
                             ('regressor', RandomForestRegressor(random_state=42))])

# Train the models
model_home.fit(X_train, y_home_train)
model_away.fit(X_train, y_away_train)

# Make predictions on the season_matches_data
season_predictions_home = model_home.predict(season_matches_data[features])
season_predictions_away = model_away.predict(season_matches_data[features])

# Convert predictions to integers and create 'predicted_home_score' and 'predicted_away_score' columns
season_matches_data['predicted_home_score'] = season_predictions_home.round().astype(int)
season_matches_data['predicted_away_score'] = season_predictions_away.round().astype(int)

# Evaluate the models on the held-out test set
model_home_score = r2_score(y_home_test, model_home.predict(X_test))
model_away_score = r2_score(y_away_test, model_away.predict(X_test))

# Columns reported both on screen and in the output file
output_columns = ['home_team_abbr', 'away_team_abbr', 'predicted_home_score', 'predicted_away_score']

# Display the predictions
print(season_matches_data[output_columns])

# Save to a CSV file
season_matches_data[output_columns].to_csv('predicted_scores.csv', index=False)

print(f"Home Score Model R2 Score: {model_home_score}")
print(f"Away Score Model R2 Score: {model_away_score}")


    home_team_abbr away_team_abbr  predicted_home_score  predicted_away_score
0              LVR            WVL                     2                     1
1              BUR            BOU                     1                     1
2              TOT            CRY                     3                     1
3              CHE            EVE                     2                     1
4              MBL            LON                     2                     1
..             ...            ...                   ...                   ...
235            TOT            BOU                     2                     2
236            BUR            EVE                     1                     1
237            WAT            MRE                     1                     2
238            SOU            LON                     1                     1
239            NEW            CRY                     1                     1

[240 rows x 4 columns]
Home Score Model R2 Score: 0.07662406909