<a href="https://colab.research.google.com/github/angwelo/ASSIGNMENT-/blob/main/premier_league_automation_and_fetching.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import warnings
from datetime import date, datetime, timedelta, timezone
from zoneinfo import ZoneInfo

import numpy as np
import pandas as pd
import requests

# -----------------------
# Configuration
# -----------------------
# The football-data.org token is read from the environment so the secret never
# lives in the notebook. A token that was ever committed to a public repository
# must be treated as leaked and regenerated.
#   Colab / shell:  export FOOTBALL_DATA_API_KEY=<your token>
API_KEY = os.environ.get("FOOTBALL_DATA_API_KEY", "")
if not API_KEY:
    # Warn instead of raising so the rest of the notebook can still be imported
    # and inspected offline; authenticated requests will fail until the key is set.
    warnings.warn(
        "FOOTBALL_DATA_API_KEY is not set; requests to football-data.org "
        "will be sent without a valid token and are likely to be rejected.",
        stacklevel=2,
    )
BASE_URL = "https://api.football-data.org/v4"
HEADERS = {"X-Auth-Token": API_KEY}  # sent with every API request

COMPETITION = "PL"   # competition code interpolated into the request URL
LOOKBACK_DAYS = 540  # ~18 months of history
MAX_GOALS = 10       # Poisson summation upper bound (0..MAX_GOALS)

# -----------------------
# Date Helpers (Africa/Nairobi weekend)
# -----------------------
NAIROBI = ZoneInfo("Africa/Nairobi")
today_ke = datetime.now(NAIROBI).date()


def weekend_window(today):
    """Return the ``(saturday, sunday)`` dates of the weekend relevant to ``today``.

    Monday-Friday map to the upcoming weekend, Saturday maps to today and
    tomorrow, and Sunday maps to yesterday and today. Saturday is always derived
    from Sunday, so the pair is guaranteed to be consecutive and ordered.
    (Computing both offsets independently with ``% 7`` put Saturday six days
    *after* Sunday whenever the notebook was run on a Sunday.)

    Args:
        today: A ``datetime.date`` (anything with ``weekday()`` that supports
            ``timedelta`` arithmetic).

    Returns:
        Tuple ``(saturday, sunday)`` of the same type as ``today``.
    """
    # weekday(): Mon=0 ... Sun=6; the modulo keeps the offset within 0..6.
    sunday = today + timedelta(days=(6 - today.weekday()) % 7)
    return sunday - timedelta(days=1), sunday


# Calendar dates (Nairobi) of the weekend to predict.
sat_ke, sun_ke = weekend_window(today_ke)

# Day offsets from today, kept for any later cell that reads them.
# days_to_sat is -1 when today is Sunday (Saturday was yesterday).
days_to_sat = (sat_ke - today_ke).days
days_to_sun = (sun_ke - today_ke).days

# Convert to UTC for the API (API uses UTC date fences): the window runs from the
# first instant of Saturday to the last instant of Sunday, Nairobi time.
# Africa/Nairobi is ahead of UTC, so the Saturday-midnight fence falls on the
# previous UTC calendar day; sat_utc.date() is therefore one day earlier than sat_ke.
sat_utc = datetime.combine(sat_ke, datetime.min.time(), tzinfo=NAIROBI).astimezone(timezone.utc)
sun_utc = datetime.combine(sun_ke, datetime.max.time(), tzinfo=NAIROBI).astimezone(timezone.utc)

def fetch_matches(date_from: str, date_to: str, status=None):
    """Fetch matches of the configured competition within a date range.

    Args:
        date_from: Start of the range, formatted ``YYYY-MM-DD``.
        date_to: End of the range, formatted ``YYYY-MM-DD``.
        status: Optional status filter, e.g. one of
            {"FINISHED", "SCHEDULED", "IN_PLAY", "TIMED"}. Omitted from the
            query when falsy.

    Returns:
        The list stored under ``"matches"`` in the JSON response, or an empty
        list when that key is absent.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
    """
    query = {"dateFrom": date_from, "dateTo": date_to}
    if status:
        query["status"] = status

    response = requests.get(
        f"{BASE_URL}/competitions/{COMPETITION}/matches",
        headers=HEADERS,
        params=query,
        timeout=30,
    )
    response.raise_for_status()

    payload = response.json()
    return payload.get("matches", [])

def fetch_historical_matches(lookback_days=540):
    """Fetch finished matches from the last ``lookback_days`` days as a DataFrame.

    The window ends on today's UTC date and starts ``lookback_days`` earlier.
    Matches without a full-time score for both sides are skipped.

    Args:
        lookback_days: Length of the history window in days.

    Returns:
        DataFrame with columns ``utcDate``, ``homeTeam``, ``awayTeam``,
        ``homeGoals`` and ``awayGoals`` (goals as int64). The columns are
        present even when no match qualifies, so downstream column lookups do
        not raise ``KeyError`` on an empty result.
    """
    end_date = datetime.now(timezone.utc).date()
    start_date = end_date - timedelta(days=lookback_days)
    matches = fetch_matches(start_date.isoformat(), end_date.isoformat(), status="FINISHED")

    columns = ["utcDate", "homeTeam", "awayTeam", "homeGoals", "awayGoals"]
    rows = []
    for m in matches:
        # Either "score" or "fullTime" may be missing or null in the payload.
        fulltime = (m.get("score") or {}).get("fullTime") or {}
        home_goals = fulltime.get("home")
        away_goals = fulltime.get("away")
        # Keep only matches with score info for both teams.
        if home_goals is None or away_goals is None:
            continue
        rows.append({
            "utcDate": m["utcDate"],
            "homeTeam": m["homeTeam"]["name"],
            "awayTeam": m["awayTeam"]["name"],
            "homeGoals": int(home_goals),
            "awayGoals": int(away_goals),
        })

    # Explicit columns + dtypes give the same schema for empty and non-empty results.
    return pd.DataFrame(rows, columns=columns).astype(
        {"homeGoals": "int64", "awayGoals": "int64"}
    )

def fetch_weekend_fixtures(sat_date_utc: date, sun_date_utc: date):
    """Fetch fixtures with status ``SCHEDULED`` between two UTC dates.

    Args:
        sat_date_utc: First date of the window (sent as ``dateFrom``).
        sun_date_utc: Last date of the window (sent as ``dateTo``).

    Returns:
        DataFrame with columns ``utcDate``, ``homeTeam`` and ``awayTeam``;
        empty (but with those columns) when the API returns no fixtures.
    """
    # NOTE(review): the status list documented on fetch_matches also contains
    # "TIMED"; confirm against the API docs whether filtering on "SCHEDULED"
    # alone can miss fixtures whose kickoff time is already fixed.
    games = fetch_matches(sat_date_utc.isoformat(), sun_date_utc.isoformat(), status="SCHEDULED")

    if not games:
        return pd.DataFrame(columns=["utcDate", "homeTeam", "awayTeam"])

    return pd.DataFrame([
        {
            "utcDate": m["utcDate"],
            "homeTeam": m["homeTeam"]["name"],
            "awayTeam": m["awayTeam"]["name"],
        }
        for m in games
    ])

# Load the finished-match history and preview its first rows.
historical_df = fetch_historical_matches(lookback_days=LOOKBACK_DAYS)
print(f"Fetched {len(historical_df)} historical matches", historical_df.head(), sep="\n")

# Load the fixtures inside the weekend window (UTC calendar dates).
weekend_df = fetch_weekend_fixtures(sat_date_utc=sat_utc.date(), sun_date_utc=sun_utc.date())
print(f"Fetched {len(weekend_df)} weekend fixtures", weekend_df, sep="\n")




Fetched 511 historical matches
                utcDate             homeTeam              awayTeam  homeGoals  \
0  2024-03-16T15:00:00Z           Burnley FC          Brentford FC          2   
1  2024-03-16T15:00:00Z        Luton Town FC  Nottingham Forest FC          1   
2  2024-03-16T17:30:00Z            Fulham FC  Tottenham Hotspur FC          3   
3  2024-03-17T14:00:00Z   West Ham United FC        Aston Villa FC          1   
4  2024-03-30T12:30:00Z  Newcastle United FC    West Ham United FC          4   

   awayGoals  
0          1  
1          1  
2          0  
3          1  
4          3  
Fetched 0 weekend fixtures
Empty DataFrame
Columns: [utcDate, homeTeam, awayTeam]
Index: []


In [None]:
# Show the last 10 rows (the latest matches, if the API returns them in date order).
print(historical_df.tail(10))

                  utcDate                    homeTeam             awayTeam  \
501  2025-08-30T11:30:00Z                  Chelsea FC            Fulham FC   
502  2025-08-30T14:00:00Z              Sunderland AFC         Brentford FC   
503  2025-08-30T14:00:00Z        Manchester United FC           Burnley FC   
504  2025-08-30T14:00:00Z        Tottenham Hotspur FC      AFC Bournemouth   
505  2025-08-30T14:00:00Z  Wolverhampton Wanderers FC           Everton FC   
506  2025-08-30T16:30:00Z             Leeds United FC  Newcastle United FC   
507  2025-08-31T13:00:00Z   Brighton & Hove Albion FC   Manchester City FC   
508  2025-08-31T13:00:00Z        Nottingham Forest FC   West Ham United FC   
509  2025-08-31T15:30:00Z                Liverpool FC           Arsenal FC   
510  2025-08-31T18:00:00Z              Aston Villa FC    Crystal Palace FC   

     homeGoals  awayGoals  
501          2          0  
502          2          1  
503          3          2  
504          0          1  
5

In [None]:
# League-wide baselines: mean goals per match for the home and the away side.
league_home_avg, league_away_avg = (
    historical_df[goal_col].mean() for goal_col in ("homeGoals", "awayGoals")
)
print(f"League home average: {league_home_avg}")
print(f"League away average: {league_away_avg}")


League home average: 1.5870841487279843
League away average: 1.4109589041095891


In [None]:
# Summary statistics of the numeric columns, transposed to one row per column.
historical_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
homeGoals,511.0,1.587084,1.326853,0.0,1.0,1.0,2.0,7.0
awayGoals,511.0,1.410959,1.210816,0.0,1.0,1.0,2.0,6.0


In [None]:
# Column dtypes, non-null counts and memory usage (a quick check for missing values).
historical_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 511 entries, 0 to 510
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   utcDate    511 non-null    object
 1   homeTeam   511 non-null    object
 2   awayTeam   511 non-null    object
 3   homeGoals  511 non-null    int64 
 4   awayGoals  511 non-null    int64 
dtypes: int64(2), object(3)
memory usage: 20.1+ KB


In [None]:
# (number of matches, number of columns)
historical_df.shape

(511, 5)

In [None]:
# Per-team scoring profile, split by venue.

# As the home side: mean goals scored / conceded and number of home matches.
home_group = historical_df.groupby("homeTeam").agg(
    home_goals_for=pd.NamedAgg(column="homeGoals", aggfunc="mean"),
    home_goals_against=pd.NamedAgg(column="awayGoals", aggfunc="mean"),
    home_matches=pd.NamedAgg(column="homeGoals", aggfunc="count"),
)

# As the away side: mean goals scored / conceded and number of away matches.
away_group = historical_df.groupby("awayTeam").agg(
    away_goals_for=pd.NamedAgg(column="awayGoals", aggfunc="mean"),
    away_goals_against=pd.NamedAgg(column="homeGoals", aggfunc="mean"),
    away_matches=pd.NamedAgg(column="awayGoals", aggfunc="count"),
)

# Outer join on the team-name index so a team that appears on only one side is
# kept; its missing values are filled with 0.
# NOTE(review): for such a team the goal *averages* also become 0, not just the
# match counts. Confirm that is the intended fallback for the model.
team_strengths = pd.merge(
    home_group,
    away_group,
    how="outer",
    left_index=True,
    right_index=True,
).fillna(0)


print(team_strengths)

                            home_goals_for  home_goals_against  home_matches  \
homeTeam                                                                       
AFC Bournemouth                   1.320000            0.840000            25   
Arsenal FC                        2.080000            0.800000            25   
Aston Villa FC                    1.807692            1.230769            26   
Brentford FC                      1.840000            1.600000            25   
Brighton & Hove Albion FC         1.346154            1.500000            26   
Burnley FC                        1.333333            1.500000             6   
Chelsea FC                        2.148148            0.888889            27   
Crystal Palace FC                 1.720000            1.320000            25   
Everton FC                        1.400000            0.920000            25   
Fulham FC                         1.320000            1.600000            25   
Ipswich Town FC                   0.7368