In [1]:
import requests
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

print(" Fetching Premiership Rugby 2025 data...")

# Fetch the JSON data
url = "https://fixturedownload.com/feed/json/premiership-rugby-2025"
# A timeout keeps the script from hanging indefinitely if the server stalls;
# requests has no default timeout.
response = requests.get(url, timeout=30)
# Fail loudly on HTTP errors (4xx/5xx) rather than trying to parse an error
# page as JSON further down.
response.raise_for_status()
data = response.json()
# Presumably a list of fixture records; the columns used later are
# HomeTeam, AwayTeam, HomeTeamScore and AwayTeamScore.
df = pd.DataFrame(data)

# Clean and prepare the data:
#   1. keep only fixtures where both scores are present (not NaN, not ""),
#   2. keep the four team/score columns under shorter names,
#   3. cast scores to int and derive the score difference and a home-win flag.
score_columns = ["HomeTeamScore", "AwayTeamScore"]
column_names = {
    "HomeTeam": "Team_A",
    "AwayTeam": "Team_B",
    "HomeTeamScore": "Score_A",
    "AwayTeamScore": "Score_B",
}

df = df.dropna(subset=score_columns)
has_both_scores = df["HomeTeamScore"].ne("") & df["AwayTeamScore"].ne("")
df = df.loc[has_both_scores, list(column_names)].rename(columns=column_names)
df = df.astype({"Score_A": int, "Score_B": int})

# Positive difference means Team_A (the home side) scored more.
df["Score_diff"] = df["Score_A"] - df["Score_B"]
# 1 only when Team_A strictly outscored Team_B; draws and Team_B wins are 0.
df["Winner_flag"] = (df["Score_A"] > df["Score_B"]).astype(int)

print("Data cleaned successfully!")
print(df.head())

# Prepare features and labels
# NOTE(review): Score_diff is Score_A - Score_B and Winner_flag is
# Score_A > Score_B, so the label is a deterministic function of the only
# feature (Winner_flag == 1 exactly when Score_diff > 0). Any accuracy
# measured below reflects that, not predictive power on unseen fixtures.
X = df[["Score_diff"]]
y = df["Winner_flag"]

# Train/test split
# Split BEFORE scaling so the held-out rows do not influence the scaler's
# mean/variance (fitting on the full data leaks test-set statistics).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: fit on the training rows only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later code that expects the fully scaled feature matrix.
X_scaled = scaler.transform(X)

# Train models
# Each entry is fitted on the training split, scored on the test split, and
# pickled to "<name>_model.pkl" in the current working directory.
models = {
    "DecisionTree": DecisionTreeClassifier(max_depth=3, random_state=42),
    "RandomForest": RandomForestClassifier(n_estimators=100, random_state=42),
    "SVC": SVC(kernel='linear', probability=True, random_state=42)
}

# Maps model name -> test accuracy as a percentage rounded to 2 decimals.
accuracies = {}
for name, model in models.items():
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)
    acc = round(test_accuracy * 100, 2)
    accuracies[name] = acc

    model_path = f"{name}_model.pkl"
    with open(model_path, "wb") as model_file:
        pickle.dump(model, model_file)

    print(f"Saved {name}_model.pkl — Accuracy: {acc}%")

# Save scaler
# The fitted scaler is persisted so the same transformation can be re-applied
# wherever the pickled models are loaded.
with open("scaler.pkl", "wb") as f:
    pickle.dump(scaler, f)
print("Scaler saved as scaler.pkl")

# Save processed data
# Bind the filename once so the status message always names the file that
# was actually written (the old message said "rugby_data.csv").
data_path = "rugby_data_report.csv"
df.to_csv(data_path, index=False)
print(f"Data saved as {data_path}")

print("\nTraining complete!")
print("Model Accuracies:", accuracies)
print("\nYou can now run → streamlit run app.py")

 Fetching Premiership Rugby 2025 data...
Data cleaned successfully!
                Team_A            Team_B  Score_A  Score_B  Score_diff  \
0          Sale Sharks  Gloucester Rugby       27       10          17   
1  Newcastle Red Bulls          Saracens       31       47         -16   
2           Harlequins        Bath Rugby       17       39         -22   
3   Northampton Saints     Exeter Chiefs       33       33           0   
4        Bristol Bears  Leicester Tigers       42       24          18   

   Winner_flag  
0            1  
1            0  
2            0  
3            0  
4            1  
Saved DecisionTree_model.pkl — Accuracy: 100.0%
Saved RandomForest_model.pkl — Accuracy: 100.0%
Saved SVC_model.pkl — Accuracy: 100.0%
Scaler saved as scaler.pkl
Data saved as rugby_data.csv

Training complete!
Model Accuracies: {'DecisionTree': 100.0, 'RandomForest': 100.0, 'SVC': 100.0}

You can now run → streamlit run app.py
