<a href="https://colab.research.google.com/github/VandanGupte101727/MAN-CITY-VS-WOLVES-/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, mean_squared_error

url1 = "https://www.football-data.co.uk/mmz4281/2223/E0.csv"
url2 = "https://www.football-data.co.uk/mmz4281/2324/E0.csv"
url3 = "https://www.football-data.co.uk/mmz4281/2425/E0.csv"

df1 = pd.read_csv(url1)
df2 = pd.read_csv(url2)
df3 = pd.read_csv(url3)

common_cols = list(set.intersection(*[set(df.columns) for df in [df1, df2, df3]]))
df1, df2, df3 = df1[common_cols], df2[common_cols], df3[common_cols]
df = pd.concat([df1, df2, df3], ignore_index=True)
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)

label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

X_class = df.drop(columns=['FTHG', 'FTAG', 'FTR'])
y_class = df['FTR']

X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_class, y_class, test_size=0.2, random_state=42)

clf = RandomForestClassifier(
    n_estimators=300,
    max_depth=5,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42
)
clf.fit(X_train_c, y_train_c)

train_acc = accuracy_score(y_train_c, clf.predict(X_train_c))
test_acc = accuracy_score(y_test_c, clf.predict(X_test_c))

print(f"Train Accuracy (Match Outcome): {train_acc:.2f}")
print(f"Test Accuracy (Match Outcome): {test_acc:.2f}")

X_reg = df.drop(columns=['FTHG', 'FTAG'])
y_home, y_away = df['FTHG'], df['FTAG']

X_train_r, X_test_r, y_train_home, y_test_home = train_test_split(X_reg, y_home, test_size=0.2, random_state=42)
_, _, y_train_away, y_test_away = train_test_split(X_reg, y_away, test_size=0.2, random_state=42)

reg_home = RandomForestRegressor(
    n_estimators=300,
    max_depth=5,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42
)
reg_away = RandomForestRegressor(
    n_estimators=300,
    max_depth=5,
    min_samples_split=5,
    min_samples_leaf=3,
    random_state=42
)

reg_home.fit(X_train_r, y_train_home)
reg_away.fit(X_train_r, y_train_away)

mse_home = mean_squared_error(y_test_home, reg_home.predict(X_test_r))
mse_away = mean_squared_error(y_test_away, reg_away.predict(X_test_r))

print(f"Home Goals MSE: {mse_home:.2f}")
print(f"Away Goals MSE: {mse_away:.2f}")

sample_match_data = {col: [0] for col in X_train_r.columns}
sample_match_data['HomeTeam'] = [label_encoders['HomeTeam'].transform(['Man City'])[0]]
sample_match_data['AwayTeam'] = [label_encoders['AwayTeam'].transform(['Wolves'])[0]]
sample_match = pd.DataFrame(sample_match_data)

pred_result = clf.predict(sample_match[X_train_c.columns])[0]
pred_home_goals = reg_home.predict(sample_match)[0]
pred_away_goals = reg_away.predict(sample_match)[0]

pred_result_decoded = label_encoders['FTR'].inverse_transform([pred_result])[0]
print(f"Predicted Outcome: {pred_result_decoded}")
print(f"Predicted Score: {int(round(pred_home_goals))} - {int(round(pred_away_goals))}")


Train Accuracy (Match Outcome): 0.74
Test Accuracy (Match Outcome): 0.64
Home Goals MSE: 0.51
Away Goals MSE: 0.44
Predicted Outcome: A
Predicted Score: 1 - 2
