In [1]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

# Load dataset (replace 'file_path' with actual file path)
file_path = 'epl-training_basic.csv'
data = pd.read_csv(file_path)

# Encode categorical variables
le_team = LabelEncoder()
le_ftr = LabelEncoder()

# Combine unique teams for consistent encoding
unique_teams = pd.concat([data_cleaned['HomeTeam'], data_cleaned['AwayTeam']]).unique()
le_team.fit(unique_teams)

# Apply encoding
data_cleaned['HomeTeam'] = le_team.transform(data_cleaned['HomeTeam'])
data_cleaned['AwayTeam'] = le_team.transform(data_cleaned['AwayTeam'])
data_cleaned['FTR'] = le_ftr.fit_transform(data_cleaned['FTR'])

# Define features (X) and target (y)
X = data_cleaned[['HomeTeam', 'AwayTeam']].values
y = data_cleaned['FTR'].values

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train the SVM model
model = SVC(kernel='rbf', C=1, gamma='scale')
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))

# Example prediction for new input
new_input = [[le_team.transform(['Arsenal'])[0], le_team.transform(['Chelsea'])[0]]]  # Replace with actual teams
new_input_scaled = scaler.transform(new_input)
prediction = le_ftr.inverse_transform(model.predict(new_input_scaled))
print(f"Prediction for Arsenal vs Chelsea: {prediction[0]}")



NameError: name 'data_cleaned' is not defined