In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the data
file_path = 'asiacup.csv'
data = pd.read_csv(file_path)

# Select the relevant features and target
features = ['Run Scored', 'Wicket Lost', 'Fours', 'Sixes', 'Extras', 'Run Rate', 
            'Avg Bat Strike Rate', 'Highest Score', 'Wicket Taken', 'Given Extras', 
            'Highest Individual wicket']
target = 'Result'

# Encode the target variable (Win=1, Lose=0)
data[target] = data[target].map({'Win': 1, 'Lose': 0})

# Handle missing values only for numeric columns by filling with the column mean
numeric_cols = data.select_dtypes(include=[np.number]).columns
data[numeric_cols] = data[numeric_cols].fillna(data[numeric_cols].mean())

# Split data into input features (X) and target (y)
X = data[features].values
y = data[target].values

# Standardize the feature values (normalization)
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Delta Rule implementation
def delta_rule(X, y, weights, learning_rate, epochs):
    for epoch in range(epochs):
        for i in range(len(X)):
            # Predict the output
            y_pred = np.dot(X[i], weights)
            
            # Calculate the error (difference between actual and predicted output)
            error = y[i] - y_pred
            
            # Update the weights using the Delta Rule: W_new = W_old + (learning_rate * error * X)
            weights += learning_rate * error * X[i]
    return weights

# Initialize weights to zeros
weights = np.zeros(X_train.shape[1])

# Set the learning rate and the number of epochs
learning_rate = 0.01
epochs = 100

# Train the model using the Delta Rule
weights = delta_rule(X_train, y_train, weights, learning_rate, epochs)

# Prediction function (apply threshold to classify)
def predict(X, weights):
    return np.where(np.dot(X, weights) >= 0, 1, 0)

# Predict on the test data
y_pred = predict(X_test, weights)

# Ensure y_test and y_pred are integers (binary labels 0 or 1)
y_test = y_test.astype(int)
y_pred = y_pred.astype(int)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred) * 100

# Print results
print("Trained Weights: ", weights)
print(f"Accuracy: {accuracy}%")


Trained Weights:  [ 0.04888948 -0.21414223  0.0722837   0.00235203  0.0343876  -0.054714
  0.02253954  0.03833222  0.21205605  0.0044195   0.13375513]
Accuracy: 78.43137254901961%
