In [65]:
import torch
import torch.nn as nn
import torch.optim as optim

class RecommendationModel(nn.Module):
    def __init__(self, num_batters, num_bowlers, embedding_dim, num_numerical_features):
        super(RecommendationModel, self).__init__()

        # Embedding layers
        self.batter_embedding = nn.Embedding(num_batters, embedding_dim)
        self.bowler_embedding = nn.Embedding(num_bowlers, embedding_dim)

        # Fully connected layers
        input_size = embedding_dim * 2 + num_numerical_features  # 3 embeddings + numerical features
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)  # Output layer (e.g., predicting rating, click probability, etc.)

        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()  # Use sigmoid for binary classification

    def forward(self, batter_ids, bowler_ids, numerical_features):
        # Get embeddings
        user_emb = self.batter_embedding(batter_ids)
        item_emb = self.bowler_embedding(bowler_ids)

        # Concatenate all features
        x = torch.cat([user_emb, item_emb, numerical_features], dim=1)

        # Forward pass through fully connected layers
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))  # Output probability

        return x

In [66]:
import pandas as pd

dat = pd.read_csv('../../data/step_03/one_day_matches.csv')
print(len(dat))

num_batters = len(dat.batter_on_strike.unique())
num_bowlers = len(dat.bowler_on_strike.unique())
num_other_cols = len(dat.columns) - 3

from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler

# Convert user IDs to integer indices
batter_encoder = LabelEncoder()
bowler_encoder = LabelEncoder()
outcome_scaler = MinMaxScaler()
other_scaler = MinMaxScaler()

batter_encodings = batter_encoder.fit_transform(dat.batter_on_strike.unique())
bowler_encodings = bowler_encoder.fit_transform(dat.bowler_on_strike.unique())

batters_col = dat.batter_on_strike.values
bowlers_col = dat.bowler_on_strike.values
outcomes_col = outcome_scaler.fit_transform(dat.outcome.values.reshape(-1, 1))
other_cols = dat.drop(['batter_on_strike', 'bowler_on_strike', 'outcome'], axis=1).values
other_cols = other_scaler.fit_transform(other_cols)

20380


In [67]:
# create train, valid and test sets
from sklearn.model_selection import train_test_split

batters_train, batters_test, bowlers_train, bowlers_test, other_train, other_test, outcome_train, outcome_test = train_test_split(batters_col, bowlers_col, other_cols, outcomes_col, test_size=0.1)
batters_train, batters_valid, bowlers_train, bowlers_valid, other_train, other_valid, outcome_train, outcome_valid = train_test_split(batters_train, bowlers_train, other_train, outcome_train, test_size=0.2)

# Convert to PyTorch tensors
batters_train = torch.LongTensor(batter_encoder.transform(batters_train))
bowlers_train = torch.LongTensor(bowler_encoder.transform(bowlers_train))
other_train = torch.FloatTensor(other_train)
outcome_train = torch.FloatTensor(outcome_train)

batters_valid = torch.LongTensor(batter_encoder.transform(batters_valid))
bowlers_valid = torch.LongTensor(bowler_encoder.transform(bowlers_valid))
other_valid = torch.FloatTensor(other_valid)
outcome_valid = torch.FloatTensor(outcome_valid)

batters_test = torch.LongTensor(batter_encoder.transform(batters_test))
bowlers_test = torch.LongTensor(bowler_encoder.transform(bowlers_test))
other_test = torch.FloatTensor(other_test)
outcome_test = torch.FloatTensor(outcome_test)

In [68]:
# initialise model
model = RecommendationModel(num_batters, num_bowlers, 16, num_other_cols)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Training loop
num_epochs = 10
batch_size = 32

for epoch in range(num_epochs):
    for i in range(0, len(batters_train), batch_size):
        # Generate batch
        batch_batters = batters_train[i:i+batch_size]
        batch_bowlers = bowlers_train[i:i+batch_size]
        batch_other = other_train[i:i+batch_size]
        batch_outcomes = outcome_train[i:i+batch_size]

        # Forward pass
        outputs = model(batch_batters, batch_bowlers, batch_other)
        loss = criterion(outputs, batch_outcomes.view(-1, 1))

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{len(batters_train)}], Loss: {loss.item()}')
    
    # Validation loss
    with torch.no_grad():
        outputs = model(batters_valid, bowlers_valid, other_valid)
        loss = criterion(outputs, outcome_valid.view(-1, 1))
        print(f'Validation loss: {loss.item()}')
    
# Test loss

with torch.no_grad():
    outputs = model(batters_test, bowlers_test, other_test)
    loss = criterion(outputs, outcome_test.view(-1, 1))
    print(f'Test loss: {loss.item()}')

Validation loss: 0.02952091209590435
Validation loss: 0.02581561915576458
Validation loss: 0.02556523308157921
Validation loss: 0.025452548637986183
Validation loss: 0.025383658707141876
Validation loss: 0.025334089994430542
Validation loss: 0.02529221773147583
Validation loss: 0.025256101042032242
Validation loss: 0.02522486262023449
Validation loss: 0.025197356939315796
Test loss: 0.027515867725014687


In [70]:
with torch.no_grad():
    outputs = model(batters_test, bowlers_test, other_test) * dat.outcome.max()
    print(outputs.min(), outputs.max())

tensor(0.3120) tensor(1.6585)
