# In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score

# Train and evaluate a random-forest classifier that predicts whether a match
# was won, using possession and pass counts as the only features.

# Load the dataset; the first CSV column becomes the row index.
mancity = pd.read_csv('../data/mancity23-24.csv', index_col=0)

# Data preprocessing: the target is 1 when 'Result' is 'W' and 0 otherwise;
# the two feature columns are cast to integers.
mancity['target'] = (mancity['Result'] == 'W').astype('int')
mancity['poss'] = mancity['poss'].astype('int')
mancity['passes'] = mancity['passes'].astype('int')

# Split the data into training (80%) and testing (20%) sets with a fixed seed
# so the split is reproducible.
# NOTE(review): presumably each row is one match; this split is random rather
# than chronological -- confirm that is intended.
train, test = train_test_split(mancity, test_size=0.2, random_state=42)

# Define predictors
predictors = ['poss', 'passes']

# Train the model (seeded so the fitted forest is reproducible).
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)
rf.fit(train[predictors], train['target'])

# Make predictions. Pass the DataFrame itself rather than `.values`: the model
# was fitted on named columns, and predicting on a bare array makes
# scikit-learn warn about missing feature names and skip its column check.
preds = rf.predict(test[predictors])

# Evaluate the model against the held-out labels.
accuracy = accuracy_score(test['target'], preds)
precision = precision_score(test['target'], preds)

# Display results
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')

# Create a DataFrame for actual vs predicted (indexed like the test labels) and
# print a confusion table: rows are actual classes, columns are predicted ones.
combined = pd.DataFrame({'actual': test['target'], 'predicted': preds})
print(pd.crosstab(index=combined['actual'], columns=combined['predicted']))