<a href="https://colab.research.google.com/github/frankwillard/StatGPT/blob/main/VORP_Binary_Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [None]:
# Load the per-player feature table and the binary VORP label table.
# Both CSVs are read from the current working directory.
player_stats, vorp = (
    pd.read_csv(csv_name)
    for csv_name in ('players_stats.csv', 'vorp_binary.csv')
)

In [None]:
# Hold out 25% of the rows for testing. Features and labels are split together,
# so each feature row stays paired with the label row at the same position.
# NOTE(review): this relies on players_stats.csv and vorp_binary.csv listing
# players in the same row order - confirm against the data files.
(
    train_data_with_player,
    test_data_with_player,
    train_labels_with_player,
    test_labels_with_player,
) = train_test_split(
    player_stats,
    vorp,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Features: every column after the first one.
# NOTE(review): the first column appears to be the `Player` name, judging by the
# table displayed at the end of the notebook - confirm against players_stats.csv.
train_data, test_data = (
    frame.iloc[:, 1:]
    for frame in (train_data_with_player, test_data_with_player)
)

# Labels: the second column of the label frames.
# NOTE(review): presumably the binary VORP target - confirm against vorp_binary.csv.
train_labels, test_labels = (
    frame.iloc[:, 1]
    for frame in (train_labels_with_player, test_labels_with_player)
)

In [None]:
# Initialize the four baseline classifiers.
# The tree-based models get an explicit `random_state` so that a fresh
# Restart & Run All reproduces the same fitted models and metrics; 42 matches
# the seed already used for the train/test split.
# LogisticRegression is left at its defaults.
logreg_model = LogisticRegression()
decision_tree_model = DecisionTreeClassifier(random_state=42)
random_forest_model = RandomForestClassifier(random_state=42)
xgboost_model = XGBClassifier(random_state=42)

In [None]:
# Train every baseline on the same training features and labels.
# A loop replaces four copy-pasted calls and keeps the cell from echoing the
# repr of the last fitted estimator.
for model in (logreg_model, decision_tree_model, random_forest_model, xgboost_model):
  model.fit(train_data, train_labels)

In [None]:
# Predict on the held-out test features with each fitted model.
# The order of names on the left matches the order of models on the right.
(
    logreg_predictions,
    decision_tree_predictions,
    random_forest_predictions,
    xgboost_predictions,
) = (
    fitted_model.predict(test_data)
    for fitted_model in (
        logreg_model,
        decision_tree_model,
        random_forest_model,
        xgboost_model,
    )
)

In [None]:
def evaluate(predictions, true_labels=None):
  """Print accuracy, confusion matrix and classification report for predictions.

  Args:
    predictions: Predicted class labels, one per row of the labels being scored.
    true_labels: Ground-truth labels to score against. When omitted, the
      notebook-level `test_labels` is used, so existing `evaluate(preds)`
      calls behave exactly as before.

  Returns:
    None. The three metrics are printed.
  """
  if true_labels is None:
    # Looked up at call time so the function follows the current test split.
    true_labels = test_labels

  accuracy = accuracy_score(true_labels, predictions)
  conf_matrix = confusion_matrix(true_labels, predictions)
  classification_rep = classification_report(true_labels, predictions)

  print(f'Accuracy: {accuracy}')
  print(f'Confusion Matrix:\n{conf_matrix}')
  print(f'Classification Report:\n{classification_rep}')

In [None]:
# Logistic regression baseline: metrics on the held-out test split.
evaluate(logreg_predictions)

Accuracy: 0.8410852713178295
Confusion Matrix:
[[334  43]
 [ 80 317]]
Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.89      0.84       377
           1       0.88      0.80      0.84       397

    accuracy                           0.84       774
   macro avg       0.84      0.84      0.84       774
weighted avg       0.84      0.84      0.84       774



In [None]:
# Decision tree baseline: metrics on the held-out test split.
evaluate(decision_tree_predictions)

Accuracy: 0.7454780361757106
Confusion Matrix:
[[282  95]
 [102 295]]
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.75      0.74       377
           1       0.76      0.74      0.75       397

    accuracy                           0.75       774
   macro avg       0.75      0.75      0.75       774
weighted avg       0.75      0.75      0.75       774



In [None]:
# Random forest baseline: metrics on the held-out test split.
evaluate(random_forest_predictions)

Accuracy: 0.810077519379845
Confusion Matrix:
[[311  66]
 [ 81 316]]
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.82      0.81       377
           1       0.83      0.80      0.81       397

    accuracy                           0.81       774
   macro avg       0.81      0.81      0.81       774
weighted avg       0.81      0.81      0.81       774



In [None]:
# XGBoost baseline: metrics on the held-out test split.
evaluate(xgboost_predictions)

Accuracy: 0.7868217054263565
Confusion Matrix:
[[299  78]
 [ 87 310]]
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.79      0.78       377
           1       0.80      0.78      0.79       397

    accuracy                           0.79       774
   macro avg       0.79      0.79      0.79       774
weighted avg       0.79      0.79      0.79       774



In [None]:
# Attach the logistic-regression predictions as a `pred` column.
# `.assign` builds a new frame instead of writing a column into the row subset
# returned by train_test_split, which can raise SettingWithCopyWarning on pandas
# versions without copy-on-write. Re-running this cell is idempotent.
test_data_with_player = test_data_with_player.assign(pred=logreg_predictions)

In [None]:
# Display the test rows together with the logistic-regression `pred` column.
test_data_with_player

Unnamed: 0,Player,Points,Rebounds,Assists,Steals,Blocks,pred
2441,Rony Seikaly,14.7,9.5,1.3,0.7,1.3,1
2218,Paul Pressey,10.6,3.9,5.1,1.4,0.6,1
2245,Miroslav Raduljica,3.6,2.1,0.4,0.2,0.2,0
2624,Chris Taft,2.8,2.1,0.1,0.1,0.4,0
2637,Jeff Taylor,6.1,2.0,0.8,0.5,0.2,0
...,...,...,...,...,...,...,...
456,Vernon Carey Jr.,1.9,1.4,0.1,0.1,0.2,0
462,Cory Carr,4.1,1.2,1.6,0.5,0.2,0
2295,Jason Richardson,17.1,5.0,2.7,1.2,0.4,1
817,LaPhonso Ellis,11.9,6.5,1.6,0.7,0.8,1
