In [None]:
# Problem 2 - Sports Team Optimization
# You are in charge of assembling a sports team with a budget constraint. You wish to pick
# the team that gives you the best chance of winning (by scoring the most points). Use the
# provided data to create a team picking system that optimizes for expected points,
# constrained by budget (B – nonnegative float), and number of players on a team (lower
# bound M, upper bound N – nonnegative integers).
# More formally:
# Maximize:
# - Expected points
# Subject to:
#  - Total cost (sum of salaries paid) <= B
# Number of players chosen <= N
# - Number of players chosen >= M
# Output: Work in a Jupyter notebook, R markdown file, or script detailing your process

In [37]:
# importing the initial libraries/packages required for reading in the data and performing based transformations

%matplotlib notebook
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns # rich visualizations

# showing the folder structure/path to raw data
# import os
# for dirname, _, filenames in os.walk('/Users/alexanderblaies/Desktop'):
    # for filename in filenames:
       # print(os.path.join(dirname, filename))

In [38]:
# initial inspection
sports_df = pd.read_csv('/Users/alexanderblaies/Desktop/sports_data.csv')
sports_df.head(10)

Unnamed: 0.1,Unnamed: 0,player_id,position,salary,expected_points
0,0,412,A,11900,41.5
1,1,239,B,11700,78.6
2,2,2,B,11500,51.4
3,3,240,C,10400,68.8
4,4,241,A,10000,46.1
5,5,3,B,9900,53.1
6,6,413,B,9400,53.0
7,7,414,A,9400,20.4
8,8,5,A,9300,61.3
9,9,11,C,9200,33.1


In [39]:
# dropping the "Unnamed: 0" column; looks good
sports_df.drop('Unnamed: 0', axis = 1, inplace = True)
sports_df.head(5)

Unnamed: 0,player_id,position,salary,expected_points
0,412,A,11900,41.5
1,239,B,11700,78.6
2,2,B,11500,51.4
3,240,C,10400,68.8
4,241,A,10000,46.1


In [40]:
# checking the data types to ensure compatibility
sports_df.dtypes

player_id            int64
position            object
salary               int64
expected_points    float64
dtype: object

In [41]:
# checking the summary statistics
sports_df.describe()

Unnamed: 0,player_id,salary,expected_points
count,219.0,219.0,219.0
mean,226.936073,5205.022831,21.464384
std,157.768253,1873.23553,14.123291
min,2.0,3500.0,0.0
25%,79.5,3700.0,11.75
50%,228.0,4500.0,19.7
75%,368.5,6400.0,28.9
max,511.0,11900.0,78.6


In [42]:
# just double checking to ensure there are no null values
sports_df.isnull().any()

player_id          False
position           False
salary             False
expected_points    False
dtype: bool

In [43]:
# let's go ahead and build the optimization model using the following variables; EDA would be somewhat valuable here, but
# it is not required to build the model 
# maximize P (points)
# constraints: B (budget - summation of salaries), M (lower bound of players available), N (upper bound of players available)
B_val = float(input("Enter your value for total budget available: "))
M_val = int(input("Enter your value for the lower bound of players available: "))
N_val = int(input("Enter your value for the upper bound of players available: "))


Enter your value for total budget available: 23234545
Enter your value for the lower bound of players available: 4
Enter your value for the upper bound of players available: 6


In [46]:
# now, I could be a complex algo from scratch, but for the sake of time, I'll use libraries that are available
# that's where the "pulp" library comes into play
# importing the solver/optimizer
import pulp
from pulp import *

# initializing an instance of the solver:
model = LpProblem('Maximize Points', sense= LpMaximize)

# defining the input array/decision variables
players = list(sports_df.index)
sports_vars = LpVariable.dicts("Sprts", players, 0)
T = LpVariable('T', lowBound=M_val, upBound=N_val, cat='Integer')

# objective function: maximize P given a number of players available between M and N that sums to <= B
model += (lpSum([sports_df.expected_points[i] for i in players]),"Total Points Expected",)

# creating the constraints
model += ((lpSum([sports_df.salary[i] for i in players]) * T) <= B_val, "Budget Requirement")

# solving the model 
model.solve()
# model =+ players_available >= M_val
# model =+ players_available <= N_val

1