# Importing Libraries

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from statsmodels.miscmodels.ordinal_model import OrderedModel

# Setting up model dataframe

In [4]:
# Read the 2020, 2021 and 2022 CSV files into three separate frames.
df2020, df2021, df2022 = (
    pd.read_csv(csv_path)
    for csv_path in ("data/df2020.csv", "data/df2021.csv", "data/df2022.csv")
)

In [5]:
# Remove the leading column of each yearly frame, in place.
# NOTE(review): presumably a leftover index column from an earlier export --
# confirm against the CSV files.
for yearly_frame in (df2020, df2021, df2022):
    leading_column = yearly_frame.columns[0]
    yearly_frame.drop(columns=leading_column, inplace=True)

In [6]:
# Tag every row of each frame with the year that frame belongs to.
for year, yearly_frame in ((2020, df2020), (2021, df2021), (2022, df2022)):
    yearly_frame['Year'] = year

In [7]:
# Stack the three yearly frames into one table.  ignore_index=True gives the
# result a fresh 0..n-1 index; without it each frame keeps its own row labels
# and the combined frame carries duplicate index values.
df = pd.concat([df2020, df2021, df2022], ignore_index=True)

In [8]:
# Write the combined table to disk; the row index is not written.
df.to_csv(path_or_buf='data/combineddata.csv', index=False)

In [9]:
# Display the column labels of the combined frame.
df.columns

Index(['Player', 'Round', 'DraftNumber', 'PreTeam', 'urlsummary', 'PreType',
       'GP', 'GS', 'MIN', 'PTS', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
       'FTM', 'FTA', 'FT%', 'OFF', 'DEF', 'TRB', 'AST', 'STL', 'BLK', 'TOV',
       'PF', 'Position', 'Year'],
      dtype='object')

In [10]:
# Reload the combined table from the CSV written earlier in this notebook.
df = pd.read_csv(filepath_or_buffer='data/combineddata.csv')

In [11]:
# Build the modelling frame as a copy of df without these four columns.
excluded_columns = ['Player', 'Round', 'PreTeam', 'urlsummary']
modeldf = df.drop(columns=excluded_columns)

In [12]:
# Install a fresh 0..n-1 index; the previous index is kept as a regular
# column (it is removed again by the later drop of 'index').
modeldf = modeldf.reset_index()

In [13]:
# Discard every row whose 'GP' value is 0.
has_zero_gp = modeldf['GP'] == 0
modeldf = modeldf.drop(index=modeldf.loc[has_zero_gp].index)

In [14]:
# Renumber the rows 0..n-1 once more; the previous index again becomes a
# regular column (pandas names it 'level_0' when an 'index' column already
# exists, which is the column dropped further down).
modeldf = modeldf.reset_index()

In [15]:
# Remove the 'index' column left behind by reset_index().
modeldf = modeldf.drop(columns=['index'])

In [16]:
# 'Year' is not kept in the modelling frame.
del modeldf['Year']

In [17]:
# Remove the 'level_0' column left behind by the second reset_index().
modeldf = modeldf.drop(columns=['level_0'])

In [18]:
# Write the cleaned modelling frame to disk; the row index is not written.
modeldf.to_csv(path_or_buf='data/modeldf.csv', index=False)

# Pre-processing

In [19]:
# Reload the modelling frame from its CSV file for the pre-processing steps.
modeldf = pd.read_csv(filepath_or_buffer='data/modeldf.csv')

In [20]:
# Replacement table for the first derived position list: the codes 'G', 'F',
# 'FC' and 'GF' are rewritten as shown; every other value (including missing
# ones) is passed through unchanged.
_POS2_BY_POSITION = {'G': 'PG', 'F': 'SF', 'FC': 'PF', 'GF': 'SG'}

# Iterate over the column's values directly rather than looking each row up by
# label with modeldf['Position'][i]: the label lookup only works on a default
# 0..n-1 index and costs one Series lookup per row.
pos2 = [
    _POS2_BY_POSITION.get(position, position)
    for position in modeldf['Position']
]


In [21]:
# Replacement table for the second derived position list: the codes 'G', 'F',
# 'FC' and 'GF' are rewritten as shown; every other value (including missing
# ones) is passed through unchanged.
_POS3_BY_POSITION = {'G': 'SG', 'F': 'PF', 'FC': 'C', 'GF': 'SF'}

# Iterate over the column's values directly rather than looking each row up by
# label with modeldf['Position'][j]: the label lookup only works on a default
# 0..n-1 index and costs one Series lookup per row.
pos3 = [
    _POS3_BY_POSITION.get(position, position)
    for position in modeldf['Position']
]

In [22]:
# Attach both derived position lists as new columns, Position2 then Position3.
modeldf = modeldf.assign(Position2=pos2, Position3=pos3)

In [23]:
# Position2 values that count as 'Backcourt'; everything else (including a
# missing value) is labelled 'Frontcourt'.
_BACKCOURT_POSITIONS = ('PG', 'SG')

# Iterate over the column's values directly rather than looking each row up by
# label with modeldf['Position2'][k], which only works on a default 0..n-1
# index and costs one Series lookup per row.
pos4 = [
    'Backcourt' if position in _BACKCOURT_POSITIONS else 'Frontcourt'
    for position in modeldf['Position2']
]

In [24]:
# Attach the Backcourt/Frontcourt labels as the 'Position4' column.
modeldf = modeldf.assign(Position4=pos4)

In [25]:
# Columns that are one-hot encoded in the next step.
categorical_columns = [
    'PreType',
    'Position',
    'Position2',
    'Position3',
    'Position4',
]

In [26]:
# Replace each categorical column by its indicator columns.  The indicators
# are named '<column>_<value>' and are appended on the right, one categorical
# column at a time.
for column_name in categorical_columns:
    indicators = pd.get_dummies(modeldf[column_name], prefix=column_name)
    remaining = modeldf.drop(columns=column_name)
    modeldf = pd.concat([remaining, indicators], axis=1)

In [27]:
# Write the one-hot encoded modelling frame to disk; the row index is not
# written.
modeldf.to_csv(path_or_buf='data/modeldfcategorical.csv', index=False)