In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [2]:
#Path to the Excel workbook holding the poll data (relative to the notebook's working directory)
file = "../tables/2021AP.xlsx"

#First week excluded from the modelling data: prep_df drops every row whose Week is >= next_week
next_week = 7

In [3]:
#Read the raw workbook and display it for inspection.
#prep_df reads the workbook again itself, so in the cells shown this frame is only displayed.
df = pd.read_excel(file)
df

Unnamed: 0,Team,Week,Rank,W,L,Winning Percentage,Opp. Rank,Opp. P5,Home,Result,Points Scored,Points Against,Margin,Next Week Rank,Movement
0,Alabama,1,1,0,0,1,14,1,0,W,44,13,31,1,0
1,Oklahoma,1,2,0,0,1,26,0,1,W,40,35,5,4,-2
2,Clemson,1,3,0,0,1,5,1,0,L,3,10,-7,6,-3
3,Ohio State,1,4,0,0,1,26,1,0,W,45,31,14,3,1
4,Georgia,1,5,0,0,1,3,1,0,W,10,3,7,2,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,NC State,7,22,4,1,0.8,,,,,,,,,
172,SMU,7,23,6,0,1,,,,,,,,,
173,San Diego State,7,24,5,0,1,,,,,,,,,
174,Texas,7,25,4,2,0.666667,,,,,,,,,


In [4]:
def prep_df(next_week, raw = None):
    """Build the modelling table from the poll workbook.

    Parameters
    ----------
    next_week : int
        Rows whose 'Week' is >= next_week are dropped.
    raw : pandas.DataFrame, optional
        An already-loaded poll table. When omitted, the workbook at the
        notebook-level `file` path is read with pd.read_excel. The frame
        passed in is never modified.

    Returns
    -------
    pandas.DataFrame
        The remaining rows without null values, with 'Movement' and 'Team'
        removed, every column converted to numeric, 'Result' encoded as
        1 ('W') / 0 (anything else), and a float 'Next Week Rank Category'
        column: 0 = dropped out, 1 = moved up, 2 = moved down but didn't
        drop out, 3 = no change.
    """
    #Value of 'Next Week Rank' that is treated as "dropped out"
    #NOTE(review): presumably the placeholder rank for unranked teams; confirm against the workbook
    dropped_out_rank = 26

    df = pd.read_excel(file) if raw is None else raw.copy()

    #1. Drop footer (the last row of the table)
    df = df.iloc[:-1]

    #2. Set week, by dropping later weeks
    #Negating ">=" keeps rows with a missing Week at this step; step 4 removes them
    df = df[~(df['Week'] >= next_week)].copy()

    #3. Convert W/L to binary values (anything other than 'W', including missing, becomes 0)
    df['Result'] = (df['Result'] == 'W').astype('int64')

    #4. Drop null rows (teams with byes)
    df = df.dropna()

    #5. Drop unneeded cols
    #NOTE(review): the displayed table also has an 'Unnamed: 0' column which is kept here and
    #therefore ends up among the features; confirm that is intended
    df = df.drop(columns = ["Movement", "Team"])

    #6. Convert all cols to numeric
    #(DataFrame.iteritems was removed in pandas 2.0; apply works on old and new versions)
    df = df.apply(pd.to_numeric)

    #7. Group ranking changes into categories:
    #0 = dropped out, 1 = moved up, 2 = moved down, but didn't drop out, 3 = no change
    #Assigned from lowest to highest priority so that later assignments win
    rank = df['Rank']
    next_rank = df['Next Week Rank']
    category = pd.Series(float('nan'), index = df.index, dtype = 'float64')
    category[next_rank == rank] = 3.0
    category[next_rank > rank] = 2.0
    category[next_rank < rank] = 1.0
    category[next_rank == dropped_out_rank] = 0.0
    df['Next Week Rank Category'] = category

    return df

In [5]:
def divide(df):
    """Split a prepared frame into a feature matrix and a target vector.

    The target is the 'Next Week Rank Category' column; the features are
    every column except 'Next Week Rank' and 'Next Week Rank Category'.
    Returns the pair (X, y).
    """
    target_col = 'Next Week Rank Category'

    #Both next-week columns are removed from the features
    features = df.drop(['Next Week Rank', target_col], axis = 1)
    target = df[target_col]

    return features, target

In [6]:
#Prep df
#The result is stored under its own name: assigning it to `prep_df` would replace the function
#with a DataFrame, so this cell would fail if it were run a second time.
prepped_df = prep_df(next_week)

#Divide into features/target
X, y = divide(prepped_df)

In [7]:
#Hold out a test set; random_state fixes the shuffle so the split is reproducible.
#No test_size/train_size is passed, so train_test_split's default proportions apply.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 8)

In [8]:
#Decision Tree classifier: fit on the training split and show the score on the test split
#DecisionTreeClassifier is already imported in the first cell, so no extra import is needed here
dtc = DecisionTreeClassifier(random_state = 8)
dtc.fit(X_train, y_train)
dtc.score(X_test, y_test)

0.6666666666666666

In [9]:
#Support Vector Machine classifier: fit on the training split and show the score on the test split
#SVC is already imported in the first cell, so it is not imported again here
#NOTE(review): the features are passed in unscaled although StandardScaler is imported in the
#first cell; confirm whether scaling was intended before comparing this model with the tree
svm = SVC(random_state = 8)
svm.fit(X_train, y_train)
svm.score(X_test, y_test)

0.6666666666666666

In [10]:
#Test-set predictions per run label for the "model random state" experiment (one dict per model)
tree_model_pred, svm_model_pred = {}, {}

In [11]:
#This loop runs the two models with different random states of the models, where both models have the same
#random state. train_test_split has a constant random state.
#NOTE: scikit-learn documents SVC's random_state as ignored when probability is False (the default used
#here), so only the decision tree is expected to change between iterations.

#The split does not depend on i, so it is computed once instead of on every iteration
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 8)

for i in range(40, 50):
    label = f'Random State (Models) = {i}'

    dtc = DecisionTreeClassifier(random_state = i)
    dtc.fit(X_train, y_train)
    tree_score = dtc.score(X_test, y_test)
    tree_model_pred[label] = list(dtc.predict(X_test))

    svm = SVC(random_state = i)
    svm.fit(X_train, y_train)
    svm_score = svm.score(X_test, y_test)
    svm_model_pred[label] = list(svm.predict(X_test))

    print(f'{label}\nDTC Score: {tree_score}\nSVM Score: {svm_score}\n')

Random State (Models) = 40
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models) = 41
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models) = 42
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models) = 43
DTC Score: 0.6111111111111112
SVM Score: 0.6666666666666666

Random State (Models) = 44
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models) = 45
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models) = 46
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models) = 47
DTC Score: 0.6111111111111112
SVM Score: 0.6666666666666666

Random State (Models) = 48
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models) = 49
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666



In [12]:
#Show the decision tree's stored test-set predictions, one run label per entry
for label in tree_model_pred:
    print(f'{label} :\n {tree_model_pred[label]} \n')

Random State (Models) = 40 :
 [1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models) = 41 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models) = 42 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models) = 43 :
 [1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0] 

Random State (Models) = 44 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 2.0, 1.0, 3.0, 1.0, 0.

In [13]:
#Show the SVM's stored test-set predictions, one run label per entry
for label in svm_model_pred:
    print(f'{label} :\n {svm_model_pred[label]} \n')

Random State (Models) = 40 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models) = 41 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models) = 42 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models) = 43 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models) = 44 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.

In [14]:
#For each run, print its label when both models produced identical prediction lists, otherwise '!='
for key in tree_model_pred:
    identical = tree_model_pred[key] == svm_model_pred[key]
    print(key if identical else '!=')

!=
!=
!=
!=
!=
!=
!=
!=
!=
!=


In [15]:
#Test-set predictions per run label for the "train_test_split random state" experiment (one dict per model)
tree_split_pred, svm_split_pred = {}, {}

In [16]:
#Vary only the train/test split: the random state of train_test_split changes on each iteration,
#while both models keep the same constant random state (8).
#The test-set predictions of each run are stored under that run's label.
for i in range(40, 50):
    label = f'Random State (train_test_split) = {i}'
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = i)

    #Fit both models on the same training split (fit returns the fitted estimator)
    dtc = DecisionTreeClassifier(random_state = 8).fit(X_train, y_train)
    svm = SVC(random_state = 8).fit(X_train, y_train)

    tree_score = dtc.score(X_test, y_test)
    svm_score = svm.score(X_test, y_test)

    tree_split_pred[label] = list(dtc.predict(X_test))
    svm_split_pred[label] = list(svm.predict(X_test))

    print(f'{label}\nDTC Score: {tree_score}\nSVM Score: {svm_score}\n')

Random State (train_test_split) = 40
DTC Score: 0.6111111111111112
SVM Score: 0.5833333333333334

Random State (train_test_split) = 41
DTC Score: 0.6666666666666666
SVM Score: 0.5833333333333334

Random State (train_test_split) = 42
DTC Score: 0.4444444444444444
SVM Score: 0.6111111111111112

Random State (train_test_split) = 43
DTC Score: 0.6111111111111112
SVM Score: 0.5555555555555556

Random State (train_test_split) = 44
DTC Score: 0.5833333333333334
SVM Score: 0.7222222222222222

Random State (train_test_split) = 45
DTC Score: 0.6666666666666666
SVM Score: 0.5833333333333334

Random State (train_test_split) = 46
DTC Score: 0.6944444444444444
SVM Score: 0.6666666666666666

Random State (train_test_split) = 47
DTC Score: 0.6111111111111112
SVM Score: 0.7222222222222222

Random State (train_test_split) = 48
DTC Score: 0.6111111111111112
SVM Score: 0.6666666666666666

Random State (train_test_split) = 49
DTC Score: 0.7222222222222222
SVM Score: 0.6666666666666666



In [17]:
#Show the decision tree's stored test-set predictions, one split label per entry
for label in tree_split_pred:
    print(f'{label} :\n {tree_split_pred[label]} \n')

Random State (train_test_split) = 40 :
 [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 1.0, 3.0, 0.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 3.0, 0.0, 1.0, 1.0, 0.0, 2.0, 2.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 3.0, 1.0] 

Random State (train_test_split) = 41 :
 [3.0, 1.0, 0.0, 3.0, 1.0, 2.0, 1.0, 1.0, 0.0, 3.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 0.0, 3.0, 1.0, 1.0, 1.0, 3.0, 1.0] 

Random State (train_test_split) = 42 :
 [3.0, 0.0, 2.0, 3.0, 2.0, 1.0, 3.0, 1.0, 0.0, 2.0, 3.0, 3.0, 0.0, 1.0, 3.0, 1.0, 2.0, 0.0, 3.0, 3.0, 2.0, 1.0, 0.0, 0.0, 1.0, 3.0, 2.0, 2.0, 3.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 0.0] 

Random State (train_test_split) = 43 :
 [3.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0, 1.0, 1.0, 3.0, 3.0, 1.0, 0.0, 2.0, 1.0, 2.0, 0.0, 1.0, 1.0, 3.0, 0.0, 1.0, 3.0, 3.0, 3.0, 0.0, 3.0, 2.0, 1.0, 0.0, 3.0, 2.0, 1.0] 

Random State (train_test_split) = 44 :
 [1.0, 2.0, 2.0, 2.0, 3.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.

In [18]:
#Show the SVM's stored test-set predictions, one split label per entry
for label in svm_split_pred:
    print(f'{label} :\n {svm_split_pred[label]} \n')

Random State (train_test_split) = 40 :
 [2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 0.0, 0.0, 2.0, 1.0, 1.0] 

Random State (train_test_split) = 41 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0] 

Random State (train_test_split) = 42 :
 [1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 2.0, 1.0, 0.0, 1.0, 1.0] 

Random State (train_test_split) = 43 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 2.0, 1.0, 0.0, 0.0, 1.0, 1.0, 2.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 2.0, 1.0, 0.0, 1.0, 2.0, 1.0] 

Random State (train_test_split) = 44 :
 [1.0, 1.0, 2.0, 2.0, 3.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 3.

In [19]:
#For each split, print its label when both models produced identical prediction lists, otherwise '!='
for key in tree_split_pred:
    identical = tree_split_pred[key] == svm_split_pred[key]
    print(key if identical else '!=')

!=
!=
!=
!=
!=
!=
!=
!=
!=
!=


In [20]:
#Test-set predictions per run label for the "independent model random states" experiment (one dict per model)
tree_ind_pred, svm_ind_pred = {}, {}

In [21]:
#This loop runs the two models with random states that differ from each other (i for the tree, 2 * i for
#the SVM) and change on each iteration. The random state of train_test_split is constant.
#NOTE: scikit-learn documents SVC's random_state as ignored when probability is False (the default used
#here), so only the decision tree is expected to change between iterations.

#The split does not depend on i, so it is computed once instead of on every iteration
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 8)

for i in range(40, 50):
    label = f'Random State (Models-Independent) = {i}'

    dtc = DecisionTreeClassifier(random_state = i)
    dtc.fit(X_train, y_train)
    tree_score = dtc.score(X_test, y_test)
    tree_ind_pred[label] = list(dtc.predict(X_test))

    svm = SVC(random_state = (2 * i))
    svm.fit(X_train, y_train)
    svm_score = svm.score(X_test, y_test)
    svm_ind_pred[label] = list(svm.predict(X_test))

    print(f'{label}, {2 * i}\nDTC Score: {tree_score}\nSVM Score: {svm_score}\n')

Random State (Models-Independent) = 40, 80
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 41, 82
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 42, 84
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 43, 86
DTC Score: 0.6111111111111112
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 44, 88
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 45, 90
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 46, 92
DTC Score: 0.6388888888888888
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 47, 94
DTC Score: 0.6111111111111112
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 48, 96
DTC Score: 0.6666666666666666
SVM Score: 0.6666666666666666

Random State (Models-Independent) = 49, 98
DTC Score: 0.63888888

In [22]:
#Show the decision tree's stored test-set predictions, one run label per entry
for label in tree_ind_pred:
    print(f'{label} :\n {tree_ind_pred[label]} \n')

Random State (Models-Independent) = 40 :
 [1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models-Independent) = 41 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models-Independent) = 42 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 2.0, 1.0, 3.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 3.0, 1.0, 1.0, 3.0, 2.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 2.0] 

Random State (Models-Independent) = 43 :
 [1.0, 3.0, 2.0, 2.0, 2.0, 2.0, 2.0, 1.0, 2.0, 3.0, 3.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 3.0, 0.0, 1.0, 1.0, 3.0, 1.0, 0.0, 2.0, 1.0, 1.0, 1.0, 0.0, 3.0, 1.0, 2.0, 0.0, 2.0, 1.0, 3.0] 

Random State (Models-Independent) = 44 :
 [1.0, 3.0, 2.0, 3.0, 2.0, 2.0, 3.0, 1.0, 3.0, 3.0, 3.0, 0.

In [23]:
#Show the SVM's stored test-set predictions, one run label per entry
for label in svm_ind_pred:
    print(f'{label} :\n {svm_ind_pred[label]} \n')

Random State (Models-Independent) = 40 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models-Independent) = 41 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models-Independent) = 42 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models-Independent) = 43 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 1.0, 1.0] 

Random State (Models-Independent) = 44 :
 [1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 2.0, 1.0, 1.0, 0.

In [24]:
#For each run, print its label when both models produced identical prediction lists, otherwise '!='
for key in tree_ind_pred:
    identical = tree_ind_pred[key] == svm_ind_pred[key]
    print(key if identical else '!=')

!=
!=
!=
!=
!=
!=
!=
!=
!=
!=
