In [None]:
from sklearn import linear_model
import pandas as pd

In [None]:
def createPosModel(file, position):
    """Fit a linear regression that predicts ``Next_Yr_PPG`` for one position.

    The CSV is loaded, every row containing a missing value is dropped, and
    only rows whose ``FantPos`` equals ``position`` are kept. The feature
    columns depend on the position; WR and TE share one feature set.

    Args:
        file: Path (or anything ``pd.read_csv`` accepts) of the training CSV.
        position: One of ``'QB'``, ``'RB'``, ``'WR'`` or ``'TE'``.

    Returns:
        The fitted ``LinearRegression`` model, or ``None`` (after printing
        ``'Invalid position entered'``) when ``position`` is not supported.
    """
    receivingCols = ['Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G',
                     'TotTD/G', 'PPR/G']
    featureCols = {
        'QB': ['PassAtt/G', 'PassYds/G', 'PassTD/G', 'Int/G', 'RushAtt/G',
               'RushYds/G', 'RushYds/Att', 'RushTD/G', 'TotTD/G', 'PPR/G'],
        'RB': ['Age', 'RushAtt/G', 'RushYds/G', 'RushYds/Att', 'RushTD/G',
               'Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G', 'TotTD/G',
               'PPR/G'],
        'WR': receivingCols,
        'TE': receivingCols,
    }

    # Validate before touching the file. A condition written as
    # `position == 'WR' or 'TE'` is always truthy (the bare string 'TE' is
    # truthy), which would let unsupported positions fall through to the
    # WR/TE columns; an explicit membership test avoids that.
    if position not in featureCols:
        print('Invalid position entered')
        return None

    df = pd.read_csv(file)
    df = df.dropna()
    df = df[df['FantPos'] == position]

    X = df[featureCols[position]]
    y = df['Next_Yr_PPG']
    reg = linear_model.LinearRegression()
    reg.fit(X, y)
    return reg

In [None]:
def testModelAccuracy(model, file, position):
    """Score ``model`` on the rows of ``file`` belonging to ``position``.

    The CSV is loaded, every row containing a missing value is dropped, and
    only rows whose ``FantPos`` equals ``position`` are kept. The feature
    columns depend on the position; WR and TE share one feature set.

    Args:
        model: Object exposing ``score(X, y)`` (e.g. a fitted regression).
        file: Path (or anything ``pd.read_csv`` accepts) of the CSV to score.
        position: One of ``'QB'``, ``'RB'``, ``'WR'`` or ``'TE'``.

    Returns:
        Whatever ``model.score`` returns for the selected features and the
        ``Next_Yr_PPG`` column, or ``None`` (after printing
        ``'Invalid position entered'``) when ``position`` is not supported.
    """
    receivingCols = ['Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G',
                     'TotTD/G', 'PPR/G']
    featureCols = {
        'QB': ['PassAtt/G', 'PassYds/G', 'PassTD/G', 'Int/G', 'RushAtt/G',
               'RushYds/G', 'RushYds/Att', 'RushTD/G', 'TotTD/G', 'PPR/G'],
        'RB': ['Age', 'RushAtt/G', 'RushYds/G', 'RushYds/Att', 'RushTD/G',
               'Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G', 'TotTD/G',
               'PPR/G'],
        'WR': receivingCols,
        'TE': receivingCols,
    }

    # Validate before touching the file. A condition written as
    # `position == 'WR' or 'TE'` is always truthy (the bare string 'TE' is
    # truthy), which would let unsupported positions fall through to the
    # WR/TE columns; an explicit membership test avoids that.
    if position not in featureCols:
        print('Invalid position entered')
        return None

    df = pd.read_csv(file)
    df = df.dropna()
    df = df[df['FantPos'] == position]

    XTest = df[featureCols[position]]
    yTest = df['Next_Yr_PPG']
    return model.score(XTest, yTest)


# The name starts with "test", so pytest would try to collect this helper as
# a test case if it is ever imported into a test module; opt out explicitly.
testModelAccuracy.__test__ = False

In [None]:
def testModelDifference(model, file, position):
    """Compare ``model``'s predictions with the actual next-year PPG.

    The CSV is loaded, every row containing a missing value is dropped, and
    only rows whose ``FantPos`` equals ``position`` are kept. The feature
    columns depend on the position; WR and TE share one feature set.

    Args:
        model: Object exposing ``predict(X)`` (e.g. a fitted regression).
        file: Path (or anything ``pd.read_csv`` accepts) of the CSV to use.
        position: One of ``'QB'``, ``'RB'``, ``'WR'`` or ``'TE'``.

    Returns:
        A 5-tuple ``(database, meanDiff, medianDiff, meanAbsDiff,
        medianAbsDiff)`` where ``database`` is a DataFrame with the columns
        ``Name``, ``Predicted PPG``, ``Actual PPG``, ``Difference``
        (predicted minus actual) and ``AbsDifference``, and the four numbers
        are the mean/median of the signed and absolute differences, each
        rounded to 3 decimals. Returns ``None`` (after printing
        ``'Invalid position entered'``) when ``position`` is not supported.
    """
    receivingCols = ['Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G',
                     'TotTD/G', 'PPR/G']
    featureCols = {
        'QB': ['PassAtt/G', 'PassYds/G', 'PassTD/G', 'Int/G', 'RushAtt/G',
               'RushYds/G', 'RushYds/Att', 'RushTD/G', 'TotTD/G', 'PPR/G'],
        'RB': ['Age', 'RushAtt/G', 'RushYds/G', 'RushYds/Att', 'RushTD/G',
               'Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec', 'RecTD/G', 'TotTD/G',
               'PPR/G'],
        'WR': receivingCols,
        'TE': receivingCols,
    }

    # Validate before touching the file. A condition written as
    # `position == 'WR' or 'TE'` is always truthy (the bare string 'TE' is
    # truthy), which would let unsupported positions fall through to the
    # WR/TE columns; an explicit membership test avoids that.
    if position not in featureCols:
        print('Invalid position entered')
        return None

    df = pd.read_csv(file)
    df = df.dropna()
    df = df[df['FantPos'] == position]

    XTest = df[featureCols[position]]
    yPred = model.predict(XTest)

    # One row per player: name, prediction and the value actually observed.
    database = pd.DataFrame({
        'Name': df['Player'],
        'Predicted PPG': yPred,
        'Actual PPG': df['Next_Yr_PPG'],
    })

    # The prediction is rounded first, so the difference is taken against
    # the rounded value that is shown in the table.
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Difference'] = database['Predicted PPG'] - database['Actual PPG']
    database['Difference'] = database['Difference'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()

    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)

    return database, meanDiff, medianDiff, meanAbsDiff, medianAbsDiff


# The name starts with "test", so pytest would try to collect this helper as
# a test case if it is ever imported into a test module; opt out explicitly.
testModelDifference.__test__ = False

In [None]:
def testModel(model, testCSV, trainingCSV, position):
    """Print a short evaluation report for one position's model.

    The score is obtained from ``testModelAccuracy`` run against
    ``trainingCSV``; the error statistics are obtained from
    ``testModelDifference`` run against ``testCSV``. Nothing is returned.

    NOTE(review): the "accuracy" line is scored on the training file, i.e.
    in-sample -- confirm this is intended rather than scoring ``testCSV``.
    """
    accuracyLine = 'The accuracy of the {0} model is {1}'
    meanLine = 'The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'
    medianLine = 'The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'

    score = testModelAccuracy(model, trainingCSV, position)
    diffStats = testModelDifference(model, testCSV, position)

    # Element 0 is the per-player table; elements 1-4 are the summary
    # statistics (mean, median, mean-absolute, median-absolute).
    avgErr, midErr, avgAbsErr, midAbsErr = diffStats[1:5]

    print(accuracyLine.format(position, score))
    print(meanLine.format(position, avgErr, avgAbsErr))
    print(medianLine.format(position, midErr, midAbsErr))
    print('\n')