# Python for data analytics – Project assessment

In [None]:
# imports
import pandas as pd
import numpy as np

## Task 1 - Load your data

### Assignment Spec
> You should take your data from three files:
>  * data/afl/stats.csv
>  * data/afl/players.csv
>  * data/afl/games.csv
>
> And it should be loaded into a single dataframe by merging the three data sets.


### Explanation of Steps taken

TODO

In [None]:
# Read the three source tables. games and players are indexed by their id
# column so they can be used directly as join keys; stats keeps its default
# integer index because it is re-indexed by gameId for the first join.
games = pd.read_csv("data/afl/games.csv", index_col="gameId")
players = pd.read_csv("data/afl/players.csv", index_col="playerId")
stats = pd.read_csv("data/afl/stats.csv")

# Step 1: attach the stats rows to their game via gameId. Column names that
# exist in both tables are disambiguated with _games / _stats.
games_stats = games.join(
    stats.set_index("gameId"),
    on='gameId',
    lsuffix='_games',
    rsuffix='_stats',
)

# Step 2: attach the player details via playerId. Column names that clash
# here are disambiguated with _stats / _players (later cells rely on
# "displayName_stats" produced by this step).
games_stats_players = games_stats.join(
    players,
    on='playerId',
    lsuffix='_stats',
    rsuffix='_players',
)

# the single dataframe produced by merging the three data sets
games_stats_players


In [None]:
# Module-level plot settings shared by the plotting helpers.
# set_plot_attributes() overwrites them; plot_player() reads them.
PLOT_TITLE: str = ""      # chart title
X_AXIS_LABEL: str = ""    # label drawn on the x axis
Y_AXIS_LABEL: str = ""    # label drawn on the y axis
SHOW_LEGEND: bool = False  # whether plots draw a legend

# The helper functions below reduce duplicated code, since all plots share roughly the same layout

def set_plot_attributes(title, x_label, y_label, show_legend=False):
    """Store the title, axis labels and legend toggle used by later plots.

    The values are written to the module-level globals PLOT_TITLE,
    X_AXIS_LABEL, Y_AXIS_LABEL and SHOW_LEGEND.

    Args:
        title (String): title of plot
        x_label (String): x axis label
        y_label (String): y axis label
        show_legend (bool, optional): Toggles display of legend on plots.
            Defaults to False.
    """
    global PLOT_TITLE, X_AXIS_LABEL, Y_AXIS_LABEL, SHOW_LEGEND

    # update all four settings in one step
    PLOT_TITLE, X_AXIS_LABEL, Y_AXIS_LABEL, SHOW_LEGEND = (
        title,
        x_label,
        y_label,
        show_legend,
    )

def plot_player(player_name, add_linear_regression=False):
    """Plot one player's cumulative goals against the number of games played.

    Steps:
        1. Filters the games_stats_players dataframe by a specific player name
        2. Sorts ascending by gameNumber so goals accumulate in playing order
        3. Adds a cumulative goals column
        4. Plots the player, optionally with a linear regression line

    The title, axis labels and legend toggle are read from the module-level
    globals PLOT_TITLE, X_AXIS_LABEL, Y_AXIS_LABEL and SHOW_LEGEND.

    Args:
        player_name (String): player name for which to plot
        add_linear_regression (bool, optional): Toggles display of linear
            regression line. Defaults to False. The line is skipped when the
            player has fewer than two games, because a straight-line fit
            needs at least two points.

    Raises:
        ValueError: if no row of games_stats_players matches player_name.
    """
    # create mask for filtering
    player_mask = games_stats_players["displayName_stats"] == player_name

    # fail early with a readable message instead of an obscure plotting error
    if not player_mask.any():
        raise ValueError(f"No games found for player {player_name!r}")

    # filter, order chronologically, and drop the old index
    player_series = (
        games_stats_players[player_mask]
        .sort_values(by="gameNumber", ascending=True)
        .reset_index(drop=True)
    )

    # x axis is "number of games played": the first game is game 1, not game 0
    player_series.index = player_series.index + 1

    # add a cumulative goals column
    player_series["Goals (Cumulative)"] = player_series["Goals"].cumsum()

    # plot cumulative goals series
    player_series["Goals (Cumulative)"].plot(
        label=player_name,
        xlabel=X_AXIS_LABEL,
        ylabel=Y_AXIS_LABEL,
        title=PLOT_TITLE,
        legend=SHOW_LEGEND,
    )

    # optionally add a linear regression line (needs at least two points)
    if add_linear_regression and len(player_series) > 1:
        regression_label = f"{player_name} - Goals (Predictions)"
        x = player_series.index
        y = player_series["Goals (Cumulative)"]

        # degree-1 least-squares fit: goals ~ slope * games + intercept
        coefficients = np.polyfit(x, y, 1)
        poly1d_fn = np.poly1d(coefficients)

        player_series["Goals (Predictions)"] = poly1d_fn(x)
        player_series["Goals (Predictions)"].plot(label=regression_label, legend=SHOW_LEGEND)



---

## Task 2 – Plot one player

### Assignment Spec

> For a particular player - say "Franklin, Lance" - plot their accumulation of goals over time. The x-axis should be the number of games played and the y-axis should be the number of goals accumulated. Thus we expect to see a line that monotonically increases, but in what way will depend on the player's career.


### Explanation of Steps taken

TODO

In [None]:
# Note: set get_random_player to True to plot a random player :)
get_random_player = False

if get_random_player:
    # pick one row at random and use that row's player name
    player = games_stats_players["displayName_stats"].sample().values[0]
else:
    player = 'Franklin, Lance'

# Per the assignment spec the x axis is the number of games played and the
# y axis is the goals accumulated (the labels were previously swapped).
set_plot_attributes(
    title=f"{player} - Accumulation of goals over time",
    x_label="Number of Games",
    y_label="Goals",
)

plot_player(player_name=player)



---

## Task 3 – Plot multiple players

### Assignment Spec
> In the one chart, plot the lines for the following players:
> - "Franklin, Lance"
> - "Papley, Tom"
> - "Mumford, Shane"
> - "Hooker, Cale".
>
> Plot each in a different colour so they can be distinguished and add a legend.


### Explanation of Steps taken

TODO

In [None]:
# Note: set add_regression to True to plot regression lines for all players
add_regression = False

# Per the assignment spec the x axis is the number of games played and the
# y axis is the goals accumulated (the labels were previously swapped).
set_plot_attributes(
    title="Players - Accumulation of goals over time",
    x_label="Number of Games",
    y_label="Goals",
    show_legend=True,
)

# the four players required by the spec, drawn one after another
for player_name in ('Franklin, Lance', 'Papley, Tom', 'Mumford, Shane', 'Hooker, Cale'):
    plot_player(player_name=player_name, add_linear_regression=add_regression)



---

## Task 4 – Linear Regression

### Assignment Spec
> Create a second plot showing just "Franklin, Lance" and "Hooker, Cale" but include the linear regression line for each. In other words, as well as showing their actual cumulative goals over time, plot their predicted goals over time where the prediction is done via Linear Regression. Be sure to use different colours for each line and include a legend.


### Explanation of Steps taken

TODO

In [None]:
# Note: set get_random_player to True to plot a random player instead :)
get_random_player = False

if get_random_player:
    # pick one row at random and use that row's player name
    players_to_plot = [games_stats_players["displayName_stats"].sample().values[0]]
else:
    # the spec asks for BOTH of these players, each with a regression line
    players_to_plot = ['Franklin, Lance', 'Hooker, Cale']

# Per the assignment spec the x axis is the number of games played and the
# y axis is the goals accumulated (the labels were previously swapped).
set_plot_attributes(
    title=f"{' & '.join(players_to_plot)} - Accumulation of goals over time",
    x_label="Number of Games",
    y_label="Goals",
    show_legend=True,
)

# each call draws the actual cumulative goals and the fitted line
for player in players_to_plot:
    plot_player(player, add_linear_regression=True)