# Python for data analytics – Project assessment

In [None]:
# imports
import pandas as pd

## Task 1 - Load your data

### Assignment Spec
> You should take your data from three files:
>  * data/afl/stats.csv
>  * data/afl/players.csv
>  * data/afl/games.csv
>
> And it should be loaded into a single dataframe by merging the three data sets.


### Explanation of Steps taken

TODO

In [None]:
# Read the three source files. games and players are indexed by their id
# columns so they can be matched on those ids in the joins below.
games = pd.read_csv("data/afl/games.csv", index_col="gameId")
players = pd.read_csv("data/afl/players.csv", index_col="playerId")
stats = pd.read_csv("data/afl/stats.csv")

# Step 1: attach the stats rows to the games via gameId.
# Column names present in both frames receive the _games / _stats suffixes.
stats_by_game = stats.set_index("gameId")
games_stats = games.join(
    stats_by_game,
    on="gameId",
    lsuffix="_games",
    rsuffix="_stats",
)

# Step 2: attach the player details via playerId.
# Column names present in both frames receive the _stats / _players suffixes.
games_stats_players = games_stats.join(
    players,
    on="playerId",
    lsuffix="_stats",
    rsuffix="_players",
)

# the three data sets merged into a single dataframe (shown as cell output)
games_stats_players


In [None]:
# Module-level graph settings shared by every plot. They start out empty and
# are filled in by set_graph_attributes() before plotting.
GRAPH_TITLE = X_AXIS_LABEL = Y_AXIS_LABEL = ""

# The helper functions below avoid duplicated code, since all graphs share roughly the same layout

def set_graph_attributes(title, x_label, y_label):
    """Store the title and axis labels used by subsequent plots.

    The values are written to the module-level GRAPH_TITLE, X_AXIS_LABEL and
    Y_AXIS_LABEL globals, which plot_player() reads when drawing.

    Args:
        title (String): title of graph
        x_label (String): x axis label
        y_label (String): y axis label
    """
    global GRAPH_TITLE, X_AXIS_LABEL, Y_AXIS_LABEL

    # assign all three settings in one step
    GRAPH_TITLE, X_AXIS_LABEL, Y_AXIS_LABEL = title, x_label, y_label

def plot_player(player_name, display_legend=False):
    """Plot one player's cumulative goals against the number of games played.

    Steps:
    1. Filter the games_stats_players dataframe to the rows whose
       "displayName_stats" equals player_name.
    2. Sort ascending by "gameNumber" so the goals accumulate in order
       (assumes gameNumber increases with each game the player plays).
    3. Build the running total of "Goals", numbered 1..N so that the x value
       is the count of games played.
    4. Plot the series using the title and axis labels previously stored via
       set_graph_attributes().

    Args:
        player_name (String): player name for which to graph
        display_legend (bool, optional): Toggles display of player on legend. Defaults to False.
    """
    # boolean mask selecting only this player's rows
    player_mask = games_stats_players["displayName_stats"] == player_name

    # earliest game first, so cumsum() accumulates in the right direction
    player_games = games_stats_players[player_mask].sort_values(by="gameNumber", ascending=True)

    # running goal total; drop the original row labels so they do not end up on the x axis
    cumulative_goals = player_games["Goals"].cumsum().reset_index(drop=True)

    # reset_index() numbers rows from 0; shift to 1..N so the first game is
    # plotted at x=1 ("one game played") instead of x=0
    cumulative_goals.index = cumulative_goals.index + 1

    # graph series
    cumulative_goals.plot(
        label=player_name,
        xlabel=X_AXIS_LABEL,
        ylabel=Y_AXIS_LABEL,
        title=GRAPH_TITLE,
        legend=display_legend,
    )



---

## Task 2 – Plot one player

### Assignment Spec

>For a particular player - say "Franklin, Lance" - plot their accumulation of goals over time. The x-axis should be the number of games played and the y-axis should be the number of goals accumulated. Thus we expect to see a line that monotonically increases, but in what way will depend on the player's career.


### Explanation of Steps taken

TODO

In [None]:
# Note: set this to True to graph a random player :)
get_random_player = False

if get_random_player:
    # sample() picks one random row of the merged dataframe; take its name
    player = games_stats_players["displayName_stats"].sample().values[0]
else:
    player = 'Franklin, Lance'

# Argument order is (title, x_label, y_label): the x axis carries the number
# of games and the y axis the accumulated goals.
set_graph_attributes(
    f"{player} - Accumulation of goals over time",
    "Number of Games",
    "Goals",
)

plot_player(player)


---

## Task 3 – Plot multiple players

### Assignment Spec
> In the one chart, plot the lines for the following players:
> - "Franklin, Lance"
> - "Papley, Tom"
> - "Mumford, Shane"
> - "Hooker, Cale".
>
> Plot each in a different colour so they can be distinguished and add a legend.


### Explanation of Steps taken

TODO

In [None]:
# Argument order is (title, x_label, y_label): the x axis carries the number
# of games and the y axis the accumulated goals.
set_graph_attributes(
    "Players - Accumulation of goals over time",
    "Number of Games",
    "Goals",
)

# One line per player, each with its own legend entry.
for name in ('Franklin, Lance', 'Papley, Tom', 'Mumford, Shane', 'Hooker, Cale'):
    plot_player(name, display_legend=True)


---

## Task 4 – Linear Regression

### Assignment Spec
> Create a second plot showing just "Franklin, Lance" and "Hooker, Cale" but include the linear regression line for each. In other words, as well as showing their actual cumulative goals over time, plot their predicted goals over time where the prediction is done via Linear Regression. Be sure to use different colours for each line and include a legend.


### Explanation of Steps taken

TODO

In [None]:
#todo