In [None]:
##################################################
### Author: Anthony Igel                       ###
### Team: Category Management Transformation   ###
### Project: Developing practical Python Tools ###
### Purpose: GoT Battles                       ###
### Date: 05/29/2018                           ###
##################################################

# https://github.com/chrisalbon/war_of_the_five_kings_dataset/blob/master/greatest_commander_analysis.ipynb

import sys
assert sys.version_info.major == 3

######################################################################
########                     Import Modules                   ########
######################################################################

import py_effo as py_effo
### pandas
# Pandas is for structured data operations and manipulations, and is extensively used for data preparation.
import pandas as pd

### numpy
# NumPy stands for Numerical Python, a library that contains basic linear algebra functions, Fourier transforms and advanced random
# number capabilities
import numpy as np

### Seaborn
# Seaborn is a Python visualization library based on Matplotlib, providing a high-level interface for statistical graphing
# Seaborn supports numpy and pandas data structures as well as statistical routines from scipy and statsmodels
# Note: https://seaborn.pydata.org/introduction.html
import seaborn as sns

### Collections
from collections import Counter

### Matplotlib
# Matplotlib is a Python based plotting library with complete 2D support and limited 3D support
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
sns.set_style("white")

%matplotlib inline

In [None]:
######################################################################
########                     Import Data                      ########
######################################################################

### Import data sets
# Location of the battles CSV; kept in its own variable so the path is easy to spot and change
battles_csv_path = "//nfs/analysis/analysis/kroger/category_management_transformation/mini_hack_days/python/battles.csv"
battles = pd.read_csv(battles_csv_path)

### View the top rows of df.battles
battles.head(3)

In [None]:
######################################################################
########                    Data Preparation                  ########
######################################################################

######## Column Manipulation ########
### Normalise every column name in a single pass: strip surrounding whitespace, then lower-case
battles = battles.rename(columns = lambda name: name.strip().lower())

######## New Attributes ########
# 1. defender_count – Number of major houses on defending side
# 2. attacker_count – Number of major houses on attacking side
# 3. att_comm_count – Number of commanders on attacking side

defender_cols = ["defender_1", "defender_2", "defender_3", "defender_4"]
attacker_cols = ["attacker_1", "attacker_2", "attacker_3", "attacker_4"]

# Counting the populated cells per row is the same as "4 minus the number of nulls"
battles["defender_count"] = battles[defender_cols].notnull().sum(axis = 1)
battles["attacker_count"] = battles[attacker_cols].notnull().sum(axis = 1)

# Split the comma-separated commander string and count the pieces;
# rows with a missing attacker_commander stay NaN
battles["att_comm_count"] = battles["attacker_commander"].str.split(",").str.len()

### Coerce data types for numeric variables
# One loop replaces the four copy-pasted fillna/to_numeric pairs. errors='coerce'
# already turns anything unparseable into NaN, so the intermediate fillna("") is not needed.
numerics = ["major_death", "major_capture", "attacker_size", "defender_size"]
for column in numerics:
    battles[column] = pd.to_numeric(battles[column], errors = 'coerce')

In [None]:
######################################################################
########                   Data Exploration                   ########
######################################################################

### Major death/capture events by year
# Pick the two numeric columns *before* aggregating so text columns are never summed
events_by_year = battles.groupby("year")[["major_death", "major_capture"]].sum()
p = events_by_year.plot.bar(rot = 0)
p.set(xlabel = "Year", ylabel = "No. of Death/Capture Events", ylim = (0, 9))
p.legend(["Major Deaths", "Major Captures"])
plt.show()

### How many houses were on the attacking side?
p = battles.attacker_count.value_counts().sort_index().plot.bar(rot = 0)
p.set(xlabel = "No. of Major Attacker Houses", ylabel = "Count")
plt.show()

### Which pairs fought the most battles?
# Ignoring records where either attacker_king or defender_king is null. Also ignoring records where both have the same value.
king_pairs = battles.dropna(subset = ["attacker_king", "defender_king"])[["attacker_king", "defender_king"]].values
# sorted() gives every pair one canonical order, so (A, B) and (B, A) share a single counter key
pair_counts = Counter(tuple(sorted(pair)) for pair in king_pairs if len(set(pair)) > 1)
# Carry the label as the index so each bar keeps its own label after sorting
pair_series = pd.Series({"%s vs. %s" % pair: count for pair, count in pair_counts.items()}, dtype = "int64").sort_values()
p = pair_series.plot.barh(figsize = (10, 6))
p.set(xlabel = "No. of Battles")
# Show (and close) this figure so the next plot does not draw onto the same axes
plt.show()

### How many commanders did armies of different kings have?
# x/y are passed by keyword; recent seaborn releases no longer accept them positionally
fig, ax = plt.subplots()
q = sns.boxplot(x = "att_comm_count", y = "attacker_king", data = battles, saturation = .6, fliersize = 10., palette = ["lightgray", sns.color_palette()[1], "grey", "darkblue"], ax = ax)
q.set(xlabel = "No. of Attacker Commanders", ylabel = "Attacker King", xticks = range(8))
plt.show()

### How many major death/capture events occurred in each region
region_events = battles.groupby("region")[["major_death", "major_capture"]].sum()
# Name the count column explicitly instead of relying on whatever name value_counts() assigns
region_battles = battles["region"].value_counts().rename("battle_count")
data = pd.concat([region_events, region_battles], axis = 1).sort_values("battle_count", ascending = False)
p = data.plot.bar(color = [sns.color_palette()[1], "grey", "darkblue"], rot = 0)
p.set(xlabel = "Region", ylabel = "No. of Events")
p.legend(["Major Deaths", "Major Captures", "No. of Battles"], fontsize = 12.)
plt.show()

In [None]:
######## Do larger armies always win? ########
### Keep only the battles where both army sizes are known
sized_battles = battles.dropna(subset = ["attacker_size", "defender_size"])

### Size difference per battle, joined back to the outcome (column 0 holds the difference)
size_gap = (sized_battles.attacker_size - sized_battles.defender_size).to_frame()
data = pd.concat([size_gap, battles[["attacker_outcome"]]], axis = 1, join = "inner")
### Drop battles where both armies were the same size
data = data[data[0] != 0]

### Plot results
win_color, loss_color = sns.color_palette()[0], sns.color_palette()[2]
outcome_colors = {"win": win_color, "loss": loss_color}
# Any outcome other than win/loss (e.g. missing) is drawn in white
bar_colors = [outcome_colors.get(outcome, "white") for outcome in data.attacker_outcome.values]

p = data[0].plot.barh(figsize = (12, 8), width = .8, color = bar_colors)
p.legend(handles = [mpatches.Patch(color = win_color, label = "Victory", aa = True), mpatches.Patch(color = loss_color, label = "Loss", aa = True)])
p.axvline(0, color = 'k')
# data.index holds index *labels*, so look the names up with .loc rather than by position
p.set(yticklabels = battles["name"].loc[data.index].values, xlabel = "Difference in Army Size (attacker_size - defender_size)", ylabel = "Battle Name")
plt.show()

In [None]:
######################################################################
########                   Data Manipulation                  ########
######################################################################

######## Create a list of attacking commanders ########
# One entry per battle whose attacker_commander cell is a string:
# the cell split on commas. Non-string cells (missing values) are skipped.
attacker_list = [
    cell.split(',')
    for cell in battles['attacker_commander']
    if type(cell) is str
]

######## Create a list of defending commanders ########
# One entry per battle: the defender_commander cell split on commas,
# or an empty string when the cell is not a string.
defender_list = [
    cell.split(',') if type(cell) is str else ''
    for cell in battles['defender_commander']
]

In [None]:
######## Create a list of commanders ########
# Flatten the per-battle name lists into one list, attackers first and then
# defenders, removing the leading whitespace left behind by the comma split.
commander = [name.lstrip() for names in attacker_list for name in names]
commander.extend(name.lstrip() for names in defender_list for name in names)

In [None]:
### Reduce the commander list to unique names.
## Sorting gives the same order on every run (set iteration order for strings can
## differ between interpreter sessions). Empty names are dropped because '' is a
## substring of every string and would match every cell in the later `in` checks.
commanders = sorted(name for name in set(commander) if name)

# Display the total number of commanders in the list
len(commanders)

In [None]:
######## Count the number of times a commander successfully attacked ########

# Attacker-commander cells of the battles the attacker won
won_attack_cells = battles['attacker_commander'][battles['attacker_outcome'] == 'win']

# One tally per commander, in the same order as `commanders`.
# A battle counts when its cell is a string containing the commander's name;
# non-string cells (missing values) are ignored.
attack_win = [
    sum(1 for cell in won_attack_cells if type(cell) is str and name in cell)
    for name in commanders
]

In [None]:
######## Count the number of times a commander unsuccessfully attacked

# Attacker-commander cells of the battles the attacker lost
lost_attack_cells = battles['attacker_commander'][battles['attacker_outcome'] == 'loss']

# One tally per commander, in the same order as `commanders`.
# A battle counts when its cell is a string containing the commander's name;
# non-string cells (missing values) are ignored.
attack_loss = [
    sum(1 for cell in lost_attack_cells if type(cell) is str and name in cell)
    for name in commanders
]

In [None]:
######## Count the number of times a commander successfully defended

# Defender-commander cells of the battles the attacker lost (i.e. the defence held)
held_defence_cells = battles['defender_commander'][battles['attacker_outcome'] == 'loss']

# One tally per commander, in the same order as `commanders`.
# A battle counts when its cell is a string containing the commander's name;
# non-string cells (missing values) are ignored.
defend_win = [
    sum(1 for cell in held_defence_cells if type(cell) is str and name in cell)
    for name in commanders
]

In [None]:
######## Count the number of times a commander unsuccessfully defended

# Defender-commander cells of the battles the attacker won (i.e. the defence failed)
failed_defence_cells = battles['defender_commander'][battles['attacker_outcome'] == 'win']

# One tally per commander, in the same order as `commanders`.
# A battle counts when its cell is a string containing the commander's name;
# non-string cells (missing values) are ignored.
defend_loss = [
    sum(1 for cell in failed_defence_cells if type(cell) is str and name in cell)
    for name in commanders
]

In [None]:
# Create a dictionary of the four score lists.
# Each key is paired with the list of the same name; 'defend_loss' must come from
# defend_loss (not attack_loss), otherwise every defend/total/percentage figure
# derived from this frame is computed from the wrong tallies.
columns = {'attack_win':  attack_win,
           'attack_loss': attack_loss,
           'defend_win':  defend_win,
           'defend_loss': defend_loss}

# Create a dataframe from that dictionary, indexed by a commander's name
battle_record = pd.DataFrame(columns, index = commanders)

In [None]:
######################################################################
########                      Analysis                        ########
######################################################################

# Pull the four base tallies out once so each derived column reads as plain arithmetic
attack_wins = battle_record['attack_win']
attack_losses = battle_record['attack_loss']
defend_wins = battle_record['defend_win']
defend_losses = battle_record['defend_loss']

# Total attacks and total defends for each commander
battle_record['total_attacks'] = attack_wins + attack_losses
battle_record['total_defends'] = defend_wins + defend_losses

# Total wins and total losses for each commander
battle_record['total_wins'] = attack_wins + defend_wins
battle_record['total_loss'] = attack_losses + defend_losses

# Total number of battles for each commander
battle_record['total_battles'] = attack_wins + attack_losses + defend_wins + defend_losses

# Win percentage score (total wins / total battles) for each commander
battle_record['win_percentage'] = battle_record['total_wins'] / battle_record['total_battles'] * 100

# Composite score (total number of wins minus total number of losses) for each commander
battle_record['composite_record'] = (attack_wins + defend_wins) - (attack_losses + defend_losses)

In [None]:
### Make sure every commander statistic is stored as a numeric dtype.
# battle_record is indexed by commander name while battles is indexed by battle row,
# so assigning these Series into `battles` aligns on non-matching labels and produces
# columns that are entirely NaN. The coercion is therefore applied to battle_record
# itself, which is the frame the rankings below read from.
record_columns = [
    "attack_loss", "attack_win", "defend_loss", "defend_win",
    "total_attacks", "total_defends", "total_wins", "total_loss",
    "total_battles", "win_percentage", "composite_record",
]
for column in record_columns:
    battle_record[column] = pd.to_numeric(battle_record[column], errors = 'coerce')

In [None]:
# Display the battles DataFrame as this cell's output
battles

In [None]:
### Top 10 Most Active Commanders
# Rank the whole record by total battles (highest first), then show that column's top 10
ranked_by_battles = battle_record.sort_values(by = 'total_battles', ascending = False)
ranked_by_battles['total_battles'].head(10)

In [None]:
### Top 10 Most Active Attackers
# Rank the whole record by total attacks (highest first), then show that column's top 10
ranked_by_attacks = battle_record.sort_values(by = 'total_attacks', ascending = False)
ranked_by_attacks['total_attacks'].head(10)

In [None]:
### Top 10 Most Active Defenders
# Rank the whole record by total defends (highest first), then show that column's top 10
ranked_by_defends = battle_record.sort_values(by = 'total_defends', ascending = False)
ranked_by_defends['total_defends'].head(10)

In [None]:
### Top 10 Most Victorious Commanders
# Rank the whole record by total wins (highest first), then show that column's top 10
ranked_by_wins = battle_record.sort_values(by = 'total_wins', ascending = False)
ranked_by_wins['total_wins'].head(10)

In [None]:
### Top 30 Most Losing Commanders
# Rank the whole record by total losses (highest first), then show that column's top 30
ranked_by_losses = battle_record.sort_values(by = 'total_loss', ascending = False)
ranked_by_losses['total_loss'].head(30)

In [None]:
### Commanders With The Best Win Percentage
# A commander with zero recorded battles gets 0 / 0 = NaN (not Inf), so comparing
# against Inf alone never removes anything. Keep only finite percentages, then take
# the top 75 of what is left.
ranked_win_pct = battle_record.sort_values(by = 'win_percentage', ascending = False)['win_percentage']
ranked_win_pct[np.isfinite(ranked_win_pct)].head(75)

In [None]:
### Top 10 Greatest Commanders
# Rank the whole record by composite score (wins minus losses, highest first),
# then show that column's top 10
ranked_by_composite = battle_record.sort_values(by = 'composite_record', ascending = False)
ranked_by_composite['composite_record'].head(10)

In [None]:
######################################################################
########                    Tie Breaker                       ########
######################################################################

# Defender/attacker army-size ratios for the battles each commander won as an attacker
jaime_ratio = []
robb_ratio = []
gregor_ratio = []

# Checked in this order; a battle is credited to the first matching name only
ratio_lists = (
    ("Jaime Lannister", jaime_ratio),
    ("Robb Stark", robb_ratio),
    ("Gregor Clegane", gregor_ratio),
)

# Restrict to the battles the attacker won and compute the ratio on those same rows,
# so each commander cell is paired with the army sizes of its own battle.
# (A separate running counter used as a row label drifts away from the filtered
# rows and ends up reading sizes from unrelated battles.)
won_attacks = battles[battles['attacker_outcome'] == 'win']
relative_sizes = won_attacks['defender_size'] / won_attacks['attacker_size']

for commander_cell, relative_size in zip(won_attacks['attacker_commander'], relative_sizes):
    # Missing commander cells are NaN (a float), so skip anything that is not a string
    if not isinstance(commander_cell, str):
        continue
    for commander_name, ratios in ratio_lists:
        if commander_name in commander_cell:
            # The ratio is NaN when either army size is unknown
            ratios.append(relative_size)
            break

In [None]:
### Build one column of ratios per commander.
# Wrapping each list in a Series lets the lists have different lengths: shorter
# columns are padded with NaN instead of raising a length-mismatch error, and
# NaN is ignored by .mean().
top3_ratio = pd.DataFrame({
    'jaime': pd.Series(jaime_ratio, dtype = float),
    'robb': pd.Series(robb_ratio, dtype = float),
    'gregor': pd.Series(gregor_ratio, dtype = float),
})

# Display the ratios of each commander's winning attacks
top3_ratio

In [None]:
######################################################################
########                       Results                        ########
######################################################################
# Print one plain English sentence per commander that includes the mean ratio
# (rounded to two decimals), with a blank line between sentences.
report_lines = (
    ('jaime', 'On average, Jaime Lannister successfully attacked when the enemy has'),
    ('robb', 'On average, Robb Stark successfully attacked when the enemy has'),
    ('gregor', 'On average, Gregor Clegane successfully attacked when the enemy has'),
)
for position, (column, opening) in enumerate(report_lines):
    if position > 0:
        print()
    print(opening, top3_ratio[column].mean().round(2), 'soldiers for every one of his own.')