In [1]:
import pandas as pd
from ggplot import *
import numpy as np
%matplotlib inline

# Set ipython's max row display
pd.set_option('display.max_row', 1000)

# Set iPython's max column width to 50
pd.set_option('display.max_columns', 50)

In [11]:
df = pd.read_csv('world_war.csv')

### Quick glance at the data

In [12]:
df.head(10)

Unnamed: 0,Name,Year,Location,Assaulter,Defender,Assaulter_1,Assaulter_2,Defender_1,As_strength_men,Def_strength_men,As_strength_guns,Def_strength_guns,Assaulter_losses,Defender_losses,Outcome
0,Battle of Mons,1914.0,"Mons, Belgium",German Empire,British Empire,,,,160000.0,80000.0,600.0,300.0,5000.0,1638.0,win
1,Battle of Liege,1914.0,"Liege, Belgium",German Empire,Belgium,,,,59800.0,36000.0,400.0,280.0,25000.0,15000.0,win
2,Battle of the Frontiers,1914.0,"Lorraine, the Ardennes and Belgium",Belgium,German Empire,France,British Empire,,1504000.0,1600000.0,4640.0,5000.0,165000.0,264680.0,loss
3,Togoland Campaign,1914.0,German Togoland,British Empire,German Empire,France,,,1100.0,693.0,,,137.0,41.0,win
4,Battle of Cer,1914.0,Cer Mountain,Austria-Hungary,Serbia,,,,200000.0,180000.0,,,18500.0,4785.0,loss
5,Battle of Tannenberg,1914.0,"Allenstein, East Prussia",Russian Empire,German Empire,,,,230000.0,150000.0,,,170000.0,13873.0,loss
6,First Battle of the Marne,1914.0,"Marne River, Paris",France,German Empire,British Empire,,,1071000.0,1485000.0,,,263000.0,256000.0,win
7,Battle of Drina,1914.0,Drina River,Austria-Hungary,Serbia,,,,,,,,,,loss
8,First Battle of Ypres,1914.0,"Ypres, Belgium",France,German Empire,Belgium,British Empire,,4400000.0,5400000.0,,,58155.0,46765.0,indecisive
9,Siege of Tsingtao,1914.0,"Tsingtau, Germany",British Empire,German Empire,Empire of Japan,,Austria-Hungary,24500.0,3750.0,142.0,,3000.0,700.0,win


In [13]:
#Create list of Assaulters
assaulter_list = []

for elem in df['Assaulter'].unique():
    if type(elem) == str:
        assaulter_list.append(elem)
    else:
        continue

#Create list of Defenders
defender_list = []

for elem in df['Defender'].unique():
    if type(elem) == str:
        defender_list.append(elem)
    else:
        continue

In [14]:
#Create a list of participants 
participants = []

participants = set(assaulter_list + defender_list)

#How many times has been successfully carried out the attack
wins_at = []

for elem in participants:
    score = 0
    for row in df['Assaulter'][df['Outcome'] == 'win']:
        if elem in row:
            score +=1
        else:
            continue
    wins_at.append(score)

data = {'Assaulter': list(participants),
        'Win': wins_at}
attack_win = pd.DataFrame(data)
wins_at

[1, 1, 2, 2, 0, 6, 6, 1, 0, 0, 0]

In [15]:
#How many times has been unsuccessfully carried out the attack
loss_at = []

for elem in participants:
    score = 0
    for row in df['Assaulter'][df['Outcome'] == 'loss']:
        if elem in row:
            score +=1
        else:
            continue
    loss_at.append(score)

data = {'Assaulter': list(participants),
        'Loss': loss_at}
attack_loss = pd.DataFrame(data)

In [16]:
#How many times has been successfully carried out protection
wins_pr = []

for elem in participants:
    score = 0
    for row in df['Defender'][df['Outcome'] == 'loss']:
        if elem in row:
            score +=1
        else:
            continue
    wins_pr.append(score)

data = {'Defender': list(participants),
        'Win': wins_pr}
protection_win = pd.DataFrame(data)

In [17]:
#How many times has been unsuccessfully carried out protection
loss_pr = []

for elem in participants:
    score = 0
    for row in df['Defender'][df['Outcome'] == 'win']:
        if elem in row:
            score +=1
        else:
            continue
    loss_pr.append(score)

data = {'Defender': list(participants),
        'Loss': loss_pr}
protection_loss = pd.DataFrame(data)


In [18]:
#Dictionary
data = {'attack_win': wins_at,
        'attack_loss': loss_at,
        'protection_win': wins_pr,
        'protection_loss': loss_pr, }

# Create a dataframe from the dictionary, indexed by a participant's name
battle_record = pd.DataFrame(data,index=participants)
battle_record

Unnamed: 0,attack_loss,attack_win,protection_loss,protection_win
Australia,0,1,0,0
Kingdom of Italy,2,1,0,0
France,2,2,0,0
Bulgaria,0,2,0,0
Russian Empire,1,0,2,0
German Empire,0,6,6,7
British Empire,7,6,3,0
Serbia,0,1,2,2
Austria-Hungary,2,0,2,2
Ottoman Empire,0,0,3,4


In [19]:
# Count the total attacks
battle_record['total_attacks'] = battle_record['attack_win'] + battle_record['attack_loss']

# Count the total defends
battle_record['total_defends'] = battle_record['protection_win'] + battle_record['protection_loss']

# Count the total wins
battle_record['total_wins'] = battle_record['attack_win'] + battle_record['protection_win']

# Count the total losses
battle_record['total_loss'] = battle_record['attack_loss'] + battle_record['protection_loss']

# Create a total number of battles
battle_record['total_battles'] = battle_record['total_attacks'] + battle_record['total_defends']

# Create a win percentage score (total wins / total battles)
battle_record['win_percentage'] = battle_record['total_wins'] / battle_record['total_battles'] * 100

# Create a composite score (total number of wins minus total number of losses)
battle_record['composite_record'] = battle_record['total_wins'] - battle_record['total_loss']

battle_record

Unnamed: 0,attack_loss,attack_win,protection_loss,protection_win,total_attacks,total_defends,total_wins,total_loss,total_battles,win_percentage,composite_record
Australia,0,1,0,0,1,0,1,0,1,100.0,1
Kingdom of Italy,2,1,0,0,3,0,1,2,3,33.333333,-1
France,2,2,0,0,4,0,2,2,4,50.0,0
Bulgaria,0,2,0,0,2,0,2,0,2,100.0,2
Russian Empire,1,0,2,0,1,2,0,3,3,0.0,-3
German Empire,0,6,6,7,6,13,13,6,19,68.421053,7
British Empire,7,6,3,0,13,3,6,10,16,37.5,-4
Serbia,0,1,2,2,1,4,3,2,5,60.0,1
Austria-Hungary,2,0,2,2,2,4,2,4,6,33.333333,-2
Ottoman Empire,0,0,3,4,0,7,4,3,7,57.142857,1


### Analysis

In [21]:
#Top 10 most active assaulters
battle_record.sort_values('total_attacks', ascending=False).head(10)['total_attacks']

British Empire      13
German Empire        6
France               4
Kingdom of Italy     3
Bulgaria             2
Austria-Hungary      2
Australia            1
Russian Empire       1
Serbia               1
Belgium              1
Name: total_attacks, dtype: int64

In [22]:
#Top 10 most active defenders
battle_record.sort_values('total_defends', ascending=False).head(10)['total_defends']

German Empire       13
Ottoman Empire       7
Serbia               4
Austria-Hungary      4
British Empire       3
Russian Empire       2
Belgium              1
Australia            0
Kingdom of Italy     0
France               0
Name: total_defends, dtype: int64

In [23]:
#Top 10 most victorious participants
battle_record.sort_values('total_wins', ascending=False).head(10)['total_wins']

German Empire       13
British Empire       6
Ottoman Empire       4
Serbia               3
France               2
Bulgaria             2
Austria-Hungary      2
Australia            1
Kingdom of Italy     1
Russian Empire       0
Name: total_wins, dtype: int64

In [24]:
#Top 10 most losing participants
battle_record.sort_values('total_loss', ascending=False).head(10)['total_loss']

British Empire      10
German Empire        6
Austria-Hungary      4
Russian Empire       3
Ottoman Empire       3
Kingdom of Italy     2
France               2
Serbia               2
Belgium              2
Australia            0
Name: total_loss, dtype: int64

In [25]:
#Participants with the best win percentage
battle_record.sort_values('win_percentage', ascending=False).head(75)['win_percentage'][battle_record['win_percentage'] != float('Inf')]

Australia           100.000000
Bulgaria            100.000000
German Empire        68.421053
Serbia               60.000000
Ottoman Empire       57.142857
France               50.000000
British Empire       37.500000
Kingdom of Italy     33.333333
Austria-Hungary      33.333333
Russian Empire        0.000000
Belgium               0.000000
Name: win_percentage, dtype: float64