In [15]:
import pandas as pd
from ggplot import *
import numpy as np
%matplotlib inline

# Show up to 1000 rows when a DataFrame is displayed.
# The option name is written out in full: pandas also accepts abbreviated
# patterns, but those can become ambiguous if similarly named options are added.
pd.set_option('display.max_rows', 1000)

# Show up to 50 columns when a DataFrame is displayed
# (this option limits the number of columns, not their width)
pd.set_option('display.max_columns', 50)

In [16]:
# Read the battle records from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='world_war.csv')

### Quick glance at the data

In [17]:
# Preview the first 10 rows to check the columns and values that were loaded
df.head(10)

Unnamed: 0,Name,Year,Assaulter,Defender,Assaulter_1,Defender_1,Assaulter_strength,Defender_strength,Outcome
0,Baltic Operation,1941,Nazi Germany,Soviet Union,,,787500,498000,win
1,Battle of Bialystok–Minsk,1941,Nazi Germany,Soviet Union,,,750000,675000,win
2,Battle of Brody,1941,Nazi Germany,Soviet Union,,,750,3500,win
3,Battle of Hanko,1941,Finland,Soviet Union,Finland,,25300,30000,loss
4,Demyansk Pocket,1942,Soviet Union,Nazi Germany,,,130000,400000,loss


In [18]:
# Create list of Assaulters: the distinct values of the 'Assaulter' column.
# Only str entries are kept, so non-text entries (e.g. NaN for a missing
# value) are left out.
assaulter_list = [name for name in df['Assaulter'].unique() if isinstance(name, str)]

# Create list of Defenders, built the same way from the 'Defender' column
defender_list = [name for name in df['Defender'].unique() if isinstance(name, str)]

In [49]:
# Create the collection of participants: every name that appears as an
# assaulter or as a defender. It is a set, so each name occurs once; the
# counts below are produced by iterating it, and list(participants) yields
# the same order as long as the set is not modified in between.
participants = set(assaulter_list + defender_list)

# How many times each participant carried out a successful attack.
# Rows are restricted to battles whose 'Outcome' is 'win' once, up front.
won_attacks = df.loc[df['Outcome'] == 'win', 'Assaulter']

# A participant is credited when its name occurs inside the 'Assaulter' text
# (plain substring match, no regex). na=False makes a missing assaulter count
# as "no match" instead of raising TypeError, which `name in NaN` would do.
wins_at = [
    int(won_attacks.str.contains(name, regex=False, na=False).sum())
    for name in participants
]

data = {'Assaulter': list(participants),
        'Win': wins_at}
attack_win = pd.DataFrame(data)
wins_at

[0, 0, 3]

In [44]:
# How many times each participant carried out an unsuccessful attack.
# Rows are restricted to battles whose 'Outcome' is 'loss' once, up front.
lost_attacks = df.loc[df['Outcome'] == 'loss', 'Assaulter']

# A participant is counted when its name occurs inside the 'Assaulter' text
# (plain substring match, no regex). na=False makes a missing assaulter count
# as "no match" instead of raising TypeError, which `name in NaN` would do.
loss_at = [
    int(lost_attacks.str.contains(name, regex=False, na=False).sum())
    for name in participants
]

data = {'Assaulter': list(participants),
        'Loss': loss_at}
attack_loss = pd.DataFrame(data)

In [45]:
# How many times each participant defended successfully.
# A defence is counted as successful when the battle's 'Outcome' is 'loss'
# (NOTE(review): this treats 'Outcome' as the assaulter's result - confirm
# against the data source).
held_defences = df.loc[df['Outcome'] == 'loss', 'Defender']

# A participant is credited when its name occurs inside the 'Defender' text
# (plain substring match, no regex). na=False makes a missing defender count
# as "no match" instead of raising TypeError, which `name in NaN` would do.
wins_pr = [
    int(held_defences.str.contains(name, regex=False, na=False).sum())
    for name in participants
]

data = {'Defender': list(participants),
        'Win': wins_pr}
protection_win = pd.DataFrame(data)

In [46]:
# How many times each participant defended unsuccessfully.
# A defence is counted as failed when the battle's 'Outcome' is 'win'
# (NOTE(review): this treats 'Outcome' as the assaulter's result - confirm
# against the data source).
failed_defences = df.loc[df['Outcome'] == 'win', 'Defender']

# A participant is counted when its name occurs inside the 'Defender' text
# (plain substring match, no regex). na=False makes a missing defender count
# as "no match" instead of raising TypeError, which `name in NaN` would do.
loss_pr = [
    int(failed_defences.str.contains(name, regex=False, na=False).sum())
    for name in participants
]

data = {'Defender': list(participants),
        'Loss': loss_pr}
protection_loss = pd.DataFrame(data)


[0, 3, 0]

In [51]:
# Collect the four per-participant tallies under the column names they
# will carry in the summary table
data = {
    'attack_win': wins_at,
    'attack_loss': loss_at,
    'protection_win': wins_pr,
    'protection_loss': loss_pr,
}

# Build the summary table: one row per participant, labelled by its name
battle_record = pd.DataFrame(data=data, index=participants)
battle_record

Unnamed: 0,attack_loss,attack_win,protection_loss,protection_win
Finland,1,0,0,0
Soviet Union,1,0,3,1
Nazi Germany,0,3,0,1


In [52]:
# Attacks launched: successful plus unsuccessful ones
battle_record['total_attacks'] = battle_record['attack_win'].add(battle_record['attack_loss'])

# Defences fought: successful plus unsuccessful ones
battle_record['total_defends'] = battle_record['protection_win'].add(battle_record['protection_loss'])

# Victories, whether attacking or defending
battle_record['total_wins'] = battle_record['attack_win'].add(battle_record['protection_win'])

# Defeats, whether attacking or defending
battle_record['total_loss'] = battle_record['attack_loss'].add(battle_record['protection_loss'])

# All battles taken part in (as assaulter or as defender)
battle_record['total_battles'] = battle_record['total_attacks'].add(battle_record['total_defends'])

# Share of battles won, as a percentage (total wins / total battles * 100)
battle_record['win_percentage'] = battle_record['total_wins'].div(battle_record['total_battles']).mul(100)

# Composite score: total number of wins minus total number of losses
battle_record['composite_record'] = battle_record['total_wins'].sub(battle_record['total_loss'])

battle_record

Unnamed: 0,attack_loss,attack_win,protection_loss,protection_win,total_attacks,total_defends,total_wins,total_loss,total_battles,win_percentage,composite_record
Finland,1,0,0,0,1,0,0,1,1,0.0,-1
Soviet Union,1,0,3,1,1,4,1,4,5,20.0,-3
Nazi Germany,0,3,0,1,3,1,4,0,4,100.0,4


### Analysis

In [55]:
# Top 10 most active assaulters: rank participants by number of attacks,
# highest first, and show that column for the first ten.
# sort_values() replaces the deprecated DataFrame.sort(), which newer pandas
# releases no longer provide; the other ranking cells already use it.
battle_record.sort_values('total_attacks', ascending=False).head(10)['total_attacks']

  from ipykernel import kernelapp as app


Nazi Germany    3
Finland         1
Soviet Union    1
Name: total_attacks, dtype: int64

In [56]:
# Top 10 most active defenders: order by number of defences (highest first),
# keep only that column, and show the first ten entries
(
    battle_record
    .sort_values(by='total_defends', ascending=False)
    .loc[:, 'total_defends']
    .head(10)
)

Soviet Union    4
Nazi Germany    1
Finland         0
Name: total_defends, dtype: int64

In [57]:
# Top 10 most victorious participants: order by total wins (highest first),
# keep only that column, and show the first ten entries
(
    battle_record
    .sort_values(by='total_wins', ascending=False)
    .loc[:, 'total_wins']
    .head(10)
)

Nazi Germany    4
Soviet Union    1
Finland         0
Name: total_wins, dtype: int64

In [58]:
# Top 10 participants with the most losses: order by total losses (highest
# first), keep only that column, and show the first ten entries
(
    battle_record
    .sort_values(by='total_loss', ascending=False)
    .loc[:, 'total_loss']
    .head(10)
)

Soviet Union    4
Finland         1
Nazi Germany    0
Name: total_loss, dtype: int64

In [60]:
# Participants with the best win percentage (at most 75 are shown).
# Infinite percentages are removed BEFORE ranking: filtering after head(75)
# would let them occupy the top slots (inf sorts first in descending order)
# and shrink the result. The mask is applied to the frame it was built from,
# so no implicit index alignment is involved.
(
    battle_record
    .loc[battle_record['win_percentage'] != float('Inf')]
    .sort_values('win_percentage', ascending=False)
    .head(75)
    .loc[:, 'win_percentage']
)

Nazi Germany    100.0
Soviet Union     20.0
Finland           0.0
Name: win_percentage, dtype: float64

### Comparison of the three attackers

In [None]:
nazi_germany = []
soviet_union = []