# The First Plot

In [2]:
from bokeh.plotting import figure, output_file, show
from bokeh.io import output_notebook

In [3]:
x = [1, 2, 3, 4]
y = [2, 4, 6, 8]

In [4]:
output_notebook()

In [5]:
p = figure()

In [6]:
p.circle(x, y, size=10, color='red', legend='circle')
p.line(x, y, color='blue', legend='line')
p.triangle(y, x, color='gold', size=10, legend='triangle')

In [7]:
p.legend.click_policy='hide'
# could also use 'mute' instead of 'hide'

In [8]:
show(p)
# This should have been inline, need to figure out why it wasn't at sometime

# The WWII THOR Data

In [9]:
import pandas as pd

In [10]:
df = pd.read_csv('thor_wwii.csv')
print(df)

           MSNDATE      THEATER COUNTRY_FLYING_MISSION    NAF   UNIT_ID  \
0       03/30/1941          ETO          GREAT BRITAIN    RAF   84 SQDN   
1       11/24/1940          ETO          GREAT BRITAIN    RAF  211 SQDN   
2       12/04/1940          ETO          GREAT BRITAIN    RAF  211 SQDN   
3       12/31/1940          ETO          GREAT BRITAIN    RAF  211 SQDN   
4       01/06/1941          ETO          GREAT BRITAIN    RAF  211 SQDN   
5       02/12/1941          ETO          GREAT BRITAIN    RAF   84 SQDN   
6       02/12/1941          ETO          GREAT BRITAIN    RAF   11 SQDN   
7       03/04/1941          ETO          GREAT BRITAIN    RAF  211 SQDN   
8       03/07/1941          ETO          GREAT BRITAIN    RAF  211 SQDN   
9       03/07/1941          ETO          GREAT BRITAIN    RAF  211 SQDN   
10      03/08/1941          ETO          GREAT BRITAIN    RAF  211 SQDN   
11      11/28/1940          ETO          GREAT BRITAIN    RAF   84 SQDN   
12      12/02/1940       

In [11]:
df.head()

Unnamed: 0,MSNDATE,THEATER,COUNTRY_FLYING_MISSION,NAF,UNIT_ID,AIRCRAFT_NAME,AC_ATTACKING,TAKEOFF_BASE,TAKEOFF_COUNTRY,TAKEOFF_LATITUDE,TAKEOFF_LONGITUDE,TGT_COUNTRY,TGT_LOCATION,TGT_LATITUDE,TGT_LONGITUDE,TONS_HE,TONS_IC,TONS_FRAG,TOTAL_TONS
0,03/30/1941,ETO,GREAT BRITAIN,RAF,84 SQDN,BLENHEIM,10.0,,,,,ALBANIA,ELBASAN,41.1,20.07,0.0,0.0,0.0,0.0
1,11/24/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,DURAZZO,41.32,19.45,0.0,0.0,0.0,0.0
2,12/04/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,TEPELENE,40.3,20.02,0.0,0.0,0.0,0.0
3,12/31/1940,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,VALONA,40.47,19.49,0.0,0.0,0.0,0.0
4,01/06/1941,ETO,GREAT BRITAIN,RAF,211 SQDN,BLENHEIM,9.0,,,,,ALBANIA,VALONA,40.47,19.49,0.0,0.0,0.0,0.0


In [12]:
df.columns.tolist()

['MSNDATE',
 'THEATER',
 'COUNTRY_FLYING_MISSION',
 'NAF',
 'UNIT_ID',
 'AIRCRAFT_NAME',
 'AC_ATTACKING',
 'TAKEOFF_BASE',
 'TAKEOFF_COUNTRY',
 'TAKEOFF_LATITUDE',
 'TAKEOFF_LONGITUDE',
 'TGT_COUNTRY',
 'TGT_LOCATION',
 'TGT_LATITUDE',
 'TGT_LONGITUDE',
 'TONS_HE',
 'TONS_IC',
 'TONS_FRAG',
 'TOTAL_TONS']

## Bokeh ColumnDataSource

In [13]:
from bokeh.models import ColumnDataSource
from bokeh.models.tools import HoverTool

In [25]:
sample = df.sample(500)
source = ColumnDataSource(sample)

p = figure()
p.circle(x='AC_ATTACKING', y='TOTAL_TONS',  source=source, size=3, color='blue')
p.title.text = 'Attacking Aircraft and Munitions Dropped'
p.xaxis.axis_label = 'Number of Attacking Aircraft'
p.yaxis.axis_label = 'Tons of Munitions Dropper'

hover = HoverTool()
hover.tooltips = [
    ('Attack Date', '@MSNDATE'),
    ('Attacking Aircraft', '@AC_ATTACKING'),
    ('Tons of Munitions', '@TOTAL_TONS'),
    ('Type of Aircraft', '@AIRCRAFT_NAME')
]
p.add_tools(hover)

show(p)

## Munitions Dropped by Country

In [26]:
from bokeh.palettes import Spectral5
from bokeh.transform import factor_cmap

In [27]:
grouped = df.groupby('COUNTRY_FLYING_MISSION')['TOTAL_TONS', 'TONS_HE', 
                                               'TONS_IC', 'TONS_FRAG'].sum()

In [28]:
print(grouped)

                        TOTAL_TONS     TONS_HE     TONS_IC  TONS_FRAG
COUNTRY_FLYING_MISSION                                               
AUSTRALIA                   479.89      453.90      13.600      18.64
GREAT BRITAIN           1112598.95   868277.23  209036.158    1208.00
NEW ZEALAND                2629.06     4263.70     166.500       0.00
SOUTH AFRICA                 11.69       15.00       0.000       0.00
USA                     1625487.68  1297955.65  205288.200  127655.98


In [29]:
grouped = grouped/1000

In [32]:
source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
p = figure(x_range=countries)

color_map = factor_cmap(field_name='COUNTRY_FLYING_MISSION', 
                    palette=Spectral5, factors=countries)

p.vbar(x='COUNTRY_FLYING_MISSION', top='TOTAL_TONS', source=source, 
       width=0.70, color=color_map)

p.title.text ='Munitions Dropped by Allied Country'
p.xaxis.axis_label = 'Country'
p.yaxis.axis_label = 'Kilotons of Munitions'

hover = HoverTool()
hover.tooltips = [
    ("Totals", "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation")]

hover.mode = 'vline'
p.add_tools(hover)

show(p)

### Redo the graph
This is a terrible figure, we should do a log10 transform to get a better idea of how this works.

In [35]:
import numpy as np
grouped = df.groupby('COUNTRY_FLYING_MISSION')['TOTAL_TONS', 'TONS_HE', 
                                               'TONS_IC', 'TONS_FRAG'].sum()
log_group = np.log(1+grouped)
print(log_group)

                        TOTAL_TONS    TONS_HE    TONS_IC  TONS_FRAG
COUNTRY_FLYING_MISSION                                             
AUSTRALIA                 6.175639   6.120078   2.681022   2.977568
GREAT BRITAIN            13.922210  13.674267  12.250267   7.097549
NEW ZEALAND               7.874762   8.358127   5.120983   0.000000
SOUTH AFRICA              2.540814   2.772589   0.000000   0.000000
USA                      14.301319  14.076302  12.232175  11.757102


In [36]:
source = ColumnDataSource(log_group)
#countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
p = figure(x_range=countries)

#color_map = factor_cmap(field_name='COUNTRY_FLYING_MISSION', 
#                    palette=Spectral5, factors=countries)

p.vbar(x='COUNTRY_FLYING_MISSION', top='TOTAL_TONS', source=source, 
       width=0.70, color=color_map)

p.title.text ='Munitions Dropped by Allied Country'
p.xaxis.axis_label = 'Country'
p.yaxis.axis_label = 'log(Tons of Munitions)'

hover = HoverTool()
hover.tooltips = [
    ("Totals", "@TONS_HE High Explosive / @TONS_IC Incendiary / @TONS_FRAG Fragmentation")]

hover.mode = 'vline'
p.add_tools(hover)

show(p)

## Types of Munitions Dropped by Country

In [37]:
thor = df
filter = df['COUNTRY_FLYING_MISSION'].isin(('USA', 'GREAT BRITAIN'))
df = df[filter]

In [38]:
grouped = df.groupby('COUNTRY_FLYING_MISSION')['TOTAL_TONS', 'TONS_HE', 
                                               'TONS_IC', 'TONS_FRAG'].sum()
grouped = grouped / 1000

source = ColumnDataSource(grouped)
countries = source.data['COUNTRY_FLYING_MISSION'].tolist()
p = figure(x_range=countries)

In [39]:
from bokeh.palettes import Spectral3

p.vbar_stack(stackers=['TONS_HE', 'TONS_FRAG', 'TONS_IC'], 
             x='COUNTRY_FLYING_MISSION', source=source, 
             legend = ['High Explosive', 'Fragmentation', 'Incendiary'],
             width=0.5, color=Spectral3)
p.title.text ='Types of Munitions Dropped by Allied Country'
p.legend.location = 'top_left'

p.xaxis.axis_label = 'Country'
p.xgrid.grid_line_color = None	#remove the x grid lines

p.yaxis.axis_label = 'Kilotons of Munitions'

show(p)

## Time-Series and Annotations: Bombing Operations over Time

In [42]:
df = thor

#make sure MSNDATE is a datetime format
df['MSNDATE'] = pd.to_datetime(df['MSNDATE'], format='%m/%d/%Y')

grouped = df.groupby('MSNDATE')['TOTAL_TONS', 'TONS_IC', 'TONS_FRAG'].sum()
grouped = grouped/1000

source = ColumnDataSource(grouped)

p = figure(x_axis_type='datetime')

p.line(x='MSNDATE', y='TOTAL_TONS', line_width=2, source=source, 
       legend='All Munitions')
p.line(x='MSNDATE', y='TONS_FRAG', line_width=2, source=source, color=Spectral3[1], 
       legend='Fragmentation')
p.line(x='MSNDATE', y='TONS_IC', line_width=2, source=source, color=Spectral3[2], 
       legend='Incendiary')

p.yaxis.axis_label = 'Kilotons of Munitions Dropped'

show(p)

### Subsampling
The tutorial suggest subsampling since the daily data are so noisy. That might create trends, of course, but it throws away information. Why not take the mean or sum over week, month, or quarter instead?

In [43]:
grouped

Unnamed: 0_level_0,TOTAL_TONS,TONS_IC,TONS_FRAG
MSNDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1939-09-03,0.0000,0.000,0.000
1939-09-04,0.0000,0.000,0.000
1939-09-05,0.0000,0.000,0.000
1939-09-07,0.0000,0.000,0.000
1939-09-08,0.0000,0.000,0.000
1939-09-10,0.0000,0.000,0.000
1939-09-24,0.0000,0.000,0.000
1939-09-25,0.0000,0.000,0.000
1939-09-27,0.0000,0.000,0.000
1939-09-28,0.0000,0.000,0.000


In [50]:
monthyear = grouped.groupby([lambda x: x.year, lambda x: x.month]).sum()
monthyear.head()

Unnamed: 0,Unnamed: 1,TOTAL_TONS,TONS_IC,TONS_FRAG
1939,9,0.0,0.0,0.0
1939,10,0.0,0.0,0.0
1939,11,0.0,0.0,0.0
1939,12,0.0,0.0,0.0
1940,5,0.028,0.0,0.0


In [53]:
monthyear.columns.tolist()

['TOTAL_TONS', 'TONS_IC', 'TONS_FRAG']

In [57]:
my = grouped.groupby([grouped.index.year, grouped.index.month]).sum()
my.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,TOTAL_TONS,TONS_IC,TONS_FRAG
MSNDATE,MSNDATE,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1939,9,0.0,0.0,0.0
1939,10,0.0,0.0,0.0
1939,11,0.0,0.0,0.0
1939,12,0.0,0.0,0.0
1940,5,0.028,0.0,0.0


In [60]:
grouped['Year'] = grouped.index.year
grouped['Month'] = grouped.index.month
grouped['Day'] =  grouped.index.day

In [69]:
ymd = grouped[['Year', 'Month']].copy()
ymd['Day'] = 1
ymd.head()

Unnamed: 0_level_0,Year,Month,Day
MSNDATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1939-09-03,1939,9,1
1939-09-04,1939,9,1
1939-09-05,1939,9,1
1939-09-07,1939,9,1
1939-09-08,1939,9,1


In [None]:
#make sure MSNDATE is a datetime format
df['MSNDATE'] = pd.to_datetime(df['MSNDATE'], format='%m/%d/%Y')

In [74]:
import time
import datetime
ymd['Date'] = datetime.date(year=ymd.Year, month=ymd.Month, day=1)

TypeError: cannot convert the series to <class 'int'>

In [76]:
datetime.date(year=1939, month=9, day=1)

datetime.date(1939, 9, 1)

In [None]:
source = ColumnDataSource(monthyear)

p = figure(x_axis_type='datetime')

p.line(x='MSNDATE', y='TOTAL_TONS', line_width=2, source=source, 
       legend='All Munitions')
p.line(x='MSNDATE', y='TONS_FRAG', line_width=2, source=source, color=Spectral3[1], 
       legend='Fragmentation')
p.line(x='MSNDATE', y='TONS_IC', line_width=2, source=source, color=Spectral3[2], 
       legend='Incendiary')

p.yaxis.axis_label = 'Kilotons of Munitions Dropped'
