In [1]:
import pandas as pd
from bokeh.plotting import figure, output_file, show
from bokeh.models import ColumnDataSource, Range1d
from bokeh.layouts import layout
from bokeh.palettes import Spectral3
from bokeh.tile_providers import CARTODBPOSITRON, get_provider
from pyproj import Proj, transform

In [2]:
from bokeh.io import push_notebook, output_notebook

output_notebook()

In [3]:
df = pd.read_csv("operations.csv", low_memory=False)

In [4]:
df

Unnamed: 0,Mission ID,Mission Date,Theater of Operations,Country,Air Force,Unit ID,Aircraft Series,Callsign,Mission Type,Takeoff Base,...,Incendiary Devices Weight (Tons),Fragmentation Devices,Fragmentation Devices Type,Fragmentation Devices Weight (Pounds),Fragmentation Devices Weight (Tons),Total Weight (Pounds),Total Weight (Tons),Time Over Target,Bomb Damage Assessment,Source ID
0,1,8/15/1943,MTO,USA,12 AF,27 FBG/86 FBG,A36,,,PONTE OLIVO AIRFIELD,...,,,,,,,10.0,,,
1,2,8/15/1943,PTO,USA,5 AF,400 BS,B24,,1,,...,,,,,,,20.0,,,9366.0
2,3,8/15/1943,MTO,USA,12 AF,27 FBG/86 FBG,A36,,,PONTE OLIVO AIRFIELD,...,,,,,,,9.0,,,
3,4,8/15/1943,MTO,USA,12 AF,27 FBG/86 FBG,A36,,,PONTE OLIVO AIRFIELD,...,,,,,,,7.5,,,
4,5,8/15/1943,PTO,USA,5 AF,321 BS,B24,,1,,...,,,,,,,8.0,,,22585.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178276,178738,2/16/1945,CBI,USA,10 AF,89 FS,P47,,10,,...,,,,,,,1.0,,,35815.0
178277,178739,2/16/1945,CBI,USA,10 AF,90 FS,P47,,10,,...,,,,,,,1.0,,,35020.0
178278,178740,2/16/1945,CBI,USA,10 AF,90 FS,P47,,10,,...,,,,,,,1.0,,,35022.0
178279,178741,2/16/1945,PTO,NEW ZEALAND,RNZAF,NO. 22 SQUADRON RNZAF,F4U,,,,...,,,,,,8000.0,4.0,,,


In [5]:
df.columns

Index(['Mission ID', 'Mission Date', 'Theater of Operations', 'Country',
       'Air Force', 'Unit ID', 'Aircraft Series', 'Callsign', 'Mission Type',
       'Takeoff Base', 'Takeoff Location', 'Takeoff Latitude',
       'Takeoff Longitude', 'Target ID', 'Target Country', 'Target City',
       'Target Type', 'Target Industry', 'Target Priority', 'Target Latitude',
       'Target Longitude', 'Altitude (Hundreds of Feet)', 'Airborne Aircraft',
       'Attacking Aircraft', 'Bombing Aircraft', 'Aircraft Returned',
       'Aircraft Failed', 'Aircraft Damaged', 'Aircraft Lost',
       'High Explosives', 'High Explosives Type',
       'High Explosives Weight (Pounds)', 'High Explosives Weight (Tons)',
       'Incendiary Devices', 'Incendiary Devices Type',
       'Incendiary Devices Weight (Pounds)',
       'Incendiary Devices Weight (Tons)', 'Fragmentation Devices',
       'Fragmentation Devices Type', 'Fragmentation Devices Weight (Pounds)',
       'Fragmentation Devices Weight (Tons)', 'To

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178281 entries, 0 to 178280
Data columns (total 46 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   Mission ID                             178281 non-null  int64  
 1   Mission Date                           178281 non-null  object 
 2   Theater of Operations                  175123 non-null  object 
 3   Country                                126494 non-null  object 
 4   Air Force                              126444 non-null  object 
 5   Unit ID                                50913 non-null   object 
 6   Aircraft Series                        178165 non-null  object 
 7   Callsign                               21 non-null      object 
 8   Mission Type                           47186 non-null   object 
 9   Takeoff Base                           3383 non-null    object 
 10  Takeoff Location                       3338 non-null    

In [22]:
df = df.drop(['Unit ID','Aircraft Series', 'Callsign', 'Mission Type',
       'Takeoff Base', 'Takeoff Location', 'Takeoff Latitude',
       'Takeoff Longitude', 'Target ID', 'Target Country', 'Target City',
       'Target Type', 'Target Industry', 'Target Priority','Altitude (Hundreds of Feet)', 'Airborne Aircraft',
       'Attacking Aircraft', 'Bombing Aircraft', 'Aircraft Returned',
       'Aircraft Failed', 'Aircraft Damaged', 'Aircraft Lost',
       'High Explosives', 'High Explosives Type',
       'High Explosives Weight (Pounds)',
       'Incendiary Devices', 'Incendiary Devices Type',
       'Incendiary Devices Weight (Pounds)',
       'Incendiary Devices Weight (Tons)', 'Fragmentation Devices',
       'Fragmentation Devices Type', 'Fragmentation Devices Weight (Pounds)',
       'Fragmentation Devices Weight (Tons)', 'Total Weight (Pounds)',
       'High Explosives Weight (Tons)', 'Time Over Target', 'Bomb Damage Assessment',
       'Source ID'], axis=1)

In [23]:
df = df[df['Country'].isnull()==False]

In [24]:
df = df.fillna(0)

In [25]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 126494 entries, 0 to 178280
Data columns (total 8 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Mission ID             126494 non-null  int64  
 1   Mission Date           126494 non-null  object 
 2   Theater of Operations  126494 non-null  object 
 3   Country                126494 non-null  object 
 4   Air Force              126494 non-null  object 
 5   Target Latitude        126494 non-null  float64
 6   Target Longitude       126494 non-null  float64
 7   Total Weight (Tons)    126494 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 8.7+ MB


In [41]:
grouped = df['Country'].value_counts()

In [51]:
new_df = pd.DataFrame(list(grouped.items()), columns=['Country', 'Counts'])

In [52]:
new_df

Unnamed: 0,Country,Counts
0,USA,94165
1,GREAT BRITAIN,31361
2,NEW ZEALAND,633
3,AUSTRALIA,316
4,SOUTH AFRICA,19


In [53]:
source = ColumnDataSource(data=new_df)

In [79]:
p = figure(x_range=new_df['Country'], title="Number of Bombings per Country")

In [80]:
p.vbar(x=new_df['Country'], top=new_df['Counts'], legend_label="Rate", width=0.5, bottom=0, color="red")

In [81]:
show(p)

In [None]:
df

In [74]:
grouped

USA              94165
GREAT BRITAIN    31361
NEW ZEALAND        633
AUSTRALIA          316
SOUTH AFRICA        19
Name: Country, dtype: int64

In [None]:
grouped.info()

In [73]:
filter = grouped['Total Weight (Tons)']!=0
grouped = grouped[filter]

KeyError: 'Total Weight (Tons)'

In [None]:
source = ColumnDataSource(grouped)

In [None]:
source

In [76]:
left = -2150000
right = 18000000
bottom = -5300000
top = 11000000

In [77]:
tile_provider = get_provider(CARTODBPOSITRON)
p = figure(x_range=(left, right), y_range=(bottom, top),
           x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(tile_provider)

In [78]:
show(p)

In [None]:
def wgs84_to_web_mercator(grouped, lon="LON", lat="LAT"):
    k = 6378137
    df["x"] = df[lon] * (k * np.pi/180.0)
    df["y"] = np.log(np.tan((90 + df[lat]) * np.pi/360.0)) * k

    return df

In [None]:
p.circle(x='Target Longitude', y='Target Latitude', source=source, line_color='grey', fill_color='yellow')

p.axis.visible = False

show(p)