# Solution files

Here you find the solutions to the challenges.

In [None]:
%matplotlib inline

**Import statements**

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

**Global settings**

In [None]:
# Display limits for DataFrame output and the default figure size used by all plots below.
pd.set_option("display.max_rows", 999)
pd.set_option("display.max_columns", 100)
plt.rcParams["figure.figsize"] = [15, 6]

## The Python ecosystem - Plotting with Python

In [None]:
# %load ../src/_solutions/mpl_plot_customization.py
## solution matplotlib challenge
# X, C and S are not defined in this cell; they are expected to exist already
# in the notebook this solution is loaded into.
fig, ax = plt.subplots(figsize=(10,6))
ax.plot(X, C, color="green", label="cosine")
ax.plot(X, S, linestyle="--", linewidth=3.5, label="sine")
ax.set_title("My awesome matplotlib figure", size=16)
ax.legend(fontsize=12)
ax.set_xlim([-4,4])
ax.set_xticks([-np.pi, 0, np.pi])
ax.set_yticks([-1, 0, 1])
# Raw strings: "\p" is not a valid Python escape sequence, so the non-raw form
# triggers an invalid-escape warning. The resulting label text is unchanged.
ax.set_xticklabels([r'$-\pi$', r'$0$', r'$+\pi$'], size=12)
ax.grid()

# Write the figure to disk as a high-resolution PNG.
filepath = "../figures/my_awesome_mpl_figure.png"
fig.savefig(filepath, dpi=300)

## Exploratory Data Analysis in Action - Data preparation

In [None]:
# %load ../src/_solutions/features_above_threshold.py
## solution challenge
def features_above_threshold(df, threshold=0.5):
    """Return the column labels whose share of non-null values exceeds ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.
    threshold : float, default 0.5
        Minimum fraction of non-null entries; the comparison is strict (``>``).

    Returns
    -------
    pandas.Index
        The subset of ``df.columns`` that passes the threshold, in original order.
    """
    n_rows = df.shape[0]
    # fraction of filled (non-null) cells per column
    filled_share = df.notnull().sum() / n_rows
    keep_mask = (filled_share > threshold).values
    return df.columns[keep_mask]

In [None]:
# %load ../src/_solutions/infere_mission_data.py
## solution challenge 
# Parse the "Mission Date" column into datetime values and write the result
# back into the same column of df_clean (df_clean is defined outside this cell).
# NOTE(review): `infer_datetime_format` is deprecated in recent pandas releases
# (2.0+), where passing it only emits a warning -- confirm the pinned pandas
# version before removing it.
df_clean["Mission Date"] = pd.to_datetime(df_clean["Mission Date"], infer_datetime_format=True)

In [None]:
# %load ../src/_solutions/altitude_in_meters.py
## solution challenge
# Convert altitude from hundreds of feet to meters: x * 100 gives feet,
# and 1 ft = 0.3048 m. Vectorised column arithmetic replaces the row-wise
# .apply(lambda ...); the multiplication order is kept so results are identical.
df_clean["Altitude (meters)"] = df_clean["Altitude (Hundreds of Feet)"] * 100 * 0.3048

In [None]:
# %load ../src/_solutions/limit_altitude.py
## solution challenge
# Altitudes above max_height (defined outside this cell) are replaced with np.nan.
altitude_too_high = df_clean["Altitude (meters)"] > max_height
df_clean.loc[altitude_too_high, "Altitude (meters)"] = np.nan

In [None]:
# %load ../src/_solutions/limit_coordinates.py
## solution challenge
# Valid coordinates: longitude within [-180, 180], latitude within [-90, 90].
# The mask and the column list are built once so that the reported count and
# the rows that get blanked can never drift apart.
coordinate_columns = ['Target Longitude', 'Target Latitude']
invalid_coordinates = ((df_clean['Target Longitude'] > 180) |
                       (df_clean['Target Longitude'] < -180) |
                       (df_clean['Target Latitude'] > 90) |
                       (df_clean['Target Latitude'] < -90))

# Number of non-null longitude/latitude values in the offending rows.
print(df_clean.loc[invalid_coordinates, coordinate_columns].count())

# Blank out both coordinates of every offending row.
df_clean.loc[invalid_coordinates, coordinate_columns] = np.nan

## Exploratory Data Analysis in Action - EDA: Airplanes

> **Q1: Which type of airplane is engaged most often?**

In [None]:
# %load ../src/_solutions/eda_airplanes_q1.py
# EDA: Airplanes - Q1

# Count the rows per aircraft series once; the table is both printed and plotted.
series_counts = df_airpl["Aircraft Series"].value_counts()

print("Unique airplanes:\n",df_airpl["Aircraft Series"].unique())
print("---------------------------------------")
print("Most engaged airplanes:\n", series_counts)
print("---------------------------------------")
series_counts.plot.bar(rot=0);

> **Q2: At what height do airplanes operate? At what height do the 10 most common airplane types operate?**

In [None]:
# %load ../src/_solutions/eda_airplanes_q2.py
# EDA: Airplanes - Q2

fig, ax = plt.subplots(3,1, figsize=(16,18))

# get operating height (grouped once, reused for the table and the bar chart)
altitude_by_series = df_airpl.groupby("Aircraft Series")["Altitude (meters)"]
print("Operating height for each aircraft:\n", altitude_by_series.agg(["mean", "min", "max"]).dropna())
altitude_by_series.mean().dropna().sort_values(ascending=False).plot.bar(rot=0, ax=ax[0])
# Label the bar chart's own axes explicitly: plt.ylabel() acts on the *current*
# axes, which after plt.subplots(3, 1) is the last panel, not ax[0].
ax[0].set_ylabel("Mean altitude (meters)")
print("---------------------------------------")

# compute 10 most common airplane types
list_ten_most_common = df_airpl["Aircraft Series"].value_counts().index[:10]
print("10 most common airplane types:\n", list_ten_most_common)

# keep only the rows that belong to one of those ten series
ten_most_common = df_airpl.loc[df_airpl["Aircraft Series"].isin(list_ten_most_common)]
print(ten_most_common.shape)
print("---------------------------------------")

# altitude distribution per series: box plot (middle) and violins split by country (bottom)
sns.boxplot(x="Aircraft Series", y="Altitude (meters)", data=ten_most_common, ax=ax[1])
sns.violinplot(x="Aircraft Series", y="Altitude (meters)", hue="Country", split=True, data=ten_most_common, ax=ax[2]);

> **Q3: Which type of airplane carried the heaviest bombs? Which were the 10 most dangerous airplane types with respect to carried explosives?**

In [None]:
# %load ../src/_solutions/eda_airplanes_q3.py
# EDA: Airplanes - Q3

fig, ax = plt.subplots(2,1, figsize=(16,12))

# Heaviest single high-explosives load per aircraft series, largest first.
# Computed once so the bar chart and the top-10 selection share one ranking.
max_explosives_by_series = (df_airpl.groupby('Aircraft Series')['High Explosives Weight (Tons)'].
                            max().
                            dropna().
                            sort_values(ascending=False))
max_explosives_by_series.plot.bar(ax=ax[0])
ax[0].set_title("Aircraft carrying the heaviest explosives weights")

# compute most devastating aircrafts: the ten series with the largest maximum load
list_ten_dangerous = max_explosives_by_series.index[:10]
ten_dangerous = df_airpl.loc[df_airpl["Aircraft Series"].isin(list_ten_dangerous)]

# distribution of carried explosives for those ten series
sns.boxplot(x="Aircraft Series", y="High Explosives Weight (Tons)", data=ten_dangerous, ax=ax[1]);
fig.tight_layout()

> **Q4: Which Allied Force uses which airplane when and where?**   

In [None]:
# %load ../src/_solutions/eda_airplanes_q4.py
# EDA: Airplanes - Q4

def plot_airplane_type_over_europe(gdf, airplane="B17", 
                                   years=(1940, 1941, 1942, 1943, 1944, 1945), 
                                   kdp=False, aoi=europe):
    """Draw one map panel per year showing the targets attacked by one aircraft series.

    Parameters
    ----------
    gdf : frame with the columns "year", "Aircraft Series", "Country",
        "Target Longitude" and "Target Latitude". Its ``.plot`` is called with
        ``column=``/``categorical=`` (presumably a geopandas GeoDataFrame -- confirm).
    airplane : str
        Value of "Aircraft Series" to select.
    years : iterable of int
        Years to plot, one panel each (two panels per row).
    kdp : bool
        If True draw a 2-D kernel density of the target coordinates,
        otherwise plot the individual records coloured by "Country".
    aoi : object with a ``.plot(ax=..., facecolor=..., edgecolor=...)`` method
        Background layer drawn in every panel. The default is the module-level
        ``europe`` object as it exists when this function is defined.

    Returns
    -------
    (fig, ax) : the figure and the axes of the *last* panel
        (``ax`` is None when ``years`` is empty).
    """
    years = list(years)
    n_cols = 2
    # Keep the original 3x2 grid for up to six years; add rows beyond that
    # instead of failing on an out-of-range subplot index.
    n_rows = max(3, (len(years) + n_cols - 1) // n_cols)
    fig = plt.figure(figsize=(16, 4 * n_rows))
    ax = None
    for e, y in enumerate(years):
        _gdf = gdf.loc[(gdf["year"]==y) & (gdf["Aircraft Series"]==airplane)].copy()
        # Assign the filled column back instead of calling an in-place method on
        # the column accessor, which is not guaranteed to update the frame.
        _gdf["Country"] = _gdf["Country"].fillna("unknown")
        ax = fig.add_subplot(n_rows, n_cols, e+1)
        ax.set_aspect('equal')
        aoi.plot(ax=ax, facecolor='lightgray', edgecolor="white")
        # with two or fewer records only the background map is shown
        if _gdf.shape[0] > 2:
            if kdp:
                # NOTE(review): positional x/y plus shade/shade_lowest/bw is the
                # legacy seaborn kdeplot signature -- confirm the pinned seaborn
                # version before upgrading.
                sns.kdeplot(_gdf['Target Longitude'], _gdf['Target Latitude'], 
                            cmap="viridis", shade=True, shade_lowest=False, bw=0.25, ax=ax)   
            else:
                _gdf.plot(ax=ax, marker='o', cmap='Set1', categorical=True,
                          column='Country', legend=True, markersize=5, alpha=1)
        ax.set_title("Year: " + str(y), size=16)
    fig.tight_layout()
    fig.suptitle("Attacks of airplane {} for different years".format(airplane), size=22)
    # leave room at the top for the figure title
    fig.subplots_adjust(top=0.92)
    return fig, ax
    
    
# run
plot_airplane_type_over_europe(df_airpl, airplane="B17", kdp=False);

## Exploratory Data Analysis in Action - EDA: Targets

> **Q1: Which cities were the 15 most frequent targets?**

In [None]:
# %load ../src/_solutions/eda_targets_q1.py
# EDA: Target - Q1

target_cities = df_tar['Target City']

print("Number of unique cities in the data set:\n", target_cities.nunique())
print("---------------------------------------")
# occurrences per city, largest first, cut to the first 15 entries
city_counts = target_cities.value_counts().sort_values(ascending=False)
most_frequent_cities = city_counts.iloc[:15]
print("Most frequent cities:\n", most_frequent_cities)
print("---------------------------------------")

> **Q2: How much high explosives (in tons) went down on the 25 most frequent targets?**

In [None]:
# %load ../src/_solutions/eda_targets_q2.py
# EDA: Target - Q2

# NOTE(review): this reuses the 15 cities selected in Q1, while the question
# heading asks for the 25 most frequent targets -- confirm which is intended.
list_most_frequent_cities = most_frequent_cities.index
is_frequent_city = df_tar["Target City"].isin(list_most_frequent_cities)
df_cities = df_tar.loc[is_frequent_city]

# total high-explosives weight per city, largest first
explosives_per_city = (
    df_cities
    .groupby("Target City")["High Explosives Weight (Tons)"]
    .sum()
    .sort_values(ascending=False)
)
print("Summed high explosives (in tons) per city:\n", 
      explosives_per_city)
# plot
explosives_per_city.plot.bar()
plt.ylabel("High Explosives in tons", size=12);

> **Q3: How did the aerial attacks change over time for the 15 most frequently targeted cities?**

In [None]:
# %load ../src/_solutions/eda_targets_q3.py
# EDA: Target - Q3

# Index the city records by mission date so they can be resampled per day.
# The guard plus re-binding (instead of set_index(..., inplace=True)) keeps the
# cell re-runnable: on a second run "Mission Date" is already the index and no
# longer a column.
if df_cities.index.name != "Mission Date":
    df_cities = df_cities.set_index("Mission Date")

# common daily timeline spanning the first to the last mission date of all selected cities
df_daily_index = pd.date_range(start=df_cities.index.min(), end=df_cities.index.max(), freq="D")

# one panel per city; squeeze=False keeps `ax` indexable even for a single city
n_cities = len(list_most_frequent_cities)
fig, ax = plt.subplots(n_cities, 1, sharey=True, figsize=(10,32), squeeze=False)
ax = ax[:, 0]
for e, city in enumerate(list_most_frequent_cities):
    # daily total of high explosives dropped on this city
    s = df_cities.loc[df_cities["Target City"]==city, "High Explosives Weight (Tons)"].resample("D").sum()
    # NOTE(review): reindexing leaves NaN on days outside the city's own date
    # span, so the cumulative line is not drawn there; pass fill_value=0 if a
    # flat line is preferred.
    s = s.reindex(df_daily_index)
    s.cumsum().plot(ax=ax[e])
    ax[e].set_title(city.capitalize())
fig.tight_layout()
fig.suptitle("Accumulated high explosives weight (in Tons) due to aerial attacks\n"
             "for the {} most frequently targeted cities in Germany".format(n_cities), size=18)
# leave room at the top for the figure title
fig.subplots_adjust(top=0.95)