In [1]:
# Import the required libraries

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Import the required plotly libraries

import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [4]:
# Data sources: SIPRI (Stockholm International Peace Research Institute) and the World Bank DataBank.
# Read the per-capita military expenditure and GDP tables of the last decade.
# DATA_DIR is the single place to change when the CSV files live somewhere else.

DATA_DIR = "~/Python_Project"

military_spending_percapita = pd.read_csv(DATA_DIR + "/military_expenditure_percapita.csv")
gdp_percapita = pd.read_csv(DATA_DIR + "/gdp_percapita.csv")

In [5]:
# Inspect the first few rows of both tables.
# A notebook cell only renders its final expression, so each preview is passed
# to display() (provided by the IPython kernel); without it the
# military-spending preview is evaluated and silently discarded.

display(military_spending_percapita.head())
display(gdp_percapita.head())

Unnamed: 0,Country Name,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Afghanistan,369.8357959698,373.3611163,445.8932979237,553.3002893831,603.5370230624,669.009050888,631.7449706415,612.0696513912,584.0259020558,561.7787462695
1,Albania,3603.0136853664,4370.5396465315,4114.1365449095,4094.3588319192,4437.1780684361,4247.6143082625,4414.7231395733,4575.7637866622,3954.0227832601,4146.896250462
2,Algeria,3935.1833428593,4905.2548657626,3868.8312304091,4463.3946748895,5432.2502026808,5564.8256595807,5471.8667658485,5470.851007071,4132.7602920886,3843.7523220231
3,American Samoa,8980.5364143481,9871.9971944591,12058.26382343,10352.8227618312,10375.9942154736,11660.329531052,11553.6912144936,11508.5592654725,11541.8549795632,..
4,Andorra,48510.4672826221,47712.2990809422,43215.6870129515,39627.6635163399,40919.1832785953,38167.0957462493,40215.4353174077,..,..,..


In [6]:
# Turn every textual "no data" placeholder into NaN (Not a Number)

placeholder_tokens = ["..", "...", ". .", ". . .", "xxx"]

military_spending_percapita = military_spending_percapita.replace(placeholder_tokens, np.nan)
gdp_percapita = gdp_percapita.replace(placeholder_tokens, np.nan)


In [7]:
# Discard every row that still holds at least one missing value
# (dropna() defaults to axis=0, how='any')

military_spending_percapita = military_spending_percapita.dropna()
gdp_percapita = gdp_percapita.dropna()

In [8]:
# Check that no nulls remain after cleaning: info() prints the non-null count and
# dtype of every column. Only the military-spending table is inspected here.

military_spending_percapita.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 132 entries, 0 to 167
Data columns (total 11 columns):
Country    132 non-null object
2007       132 non-null object
2008       132 non-null object
2009       132 non-null object
2010       132 non-null object
2011       132 non-null object
2012       132 non-null object
2013       132 non-null object
2014       132 non-null object
2015       132 non-null object
2016       132 non-null object
dtypes: object(11)
memory usage: 12.4+ KB


In [9]:
# Inner-join GDP with military spending so only countries present in both tables
# remain; the key is 'Country Name' on the GDP side and 'Country' on the
# military-spending side

gdp_military_spending_percapita = pd.merge(
    gdp_percapita,
    military_spending_percapita,
    how='inner',
    left_on='Country Name',
    right_on='Country',
)

In [10]:
# Drop the GDP-side key column; after the inner join it duplicates 'Country'

gdp_military_spending_percapita = gdp_military_spending_percapita.drop('Country Name', axis=1)

In [11]:
# Use the country name as the row index, since it identifies each row

gdp_military_spending_percapita = (
    gdp_military_spending_percapita
    .set_index(keys='Country')
)

In [12]:
# Convert the yearly columns from object dtype to a numeric dtype.
# The columns are replaced via df[cols] = ... rather than df.loc[:, cols] = ...:
# on pandas >= 2.0 a .loc assignment writes the new values into the existing
# object columns in place, so their dtype would stay object and later numeric
# operations such as round() would skip them.

year_columns = gdp_military_spending_percapita.loc[:, '2007_x':'2016_y'].columns
gdp_military_spending_percapita[year_columns] = (
    gdp_military_spending_percapita[year_columns].apply(pd.to_numeric)
)

In [13]:
# Keep only the two 2016 columns ('2016_x' and '2016_y') of the merged table

gdp_military_spending_percapita_2016 = gdp_military_spending_percapita[['2016_x', '2016_y']]

In [14]:
# Give the two 2016 columns human-readable names

gdp_military_spending_percapita_2016 = gdp_military_spending_percapita_2016.rename(
    columns={
        '2016_x': 'GDP Per Capita (US $)',
        '2016_y': 'Military Spending Per Capita (US $)',
    }
)

In [15]:
# Order the rows from the highest to the lowest military spending per capita

gdp_military_spending_percapita_2016_sorted = gdp_military_spending_percapita_2016.sort_values(
    'Military Spending Per Capita (US $)',
    ascending=False,
)

In [16]:
# Round both columns to zero decimal places (nearest integer)

zero_decimals = {
    'GDP Per Capita (US $)': 0,
    'Military Spending Per Capita (US $)': 0,
}
gdp_military_spending_percapita_2016_sorted = gdp_military_spending_percapita_2016_sorted.round(
    decimals=zero_decimals
)

In [17]:
# Keep the first 20 rows of the sorted table, i.e. the top 20 nations by
# military expenditure per capita

gdp_military_spending_percapita_2016_top20 = gdp_military_spending_percapita_2016_sorted.head(20)

In [18]:
# Write the top-20 table to a CSV file so it can be exported to plotly

gdp_military_spending_percapita_2016_top20.to_csv(
    path_or_buf="gdp_military_spending_percapita_2016_top20.csv"
)

In [22]:
# Fetch the graph from online plotly and display it.
# Credentials are read from the environment instead of being written into the
# notebook: set PLOTLY_USERNAME and PLOTLY_API_KEY before starting the kernel
# (a missing variable raises KeyError).
# NOTE(review): an API key used to be hard-coded in this cell; treat that key as
# leaked and regenerate it in the plotly account settings.
py.sign_in(os.environ['PLOTLY_USERNAME'], os.environ['PLOTLY_API_KEY'])
fig = py.get_figure('https://plot.ly/~iamDeepakA/43/', raw=False)
py.iplot(fig)

In [19]:
# Prepare the data for the top 20 military expenditure nations by per capita for 2015

gdp_military_spending_percapita_2015 = gdp_military_spending_percapita.loc[:,['2015_x','2015_y']]

gdp_military_spending_percapita_2015.columns = ['GDP Per Capita (US $)','Military Spending Per Capita (US $)']

# Preview the renamed 2015 subset. A bare expression in the middle of a cell
# renders nothing, so the preview goes through display() (provided by the
# IPython kernel).
display(gdp_military_spending_percapita_2015.head())

# Sort by military spending, highest first
gdp_military_spending_percapita_2015_sorted = (gdp_military_spending_percapita_2015.sort_values
                                                    (by='Military Spending Per Capita (US $)',ascending=False))

# Round both columns to zero decimal places
gdp_military_spending_percapita_2015_sorted = (gdp_military_spending_percapita_2015_sorted.round
                                        ({'GDP Per Capita (US $)':0,'Military Spending Per Capita (US $)':0}))

# Keep the first 20 rows of the sorted table
gdp_military_spending_percapita_2015_top20 = gdp_military_spending_percapita_2015_sorted.iloc[0:20,]

# Save the results to a CSV file
gdp_military_spending_percapita_2015_top20.to_csv("gdp_military_spending_percapita_2015_top20.csv")

# Show the final table; it must be the cell's last expression to be rendered
# (to_csv returns None, so ending the cell on it displays nothing).
gdp_military_spending_percapita_2015_top20

In [23]:
# Fetch the figure stored at the given plotly URL and show it with py.iplot.
# NOTE(review): presumably relies on the py.sign_in() call made in an earlier cell — confirm.
fig = py.get_figure('https://plot.ly/~iamDeepakA/20/', raw=False)
py.iplot(fig)
