In [3]:
# Import the required libraries

import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [4]:
# Import the required plotly libraries

import plotly.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

In [5]:
# Data sources: SIPRI (Stockholm International Peace Research Institute) and the
# World Bank DataBank. Load the last decade of military expenditure and GDP figures.

military_spending, gdp = (
    pd.read_csv(csv_path)
    for csv_path in (
        "~/Python_Project/military_expenditure_in_usd.csv",
        "~/Python_Project/gdp_data_in_usd.csv",
    )
)

In [6]:
# Inspect the first few rows of both datasets.
# A notebook cell only renders its final expression, so a bare
# `military_spending.head()` placed before `gdp.head()` is silently discarded.
# display() (injected into the namespace by the IPython kernel) renders both frames.

display(military_spending.head(), gdp.head())

Unnamed: 0,Country Name,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016
0,Afghanistan,9843842000.0,10190530000.0,12486940000.0,15936800000.0,17930240000.0,20536540000.0,20046330000.0,20050190000.0,19702990000.0,19469020000.0
1,Albania,10701010000.0,12881350000.0,12044210000.0,11926950000.0,12890870000.0,12319780000.0,12781030000.0,13219860000.0,11390370000.0,11926890000.0
2,Algeria,134977100000.0,171000700000.0,137211000000.0,161207300000.0,200013100000.0,209047400000.0,209783500000.0,213983100000.0,164779500000.0,156079600000.0
3,American Samoa,520000000.0,563000000.0,678000000.0,576000000.0,574000000.0,644000000.0,639000000.0,638000000.0,641000000.0,
4,Andorra,4010991000.0,4001201000.0,3650083000.0,3346517000.0,3427023000.0,3146152000.0,3248925000.0,,,


In [7]:
# Normalise the textual "no data" markers found in the source files to NaN
# (Not a Number) so that pandas treats them as missing values.

MISSING_MARKERS = ["..", "...", ". .", ". . .", "xxx"]

military_spending = military_spending.replace(MISSING_MARKERS, np.nan)
gdp = gdp.replace(MISSING_MARKERS, np.nan)


In [8]:
# Keep only complete rows: dropna() defaults to axis=0 / how='any', i.e. a row is
# removed as soon as any one of its cells is missing.

military_spending = military_spending.dropna()
gdp = gdp.dropna()

In [9]:
# Print a summary of military_spending (row count, per-column non-null counts and
# dtypes) to confirm visually that no missing values remain after the dropna step.
# NOTE(review): the saved output lists every column as object dtype, so the yearly
# figures are not numeric yet at this point.

military_spending.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 132 entries, 0 to 169
Data columns (total 13 columns):
Country      132 non-null object
2007         132 non-null object
2008         132 non-null object
2009         132 non-null object
2010         132 non-null object
2011         132 non-null object
2012         132 non-null object
2013         132 non-null object
2014         132 non-null object
2015         132 non-null object
2016         132 non-null object
Region       132 non-null object
Continent    132 non-null object
dtypes: object(13)
memory usage: 14.4+ KB


In [10]:
# Inner-join GDP with military spending on the country name, keeping only the
# countries present in both datasets. The key column is called 'Country Name' in
# gdp and 'Country' in military_spending.

gdp_military_spending = pd.merge(
    gdp,
    military_spending,
    how='inner',
    left_on='Country Name',
    right_on='Country',
)

In [11]:
# Summarise the merged frame. Year columns suffixed _x come from gdp (left side of
# the merge) and those suffixed _y from military_spending (right side).
gdp_military_spending.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 123 entries, 0 to 122
Data columns (total 24 columns):
Country Name    123 non-null object
2007_x          123 non-null float64
2008_x          123 non-null float64
2009_x          123 non-null float64
2010_x          123 non-null float64
2011_x          123 non-null float64
2012_x          123 non-null float64
2013_x          123 non-null float64
2014_x          123 non-null float64
2015_x          123 non-null float64
2016_x          123 non-null float64
Country         123 non-null object
2007_y          123 non-null object
2008_y          123 non-null object
2009_y          123 non-null object
2010_y          123 non-null object
2011_y          123 non-null object
2012_y          123 non-null object
2013_y          123 non-null object
2014_y          123 non-null object
2015_y          123 non-null object
2016_y          123 non-null object
Region          123 non-null object
Continent       123 non-null object
dtypes: float64(10), 

In [12]:
# 'Country Name' (from gdp) duplicates 'Country' (from military_spending) after the
# inner join, so drop it.

gdp_military_spending = gdp_military_spending.drop('Country Name', axis=1)

In [13]:
# Use the country name as the row index; it serves as the identifier of each row.

gdp_military_spending = gdp_military_spending.set_index(keys='Country')

In [14]:
# Convert the yearly GDP (_x) and military spending (_y) columns to a numeric dtype.
# Assigning through `.loc[:, ...] = ...` writes the converted values into the existing
# object-dtype columns in place on pandas >= 2.0, which leaves the dtype as object.
# Assigning by column labels replaces each column, so the numeric dtype takes effect.

year_columns = gdp_military_spending.loc[:, '2007_x':'2016_y'].columns
gdp_military_spending[year_columns] = (
    gdp_military_spending[year_columns].apply(pd.to_numeric)
)

In [15]:
# Keep only the 2016 figures: '2016_x' is GDP and '2016_y' is military spending.

gdp_military_spending_2016 = gdp_military_spending[['2016_x', '2016_y']]

In [16]:
# Give the two 2016 columns human-readable names.

gdp_military_spending_2016 = gdp_military_spending_2016.rename(
    columns={
        '2016_x': 'GDP (US $)',
        '2016_y': 'Military Spending (US $)',
    }
)

In [17]:
# Order countries from the largest to the smallest military expenditure.

gdp_military_spending_2016_sorted = gdp_military_spending_2016.sort_values(
    'Military Spending (US $)', ascending=False
)

In [18]:
# Round both columns to the nearest integer.
# DataFrame.round ignores dict keys that are not column names, so the previous
# 'GDP Per Capita (US $)' / 'Military Spending Per Capita (US $)' keys rounded
# nothing. The keys below match the column names this frame actually has.

gdp_military_spending_2016_sorted = gdp_military_spending_2016_sorted.round(
    {'GDP (US $)': 0, 'Military Spending (US $)': 0}
)

In [19]:
# The frame is already sorted by spending, so the first 20 rows are the top 20
# military expenditure nations.

gdp_military_spending_2016_top20 = gdp_military_spending_2016_sorted.head(20)

In [20]:
# Write the top-20 table to a CSV file in the working directory so it can be
# uploaded to plotly.

gdp_military_spending_2016_top20.to_csv(
    path_or_buf="gdp_military_spending_2016_top20.csv"
)

In [22]:
# Fetch the published graph from plotly's hosted service and display it inline.
# Credentials are read from the environment so the API key is never stored in the
# notebook: set PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before starting the
# kernel. A key that was previously committed in this cell should be rotated.
py.sign_in(
    os.environ.get('PLOTLY_USERNAME', 'iamDeepakA'),
    os.environ['PLOTLY_API_KEY'],
)
fig = py.get_figure('https://plot.ly/~iamDeepakA/28/', raw=False)
py.iplot(fig)

In [24]:
# Prepare the data for the top 20 military expenditure nations for 2015.
# '2015_x' is GDP and '2015_y' is military spending.

gdp_military_spending_2015 = gdp_military_spending.loc[:, ['2015_x', '2015_y']]
gdp_military_spending_2015.columns = ['GDP (US $)', 'Military Spending (US $)']

# Largest military expenditure first.
gdp_military_spending_2015_sorted = gdp_military_spending_2015.sort_values(
    by='Military Spending (US $)', ascending=False
)

# Round both columns to the nearest integer. DataFrame.round ignores dict keys that
# are not column names, so the key must be 'GDP (US $)'; the previous
# 'GDP Per Capita (US $)' key left GDP unrounded.
gdp_military_spending_2015_sorted = gdp_military_spending_2015_sorted.round(
    {'GDP (US $)': 0, 'Military Spending (US $)': 0}
)

gdp_military_spending_2015_top20 = gdp_military_spending_2015_sorted.head(20)

# Save the results to export to plotly.
gdp_military_spending_2015_top20.to_csv("gdp_military_spending_2015_top20.csv")

# Only the final expression of a cell is rendered, so the table goes last.
gdp_military_spending_2015_top20

In [23]:
# Fetch the published graph from plotly's hosted service and display it inline.
# Credentials are read from the environment so the API key is never stored in the
# notebook: set PLOTLY_API_KEY (and optionally PLOTLY_USERNAME) before starting the
# kernel. A key that was previously committed in this cell should be rotated.
py.sign_in(
    os.environ.get('PLOTLY_USERNAME', 'iamDeepakA'),
    os.environ['PLOTLY_API_KEY'],
)
fig = py.get_figure('https://plot.ly/~iamDeepakA/32/', raw=False)
py.iplot(fig)
