In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Scraping
import requests
from bs4 import BeautifulSoup

Sources: (May 31, 2024)
1. https://www.forbesindia.com/article/explainers/top-10-countries-with-most-powerful-military-strength/89891/1

2. https://www.globalfirepower.com/countries-listing.php

In [None]:
# Load the ranking table from the Forbes India article. pd.read_html returns a
# list with one DataFrame per HTML table found on the page; the first table is
# the one used here.
military_df = pd.read_html('https://www.forbesindia.com/article/explainers/top-10-countries-with-most-powerful-military-strength/89891/1')
tot_military = military_df[0]

# Keep only the nation part of "Rank & Nation": split on whitespace that is
# followed by a non-digit character, discard the first piece and re-join the
# remaining pieces with single spaces.
# The pattern is a raw string so that \s and \D reach the regex engine as-is
# instead of being treated by Python as (invalid) string escape sequences.
tot_military['Rank & Nation'] = tot_military['Rank & Nation'].str.split(r'\s(?=\D)').str[1:].str.join(' ')

# Add 5 more countries, taken from https://www.globalfirepower.com/countries-listing.php#google_vignette
# These rows are entered by hand; 'Power Index' is left empty (None) for them.
new_rows = [
    {'Rank & Nation': 'France', 'Power Index': None, 'Total Military Personnel (est)': 376000},
    {'Rank & Nation': 'Brazil', 'Power Index': None, 'Total Military Personnel (est)': 900000},
    {'Rank & Nation': 'Indonesia', 'Power Index': None, 'Total Military Personnel (est)': 1050000},
    {'Rank & Nation': 'Iran', 'Power Index': None, 'Total Military Personnel (est)': 1180000},
    {'Rank & Nation': 'Egypt', 'Power Index': None, 'Total Military Personnel (est)': 1220000}
]

# Create a DataFrame from the new rows
new_df = pd.DataFrame(new_rows)

# Append the hand-entered rows below the scraped ones; ignore_index renumbers
# the combined frame from 0.
tot_military = pd.concat([tot_military, new_df], ignore_index=True)

# 'Country' is the column name the later merge joins on.
tot_military = tot_military.rename(columns={'Rank & Nation': 'Country'})

tot_military

Unnamed: 0,Country,Power Index,Total Military Personnel (est)
0,United States,0.0699,2127500
1,Russia,0.0702,3570000
2,China,0.0706,3170000
3,India,0.1023,5137550
4,South Korea,0.1416,3820000
5,United Kingdom,0.1443,1108860
6,Japan,0.1601,328150
7,Turkiye,0.1697,883900
8,Pakistan,0.1711,1704000
9,Italy,0.1863,289000


In [None]:
# global_data = pd.read_html('https://www.globalfirepower.com/countries-listing.php#google_vignette')

# Page that lists the countries; it is downloaded with requests and parsed
# with BeautifulSoup below.
url = 'https://www.globalfirepower.com/countries-listing.php#google_vignette'

# Browser-like headers sent with the GET request.
header = {
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

long_names = []
short_names = []
powers = []

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops on an HTTP error response instead of letting an
# error page be parsed as if it were the listing.
req = requests.get(url, headers=header, timeout=30)
req.raise_for_status()
soup = BeautifulSoup(req.text, 'html.parser')

# The matched spans are consumed in alternation: even positions are stored as
# the long name, odd positions as the short name.
find_name = soup.findAll('span', class_='textWhite textLarge textShadow')
for index, i in enumerate(find_name):
  name_text = i.text.strip()
  if index % 2 == 0:
    long_names.append(name_text)
  else:
    short_names.append(name_text)

# For each of these spans the second space-separated token of the text is kept
# as the power value (as a string); '' is stored when there is no second token.
find_power = soup.findAll('span', class_='textLarge textLtGray')
for index, j in enumerate(find_power):
  try:
    power = j.text.strip().split(' ')[1]
  except IndexError:
    power = ''
  powers.append(power)

# Create DataFrame
data = {
    'Country': long_names,
    'Short Name': short_names,
    'Power Index (GFP)': powers
}
# The three lists must be the same length for pd.DataFrame to accept them;
# printing the lengths first makes a mismatch easy to spot.
print(len(long_names))
print(len(short_names))
print(len(powers))
df = pd.DataFrame(data)
df.head()

145
145
145


Unnamed: 0,Country,Short Name,Power Index (GFP)
0,United States,USA,0.0699
1,Russia,RUS,0.0702
2,China,CHN,0.0706
3,India,IND,0.1023
4,South Korea,SKO,0.1416


In [None]:
# Attach the personnel estimates to the scraped listing. The left join keeps
# every row of df; countries with no match in tot_military get NaN.
merged_df = pd.merge(df, tot_military, on='Country', how='left')
merged_df = merged_df[['Country', 'Short Name', 'Power Index (GFP)', 'Total Military Personnel (est)']]

# Take the top 15 countries (the first 15 rows). The explicit copy makes the
# column assignments below act on an independent frame, not on a slice.
merged_df = merged_df.head(15).copy()

# The integer cast cannot represent missing values, so report exactly which
# countries found no personnel figure in the join instead of failing with a
# generic cast error.
missing_personnel = merged_df.loc[merged_df['Total Military Personnel (est)'].isna(), 'Country'].tolist()
if missing_personnel:
    raise ValueError(
        f"No 'Total Military Personnel (est)' value after the merge for: {missing_personnel}"
    )
merged_df['Total Military Personnel (est)'] = merged_df['Total Military Personnel (est)'].astype(int)

# Change "INO" to "IDN" in the "Short Name" column
merged_df.loc[merged_df['Short Name'] == 'INO', 'Short Name'] = 'IDN'
merged_df

Unnamed: 0,Country,Short Name,Power Index (GFP),Total Military Personnel (est)
0,United States,USA,0.0699,2127500
1,Russia,RUS,0.0702,3570000
2,China,CHN,0.0706,3170000
3,India,IND,0.1023,5137550
4,South Korea,SKO,0.1416,3820000
5,United Kingdom,UKD,0.1443,1108860
6,Japan,JPN,0.1601,328150
7,Turkiye,TKY,0.1697,883900
8,Pakistan,PAK,0.1711,1704000
9,Italy,ITA,0.1863,289000


In [None]:
# Save the merged table as a CSV file; index=False leaves the row index out.
output_csv = 'Military Strength Ranking 2024.csv'
merged_df.to_csv(output_csv, index=False)