In [1]:
import numpy as np 
import pandas as pd 

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

In [2]:
# Load both CSV files of the dataset: equipment losses first, personnel losses second.
equipment, personnel = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/2022-ukraine-russian-war/russia_losses_equipment.csv",
        "../input/2022-ukraine-russian-war/russia_losses_personnel.csv",
    )
)

**We have two datasets: one records equipment losses and the other records personnel losses. Let's study the equipment losses first.**

In [3]:
# Preview the first rows of the equipment table.
equipment.head()

In [4]:
# List the column names of the equipment table.
equipment.columns

In [5]:
# Column dtypes and non-null counts of the equipment table.
equipment.info()

In [6]:
# Number of missing values in each column.
equipment.isnull().sum()

**We are not going to drop the rows that contain null values; instead, we will replace the nulls with zero, because they are key to understanding when Russia started using these special weapons to deal more damage to Ukraine.**

In [7]:
# Replace the missing values in these two columns with zero instead of dropping rows.
zero_filled_columns = ['special equipment', 'mobile SRBM system']
equipment[zero_filled_columns] = equipment[zero_filled_columns].fillna(0)

In [8]:
# Summary statistics of the numeric columns.
equipment.describe()

In [9]:
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
# Correlation heatmap of the numeric columns of the equipment table.
# Only numeric columns are passed to corr(): on pandas >= 2.0, corr() no longer drops
# non-numeric columns silently and raises if the frame contains one.
# NOTE(review): this dataset appears to carry a date string column - confirm with equipment.info().
# select_dtypes is used rather than corr(numeric_only=True) because it also works on
# pandas versions older than 1.5, where corr() has no numeric_only parameter.
numeric_equipment = equipment.select_dtypes(include="number")

plt.figure(figsize=(10,10))
sns.heatmap(numeric_equipment.corr(), cmap="YlGnBu", annot=True)
plt.show()

**The `day` column has a strong positive correlation with all the other feature columns. This indicates that as the days progressed, the equipment losses only increased.**

In [11]:
# Equipment categories analysed in the cells below.
weapon_columns = [
    'aircraft',
    'helicopter',
    'tank',
    'APC',
    'field artillery',
    'MRL',
    'military auto',
    'fuel tank',
    'drone',
    'naval ship',
    'anti-aircraft warfare',
    'special equipment',
    'mobile SRBM system',
]

# Largest value recorded in each category, in the same order as weapon_columns.
data = [equipment[column].max() for column in weapon_columns]

In [12]:
import plotly.express as px

# Pie chart of each category's share, sized by the largest value recorded per column.
peak_per_category = equipment[weapon_columns].max()
px.pie(
    equipment[weapon_columns],
    values=peak_per_category,
    names=weapon_columns,
)

In [13]:
# One entry per plotted line: (column, keyword arguments for plt.plot), in drawing order.
# "Special Equipment" deliberately has no colour, so matplotlib picks its default.
line_specs = [
    ('aircraft', dict(color='red', marker='o', label="Aircraft")),
    ('helicopter', dict(color='green', marker='o', label="Helicopter")),
    ('tank', dict(color='cyan', marker='s', label="Tank")),
    ('APC', dict(color='magenta', marker='s', label="APC")),
    ('field artillery', dict(color='yellow', marker='s', label="Field Artillery")),
    ('MRL', dict(color='tab:orange', marker='s', label="MRL")),
    ('military auto', dict(color='tab:purple', marker='s', label="Military Auto")),
    ('fuel tank', dict(color='tab:olive', marker='s', label="Fuel Tank")),
    ('drone', dict(color='tab:pink', marker='o', label="Drone")),
    ('naval ship', dict(color='#00FF00', marker='v', label="Naval Ship")),
    ('anti-aircraft warfare', dict(color='#00FFFF', marker='s', label="Anti-Aircraft Warfare")),
    ('special equipment', dict(marker='s', label="Special Equipment")),
    ('mobile SRBM system', dict(color='#800080', marker='s', label="Mobile SRBM System")),
]

# Draw every category against the day number on one shared figure.
plt.figure(figsize=(15, 15))
for column, line_style in line_specs:
    plt.plot(equipment['day'], equipment[column], **line_style)
plt.ylabel("Count of losses")
plt.xlabel("No. of Days")
plt.legend()
plt.show()

**It seems that our data is in cumulative form. I am more interested in the number of losses on each individual day, so we take the difference between each day and the previous one to get the actual number of losses per day.**

In [14]:
# Turn each cumulative column into a per-day loss column named actual_loss_<column>.
# diff() leaves NaN in the first row because there is no previous day; that row's
# daily loss is the first cumulative value itself.
# The first value is set on the standalone Series before it is attached to the frame.
# Chained assignment (`equipment[col][0] = ...`) triggers SettingWithCopyWarning and
# does not write through under pandas Copy-on-Write, leaving NaN in place.
for i in weapon_columns:
    daily_loss = equipment[i].diff()
    daily_loss.iloc[0] = equipment[i].iloc[0]
    equipment[f'actual_loss_{i}'] = daily_loss

In [15]:
# Preview the table again to check the newly added actual_loss_* columns.
equipment.head()

In [16]:
# One entry per panel: (daily-loss column, panel title, keyword arguments for bar),
# listed in row-major order of the grid.
# "Special Equipment" deliberately has no colour, so matplotlib picks its default.
panel_specs = [
    ('actual_loss_aircraft', "Distribution of Aircraft Loss", dict(color='red')),
    ('actual_loss_helicopter', "Distribution of Helicopter Loss", dict(color='green')),
    ('actual_loss_tank', "Distribution of Tank Loss", dict(color='cyan')),
    ('actual_loss_APC', "Distribution of APC Loss", dict(color='magenta')),
    ('actual_loss_field artillery', "Distribution of Field artillery Loss", dict(color='yellow')),
    ('actual_loss_MRL', "Distribution of MRL Loss", dict(color='tab:orange')),
    ('actual_loss_military auto', "Distribution of Military Auto Loss", dict(color='tab:purple')),
    ('actual_loss_fuel tank', "Distribution of Fuel Tank Loss", dict(color='tab:olive')),
    ('actual_loss_drone', "Distribution of Drone Loss", dict(color='tab:pink')),
    ('actual_loss_naval ship', "Distribution of Naval Ship Loss", dict(color='#00FF00')),
    ('actual_loss_anti-aircraft warfare', "Distribution of Anti-Aircraft Warfare Loss", dict(color='#00FFFF')),
    ('actual_loss_special equipment', "Distribution of Special Equipment Loss", dict()),
    ('actual_loss_mobile SRBM system', "Distribution of SRBM Loss", dict(color='#800080')),
]

figure, axis = plt.subplots(4, 4, figsize=(20, 20))
# Flattened row-major view of the grid: slots[0] is axis[0, 0], slots[12] is axis[3, 0].
slots = axis.ravel()

# One bar chart of daily losses per category.
for slot, (column, title, bar_style) in zip(slots, panel_specs):
    slot.bar(equipment['day'], equipment[column], **bar_style)
    slot.set_title(title)

# The 4x4 grid has 16 slots but only 13 panels; remove the three unused axes.
for unused_slot in slots[len(panel_specs):]:
    figure.delaxes(unused_slot)

plt.show()

## Personnel Dataset

In [17]:
# Preview the first rows of the personnel table.
personnel.head()

In [18]:
# (rows, columns) of the personnel table.
personnel.shape

In [19]:
# Column dtypes and non-null counts of the personnel table.
personnel.info()

In [20]:
# Summary statistics of the numeric columns.
personnel.describe()

In [21]:
# Plot both personnel columns against the day number on one shared axis.
fig, ax = plt.subplots(figsize=(15, 15))
ax.plot(personnel['day'], personnel['personnel'], color='red', marker='o', label="Personnel")
ax.plot(personnel['day'], personnel['POW'], color='green', marker='s', label="POW")
ax.set_ylabel("Count of Human losses")
ax.set_xlabel("No. of Days")
ax.legend()
plt.show()

In [22]:
# Find the personnel loss on each day.
# Each listed column gets a per-day counterpart named actual_loss_<column>.
# diff() leaves NaN in the first row because there is no previous day; that row's
# daily loss is the first recorded value itself.
# The first value is set on the standalone Series before it is attached to the frame.
# Chained assignment (`personnel[col][0] = ...`) triggers SettingWithCopyWarning and
# does not write through under pandas Copy-on-Write, leaving NaN in place.

personnel_columns = ['personnel','POW']
for i in personnel_columns:
    daily_loss = personnel[i].diff()
    daily_loss.iloc[0] = personnel[i].iloc[0]
    personnel[f'actual_loss_{i}'] = daily_loss

In [23]:
# Preview the table again to check the newly added actual_loss_* columns.
personnel.head()

In [24]:
# Two side-by-side bar charts: daily personnel losses (left) and daily POW counts (right).
fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(20, 10))

# (axis, daily-loss column, title, keyword arguments for bar) for each panel.
# The left panel has no colour, so matplotlib picks its default.
bar_panels = [
    (left_ax, 'actual_loss_personnel', 'Distribution of Human Personnel Losses', dict()),
    (right_ax, 'actual_loss_POW', 'Distribution of Prisoners of War', dict(color='#800080')),
]

for panel_ax, column, title, bar_style in bar_panels:
    panel_ax.bar(personnel['day'], personnel[column], **bar_style)
    panel_ax.set_xlabel('No. of Days')
    panel_ax.set_ylabel('Count of Human Losses')
    panel_ax.set_title(title)

plt.show()