### Importing important libraries

In [129]:
import numpy as np
from collections import defaultdict
from scipy.stats import beta
import matplotlib.pyplot as plt
import seaborn as sns

###FEATURE ENGINEERING
### All the metrics/parameters to predict the Betrayal Likelihood of the troop of soldiers, assigning different weights to each and hence doing further analysis.

In [112]:
class Metrics():

    def __init__(self, row):
        self.personal_metric = 0
        self.service_metric = 0
        self.behavioral_metric = 0
        self.performance_metric = 0
        self.demographic_metric = 0
        self.external_factor_bias = 0
        self.betrayal_likelihood = 0
        self.row = row

    def personal(self):
        familial_history = self.row['familial_history']
        dependents = self.row['dependents']
        kin = self.row['kin']
        pay_gap = self.row['pay_gap']

        self.personal_metric = pay_gap * 0.35 + familial_history * 0.25 + dependents * 0.2 + kin * 0.2
        return self.personal_metric

    def service(self):
        risk_last_op = self.row['risk_last_op']
        active_duty_record = self.row['active_duty_record']
        served_time = self.row['served_time']
        off_time = self.row['off_time']
        current_posting = self.row['current_posting']
        posting_period = self.row['posting_period']
        failure = self.row['failure']

        self.service_metric = (failure * 0.2 + posting_period * 0.2 + current_posting * 0.2 +
                               off_time * 0.1 + served_time * (-0.15) +
                               active_duty_record * 0.3 + risk_last_op * 0.2)
        return self.service_metric

    def behavioral(self):
        corruption = self.row['corruption']
        physical_health = self.row['physical_health']
        injury_type = self.row['injury_type']
        mental_health = self.row['mental_health']
        campaign_count = self.row['campaign_count']
        campaign_cause = self.row['campaign_cause']
        peer_based = self.row['peer_based']

        self.behavioral_metric = (0.2 * corruption + (-0.15) * physical_health + 0.2 * injury_type +
                                  0.2 * mental_health + 0.1 * campaign_count +
                                  0.2 * campaign_cause + 0.25 * peer_based)
        return self.behavioral_metric

    def performance(self):
        grievance = self.row['grievance']
        n_reports = self.row['n_reports']
        complaint_c = self.row['complaint_c']
        absents = self.row['absents']
        task_sf = self.row['task_sf']

        self.performance_metric = (-0.15 * grievance + 0.2 * n_reports + 0.35 * complaint_c +
                                   0.25 * absents + 0.35 * task_sf)
        return self.performance_metric

    def demographic(self):
        genb_location = self.row['genb_location']
        genc_location = self.row['genc_location']
        age = self.row['age']
        edu_level = self.row['edu_level']
        previously_c = self.row['previously_c']

        self.demographic_metric = (-0.75 * genb_location + 0.5 * genc_location + (-0.1) * age +
                                   (-0.15) * edu_level + 1.5 * previously_c)
        return self.demographic_metric

    def external(self):
        attack = self.row['attack']
        riots = self.row['riots']
        emergency = self.row['emergency']
        x_incentives = self.row['x_incentives']
        p_incentives = self.row['p_incentives']
        risk = self.row['risk']
        cultural_change = self.row['cultural_change']
        discipline = self.row['discipline']

        self.external_factor_bias = (attack * 0.2 + riots * 0.25 + emergency * 0.2 - x_incentives * 0.2 +
                                     p_incentives * 0.3 - risk * 0.3 + discipline * 0.2 +
                                     cultural_change * 0.35)
        return self.external_factor_bias


### Calculating Risk-Scores for likelhood

In [109]:
def calculate_risk_score(personal_metric, service_metric, behavioral_metric, performance_metric, demographic_metric, external_factor_bias):
  risk_score = 0
  risk_score = service_metric * 0.3 + behavioral_metric * 0.25 + performance_metric * 0.2 + personal_metric * 0.15 + demographic_metric * 0.1 + external_factor_bias
  betrayal_likelihood = 1/(1+np.exp(-risk_score))

  return betrayal_likelihood

### Initialising a dataframe and hence making if flexible to add more entries for new soldiers.

In [110]:
#dataframe of soldier's data
import pandas as pd

def create_betrayal_metrics_df():
    columns = [
        'soldier_name','familial_history','dependents', 'kin', 'pay_gap', 'risk_last_op', 'active_duty_record', 'served_time', 'off_time', 'current_posting', 'posting_period', 'failure','corruption', 'physical_health', 'injury_type', 'mental_health', 'campaign_count', 'campaign_cause', 'peer_based', 'grievance', 'n_reports', 'complaint_c', 'absents', 'task_sf', 'genb_location', 'genc_location', 'age', 'edu_level', 'previously_c', 'attack','riots','emergency','x_incentives','p_incentives','risk','cultural_change','discipline'
    ]

    df = pd.DataFrame(columns=columns)
    return df

def add_new_entry(df, entry_data):
  df.loc[len(df)] = entry_data
  return df

betrayal_df = create_betrayal_metrics_df()

new_soldier_metrics = {
    'soldier_name': 'abhivansh',
    'familial_history': 1,
    'dependents': 2,
    'kin': 3,
    'pay_gap': 4,
    'risk_last_op': 0.5,
    'active_duty_record': 0.6,
    'served_time': 7,
    'off_time': 8,
    'current_posting': 9,
    'posting_period': 10,
    'failure': 0.7,
    'corruption': 7000,
    'physical_health': 10,
    'injury_type': 2,
    'mental_health': 12,
    'campaign_count': 13,
    'campaign_cause': 1,
    'peer_based': 14,
    'grievance': 15,
    'n_reports': 16,
    'complaint_c': 1,
    'absents': 17,
    'task_sf': 1,
    'genb_location': 18,
    'genc_location': 19,
    'age': 20,
    'edu_level': 3,
    'previously_c': 21,
    'attack': 0.8,
    'riots': 3,
    'emergency': 0,
    'x_incentives': 5000,
    'p_incentives': 5000,
    'risk': 0,
    'cultural_change': 1,
    'discipline': 0.5
}

betrayal_df = add_new_entry(betrayal_df, new_soldier_metrics)


In [111]:
#sample dataframe
betrayal_df

Unnamed: 0,soldier_name,familial_history,dependents,kin,pay_gap,risk_last_op,active_duty_record,served_time,off_time,current_posting,...,edu_level,previously_c,attack,riots,emergency,x_incentives,p_incentives,risk,cultural_change,discipline
0,abhivansh,1,2,3,4,0.5,0.6,7,8,9,...,3,21,0.8,3,0,5000,5000,0,1,0.5


### Function to calculate scores for all entries of a particular Dataframe, and shortlisting Potential Betrayers on the basis of a set threshold.

In [138]:
def row_calculation(dataframe):
  all_betrayal_scores = []
  potential_betrayers= []
  THRESHOLD_BE=0.85
  for name in dataframe['soldier_name']:

    soldier_row = dataframe[dataframe['soldier_name'] == name].iloc[0]
    metrics = Metrics(soldier_row)
    personal_metric = sigmoid(metrics.personal())
    service_metric = sigmoid(metrics.service())
    behavioral_metric = sigmoid(metrics.behavioral())
    performance_metric = sigmoid(metrics.performance())
    demographic_metric = sigmoid(metrics.demographic())
    external_factor_bias = sigmoid(metrics.external())

    betrayal_likelihood = calculate_risk_score(personal_metric, service_metric, behavioral_metric, performance_metric, demographic_metric, external_factor_bias)

    if betrayal_likelihood > THRESHOLD_BE:
      potential_betrayers.append(name)
    all_betrayal_scores.append(betrayal_likelihood)
  return potential_betrayers

In [114]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

### Function to add new data and calculate it's risk-scores.

In [139]:
def new_entry_with_calc(df, entry_data):
  df.loc[len(df)] = entry_data
  row_calculation(df)
  pass

### Data Analysis of Individual Soldier

In [140]:
def data_analysis_soldier(dataframe,name):

  soldier_row = dataframe[dataframe['soldier_name'] == name].iloc[0]

  metrics = Metrics(soldier_row)
  personal_metric = sigmoid(metrics.personal())
  service_metric = sigmoid(metrics.service())
  behavioral_metric = sigmoid(metrics.behavioral())
  performance_metric = sigmoid(metrics.performance())
  demographic_metric = sigmoid(metrics.demographic())
  external_factor_bias = sigmoid(metrics.external())

  arr = [personal_metric, service_metric, behavioral_metric, performance_metric, demographic_metric, external_factor_bias]

  all_metrics={'\tPersonal records':personal_metric, '\tService records':service_metric, '\tBehaviour':behavioral_metric, '\tPerformance':performance_metric,'\tDemographic': demographic_metric, '\tExternal Factors':external_factor_bias}

  betrayal_likelihood = calculate_risk_score(personal_metric, service_metric, behavioral_metric, performance_metric, demographic_metric, external_factor_bias)
  print("Risk Assessment of",soldier_row['soldier_name'],":")
  print('Likelihood of betrayal based on:')
  for key,value in all_metrics.items():
    print(key,":",value)
  print("Betrayal Likelihood:",betrayal_likelihood)
  print("\nRecent behaviours of Concern:")

  np_arr= np.array(arr)
  max_val= np.max(np_arr)
  for key,value in all_metrics.items():
    if value == max_val:
      print(key,'is/are likely to cause betrayal. Please consider helping.')


In [136]:
data_analysis_soldier(betrayal_df,'abhivansh')

Risk Assessment of abhivansh :
Likelihood of betrayal based on:
	Personal records : 0.9340109905087812
	Service records : 0.9814761750367006
	Behaviour : 1.0
	Performance : 0.997268039236989
	Demographic : 0.9999999999867895
	External Factors : 1.0
Betrayal Likelihood: 0.8791067179672545

Recent behaviours of Concern:
	Behaviour is/are likely to cause betrayal. Please consider helping.
	External Factors is/are likely to cause betrayal. Please consider helping.


### Synthetic Data Initialisation & Calculations

In [141]:
syn_data = pd.read_csv("/content/soldier_betrayal_metrics.csv")

In [142]:
row_calculation(syn_data)

  return 1/(1+np.exp(-x))


['soldier_0',
 'soldier_4',
 'soldier_16',
 'soldier_20',
 'soldier_24',
 'soldier_31',
 'soldier_36',
 'soldier_37',
 'soldier_41',
 'soldier_46',
 'soldier_50',
 'soldier_70',
 'soldier_71',
 'soldier_76',
 'soldier_77',
 'soldier_78',
 'soldier_81',
 'soldier_86',
 'soldier_93',
 'soldier_95',
 'soldier_96',
 'soldier_97',
 'soldier_98',
 'soldier_102',
 'soldier_105',
 'soldier_106',
 'soldier_117',
 'soldier_120',
 'soldier_134',
 'soldier_137',
 'soldier_139',
 'soldier_140',
 'soldier_147',
 'soldier_149',
 'soldier_155',
 'soldier_157',
 'soldier_159',
 'soldier_162',
 'soldier_174',
 'soldier_180',
 'soldier_181',
 'soldier_188',
 'soldier_189',
 'soldier_195',
 'soldier_196',
 'soldier_199',
 'soldier_211',
 'soldier_217',
 'soldier_219',
 'soldier_220',
 'soldier_225',
 'soldier_228',
 'soldier_229',
 'soldier_234',
 'soldier_235',
 'soldier_236',
 'soldier_250',
 'soldier_251',
 'soldier_253',
 'soldier_255',
 'soldier_264',
 'soldier_266',
 'soldier_268',
 'soldier_269',
 

### Two contrasting instances for Potential Betrayals

In [126]:
data_analysis_soldier(syn_data, 'soldier_429')

Risk Assessment of soldier_429 :
Likelihood of betrayal based on:
	Personal records : 1.0
	Service records : 1.0
	Behaviour : 1.0
	Performance : 0.9999999999963998
	Demographic : 3.784906243060045e-12
	External Factors : 1.0
Betrayal Likelihood: 0.8698915256369635

Recent behaviours of Concern:
	Personal records is/are likely to cause betrayal. Please consider helping.
	Service records is/are likely to cause betrayal. Please consider helping.
	Behaviour is/are likely to cause betrayal. Please consider helping.
	External Factors is/are likely to cause betrayal. Please consider helping.


In [128]:
data_analysis_soldier(syn_data, 'soldier_1')

Risk Assessment of soldier_1 :
Likelihood of betrayal based on:
	Personal records : 1.0
	Service records : 1.0
	Behaviour : 1.0
	Performance : 0.9999982396567868
	Demographic : 1.0
	External Factors : 3.7690763969586906e-273
Betrayal Likelihood: 0.7310585094091028

Recent behaviours of Concern:
	Personal records is/are likely to cause betrayal. Please consider helping.
	Service records is/are likely to cause betrayal. Please consider helping.
	Behaviour is/are likely to cause betrayal. Please consider helping.
	Demographic is/are likely to cause betrayal. Please consider helping.


### Ranking

For ranking of soldiers for their betrayal likelihood, given we have already listed out the potential betrayers on the basis of their betrayal scores by different metric weights, we just need to sort their scores, via $sort()$ function in Python. This will give us the sorted list of Soldiers, hence providing us with complete overview of the condition.