<a href="https://colab.research.google.com/github/adampotton/Group-3-gotta-catch-em-all-/blob/main/model_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import torch
import torch.nn as nn
from google.colab import files, drive
import pandas as pd
drive.mount('/content/drive')

Mounted at /content/drive


### Make dictionary for details about conflicts

In [52]:
df_conflicts = pd.read_csv('/content/drive/My Drive/Data_sci/icb1v15.csv')
df_actors = pd.read_csv('/content/drive/My Drive/Data_sci/icb2v15.csv')
country_codes = pd.read_csv('/content/drive/My Drive/Data_sci/country codes 2.csv')

crisis_details = {}

for index, row in df_conflicts.iterrows():
    crisis_number = row['crisno']  # Crisis number
    crisis_name = row['crisname']  # Crisis name
    start_year = row['yrtrig']  # Start year
    end_year = row['yrterm']  # End year

    crisis_details[crisis_number] = {'crisis_name': crisis_name,
                                      'start_year': start_year,
                                      'end_year': end_year,
                                      'actors': set()}

for index, row in df_actors.iterrows():
    crisis_number = row['crisno']  # Crisis number
    r = row['actor']
    idx = np.where(country_codes == row['actor'])[0] # Actor
    actor = country_codes.iloc[idx[0],0]

    if crisis_number in crisis_details:
        crisis_details[crisis_number]['actors'].add(actor)

print(crisis_details[87])

{'crisis_name': 'OCCUPATION OF IRAN', 'start_year': 1941, 'end_year': 1942.0, 'actors': {'Iran'}}


### Make matrix for years (i) and actors (j), for if a conflict has been started

In [90]:
actors = set()
for _, details in crisis_details.items():
    actors.update(details['actors'])

matrix_data = []
for _, details in crisis_details.items():
    crisis_year = details['start_year']
    crisis_actors = details['actors']
    row_data = {'Year': crisis_year}
    for actor in actors:
        row_data[actor] = 1 if actor in crisis_actors else 0
    matrix_data.append(row_data)

actor_crisis_matrix = pd.DataFrame(matrix_data)

columns = actor_crisis_matrix.columns.tolist()
columns.remove('Year')
columns = ['Year'] + columns

actor_crisis_matrix = actor_crisis_matrix[columns] # Row per conflict
combined_actor_crisis_matrix = actor_crisis_matrix.groupby('Year').sum().reset_index() # Row per year
actor_crisis_matrix = actor_crisis_matrix[actor_crisis_matrix['Year'] >= 1950]

print(actor_crisis_matrix.head())

     Year  Guatemala  Algeria  Luxembourg  Argentina  Egypt  Malawi  \
131  1950          0        0           0          0      0       0   
132  1950          0        0           0          0      0       0   
133  1951          0        0           0          0      0       0   
134  1951          0        0           0          0      0       0   
135  1951          0        0           0          0      1       0   

     South Yemen  Qatar  Germany (West Germany)  ...  Panama  Azerbaijan  \
131            0      0                       0  ...       0           0   
132            0      0                       0  ...       0           0   
133            0      0                       0  ...       0           0   
134            0      0                       0  ...       0           0   
135            0      0                       0  ...       0           0   

     Rwanda  Australia  El Salvador  Croatia  Malta  Kosovo  Zambia  Zimbabwe  
131       0          0            0 

### Prepare input data for RNN

In [157]:
df = pd.read_csv('/content/drive/My Drive/Data_sci/all_alphabetical_by_recipient.csv')

categorical_cols = ['Recipient', 'Supplier', 'Weapon designation']
encoder = OneHotEncoder(handle_unknown='ignore')
encoded_data = encoder.fit_transform(df[categorical_cols])

numeric_cols = ['Year of order', 'Number ordered', 'SIPRI TIV per unit', 'SIPRI TIV for total order', 'SIPRI TIV of delivered weapons']
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[numeric_cols])


def extract_arms_trade_data(recipient, target_year):
    data = df[(df['Recipient'] == recipient) & (df['Year of order'] >= target_year - 3) & (df['Year of order'] < target_year)]
    if data.empty:
      return None, None

    encoded_data = encoder.transform(data[categorical_cols])
    scaled_data = scaler.transform(data[numeric_cols])

    input_data = np.concatenate((encoded_data.toarray(), scaled_data), axis=1)

    year_rows = actor_crisis_matrix[actor_crisis_matrix['Year'] == target_year]
    if year_rows[recipient].sum() == 0:
        target = 0
    else:
        target = 1

    return input_data, target


extract_arms_trade_data('Afghanistan', 2007)


(array([[ 0.        ,  1.        ,  0.        , ..., -0.25502206,
         -0.27955698, -0.27606523],
        [ 0.        ,  1.        ,  0.        , ..., -0.16847345,
         -0.24720897, -0.24119009],
        [ 0.        ,  1.        ,  0.        , ..., -0.26800436,
         -0.27820288, -0.27460534],
        [ 0.        ,  1.        ,  0.        , ..., -0.27124993,
         -0.13436705, -0.11953263],
        [ 0.        ,  1.        ,  0.        , ..., -0.26944683,
         -0.2201269 , -0.2119923 ]]),
 0)

### Generate random samples of data for RNN

In [156]:
import random
def generate_random_samples(df, n):
    random_samples = []

    random_indices = random.sample(range(len(df)), n)

    for index in random_indices:
        row = df.iloc[index]
        recipient = row['Recipient']
        target_year = row['Year of order']

        input_data, target = extract_arms_trade_data(recipient, target_year)

        if input_data is not None and target is not None:

            X_rnn = torch.tensor(input_data, dtype=torch.float32)
            y_rnn = torch.tensor(target, dtype=torch.float32)

            random_samples.append((X_rnn, y_rnn))

        random_samples.append((X_rnn, y_rnn))

    return random_samples

n = 5
random_samples = generate_random_samples(df, n)


In [147]:
### Need to sort out country naming issues

### Define the RNN

In [None]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers=1):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = self.fc(out[:, -1, :])
        out = self.sigmoid(out)
        return out

input_size = X.shape[2]
output_size = 1
hidden_size = 64
num_layers = 1

model = RNNModel(input_size, hidden_size, output_size, num_layers)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


### Training

In [None]:
num_epochs = 10
for epoch in range(num_epochs):

    outputs = model(X)
    loss = criterion(outputs.squeeze(), y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')