# [NTDS'18] Team project
[ntds'18]: https://github.com/mdeff/ntds_2018

## Students

* Team: `<34>`
* Students: `<Valentin Morel, Xavier Sieber, Cédric Schumacher, Charles-Théophile Coen>`
* Dataset: `<Terrorist attack>`

In [2]:
%matplotlib inline

import numpy as np
import networkx as nx
import pandas as pd
from scipy import sparse
import scipy.sparse.linalg
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation
from pyunlocbox import functions, solvers
from scipy.optimize import minimize as opt
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import KMeans, SpectralClustering, Birch
from sklearn.mixture import GaussianMixture
import hdbscan
from sklearn.datasets import make_blobs
from sklearn import metrics

## Part 0.1
This part consists of creating or loading the different objects that we need for further tasks.

In [5]:
# Features and labels
# The node file is whitespace-separated with no header row:
#   column 0       -> dropped (not used as a feature)
#   columns 1..106 -> kept as the feature matrix
#   column 107     -> label string (a URL whose '#fragment' is the attack type)
# The file is parsed once and both tables are derived from the same frame.
nodes = pd.read_csv('TerrorAttack/terrorist_attack.nodes', sep=r'\s+', header=None, engine='python')

# Features
features = nodes.drop(columns=[0, 107])

# Labels
labels = nodes[[107]].copy()
# Keep only the text after the last '#'.
# NOTE: the previous implementation used str.lstrip(<url>), which strips a *set of
# characters*, not a prefix, and therefore also ate the beginning of lower-case
# labels (e.g. 'other_attack' became 'k'). Check any code relying on the old values.
labels[107] = labels[107].map(lambda x: x.rsplit('#', 1)[-1])

# Locations
adjacency_location = np.load('adjacency_location.npy')
# Two undirected edges are added by hand on top of the stored matrix
# (each one is written in both directions).
for a, b in ((881, 967), (582, 587)):
    adjacency_location[a][b] = 1
    adjacency_location[b][a] = 1
n_nodes = len(adjacency_location)

# location[i] holds the location id given to node i (ids start at 1; 0 = not yet labelled).
# index collects the nodes that already carried an id when the loop reached them.
location = np.zeros(n_nodes)
number_loc = 0
index = []
for i in range(n_nodes):
    already_labelled = location[i] in location[:i]
    if already_labelled:
        index.append(i)
        continue

    # Node i opens a new location ...
    number_loc += 1
    location[i] = number_loc
    # ... which is propagated to its direct neighbours j >= i only
    # (no transitive propagation is performed here).
    for j in range(i, n_nodes):
        if adjacency_location[i, j] == 1:
            location[j] = number_loc

# Distance matrix – 106 features
# Pairwise cosine distances between the feature rows, turned into similarities
# with a Gaussian kernel whose width is the mean pairwise distance.
X = features
distances = pdist(X.values, 'cosine')
kernel_width = np.mean(distances)
weights_106 = np.exp(-distances**2 / kernel_width**2)
# Expand the condensed weight vector into a square (n x n) matrix.
adjacency_features = squareform(weights_106)

## Part 0.2
This part is dedicated to the different functions used throughout this project.

In [9]:
def distance_weighted(i, j):
    """Return entry (i, j) of the module-level ``adjacency_weights`` matrix.

    ``adjacency_weights`` is not defined in this function; it must exist as a
    global before the function is called.
    """
    weight = adjacency_weights[i, j]
    return weight

# Accepted values for the ``Ltype`` and ``typ`` arguments of the clustering functions.
_LAPLACIAN_TYPES = ('std', 'rw', 'norm')
_CLUSTERING_TYPES = ('sc', 'bi', 'hd', 'km')


def _validate_choices(Ltype, typ):
    """Raise ValueError if ``Ltype`` or ``typ`` is not a supported option."""
    if Ltype not in _LAPLACIAN_TYPES:
        raise ValueError(
            "Ltype must be one of {}, got {!r}".format(_LAPLACIAN_TYPES, Ltype))
    if typ not in _CLUSTERING_TYPES:
        raise ValueError(
            "typ must be one of {}, got {!r}".format(_CLUSTERING_TYPES, typ))


def _make_clusterer(typ, k):
    """Build the (unfitted) clustering estimator selected by ``typ``."""
    if typ == 'sc':
        return SpectralClustering(k, affinity='precomputed')
    if typ == 'bi':
        return Birch(threshold=0.8, n_clusters=k)
    if typ == 'hd':
        return hdbscan.HDBSCAN(min_cluster_size=k)
    if typ == 'km':
        return KMeans(k)
    raise ValueError(
        "typ must be one of {}, got {!r}".format(_CLUSTERING_TYPES, typ))


def _laplacian_eigenvectors(adjacency, Ltype):
    """Return (eigenvalues, eigenvectors) of the requested graph Laplacian.

    ``adjacency`` is a dense square array. Only the Laplacian that is actually
    requested is built, so degree inversions are not performed for 'std'.
    """
    degree = adjacency.sum(axis=0)
    L = np.diag(degree) - adjacency
    if Ltype == 'std':
        return np.linalg.eigh(L)

    # D^{-1/2}; nodes with zero degree get a zero factor instead of inf so that
    # the matrix handed to eigh stays finite.
    inv_sqrt = np.zeros(degree.shape, dtype=float)
    connected = degree > 0
    inv_sqrt[connected] = 1.0 / np.sqrt(degree[connected])

    L_n = inv_sqrt[:, None] * (L * inv_sqrt[None, :])
    e, U = np.linalg.eigh(L_n)
    if Ltype == 'rw':
        # D^{-1} L is not symmetric, so eigh cannot be applied to it directly
        # (eigh only reads one triangle of its input). It shares its eigenvalues
        # with the symmetric normalized Laplacian, and its eigenvectors are
        # D^{-1/2} times those of the normalized Laplacian.
        U = inv_sqrt[:, None] * U
    return e, U


def _plot_embedding(U, gt, predicted):
    """Draw two 3D scatter plots of eigenvectors 1..3: ground truth and clustering.

    Requires at least four eigenvectors (columns 1, 2 and 3 of ``U`` are used).
    """
    for colors in (gt, predicted):
        fig = plt.figure(figsize=(12, 12))
        # add_subplot attaches the 3D axes to the figure; a bare Axes3D(fig) is
        # no longer added automatically by recent Matplotlib releases.
        ax = fig.add_subplot(projection='3d')
        ax.scatter(U[:, 1], U[:, 2], U[:, 3], c=colors, cmap='rainbow')


def _fit_and_plot(adjacency, Ltype, k, typ, gt, plot):
    """Fit the chosen clusterer on a dense adjacency matrix and optionally plot."""
    my_clust = _make_clusterer(typ, k)
    # Fit once and reuse the labels for the plot (the estimator is not refitted).
    predicted = my_clust.fit_predict(adjacency)

    if plot:
        # The spectral decomposition is only needed for the visualisation.
        _, U = _laplacian_eigenvectors(adjacency, Ltype)
        _plot_embedding(U, gt, predicted)

    return my_clust


def clustering_location(adj, Ltype, k, typ, gt, plot):
    """Cluster the nodes of a graph given by its adjacency matrix.

    Parameters
    ----------
    adj : adjacency matrix; a SciPy sparse matrix or any dense 2-D array-like.
    Ltype : 'std', 'rw' or 'norm' -- Laplacian used for the plotted embedding
        (combinatorial, random-walk or symmetric normalized).
    k : passed to the estimator as the number of clusters ('sc', 'bi', 'km')
        or as ``min_cluster_size`` ('hd').
    typ : 'sc' (SpectralClustering, precomputed affinity), 'bi' (Birch),
        'hd' (HDBSCAN) or 'km' (KMeans).
    gt : values used to colour the ground-truth scatter plot.
    plot : if true, show the ground-truth and the clustering scatter plots.

    Returns
    -------
    The fitted clustering estimator.

    Raises
    ------
    ValueError
        If ``Ltype`` or ``typ`` is not one of the supported values.
    """
    _validate_choices(Ltype, typ)
    # .toarray() replaces the sparse ``.A`` shortcut, which newer SciPy removed.
    adjacency = adj.toarray() if sparse.issparse(adj) else np.asarray(adj)
    return _fit_and_plot(adjacency, Ltype, k, typ, gt, plot)


def clustering_label(feature, kernel, gt, Ltype, k, typ, plot):
    """Cluster samples using a Gaussian-kernel graph built from their features.

    Pairwise distances are computed with ``pdist(feature, metric=kernel)`` and
    converted to weights ``exp(-d**2 / mean(d)**2)``; the resulting square
    matrix is then clustered.

    Parameters
    ----------
    feature : 2-D array-like of samples (rows) accepted by ``pdist``.
    kernel : distance metric name forwarded to ``pdist``.
    gt : values used to colour the ground-truth scatter plot.
    Ltype : 'std', 'rw' or 'norm' -- Laplacian used for the plotted embedding.
    k : passed to the estimator as the number of clusters ('sc', 'bi', 'km')
        or as ``min_cluster_size`` ('hd').
    typ : 'sc', 'bi', 'hd' or 'km' -- clustering algorithm.
    plot : if true, show the ground-truth and the clustering scatter plots.

    Returns
    -------
    The fitted clustering estimator.

    Raises
    ------
    ValueError
        If ``Ltype`` or ``typ`` is not one of the supported values.
    """
    _validate_choices(Ltype, typ)

    # Creating adjacency
    distances = pdist(feature, metric=kernel)
    kernel_width = distances.mean()
    weights = np.exp(-distances**2 / kernel_width**2)
    adjacency = squareform(weights)

    return _fit_and_plot(adjacency, Ltype, k, typ, gt, plot)

## Part 1.0
In this project, each attack has 106 features and two types of labels: a location, and a type of attack. This part consists of doing basic analysis of these two labels.