In [1]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.image as mpimg
import matplotlib.animation as animation

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

import seaborn as sns
import time

import figures



# Trade Matrix Calculated From UN COMTRADE Dataset

The matrices calculated here are created from the publicly available data from UN Comtrade dataset (2001-2009) and Feenstra Dataset (1962-2000). I have collected data on the types of products that are traded between a sample of 70 countries between years 1962 and 2009.

These datasets use Standardized International Trade Codes (SITC), which are used for classify types of goods that are globally traded. SITC codes consist of 4 Digits. Each digit determines a more specific category from left to right. For example: 
* 1 - Beverages and Tobacco 
    * 11 - Beverages
        * 112 - Alcoholic beverages
            * 112.3 - Beer made from malt (including ale, stout, and porter)

Below is a list of the top level codes:
0. Food and live animals
1. Beverages and tobacco
2. Crude materials, inedible, except fuels
3. Mineral, fuels, lubricants and related materials
4. Animal and vegetable oils, fats and waxes
5. Chemicals and related products, n.e.s.
6. Manufactured goods classified chiefly by material
7. Machinery and transport equipment
8. Miscellaeous manufactured articles
9. Commodities and transactions not classified elsewhere

You can find more information regarding SITC Classifications registry at http://unstats.un.org/unsd/cr/registry/regcst.asp?Cl=14

Below is an excerpt from all the data I have concatenated together from these sources

In [8]:
products_trade = pd.read_csv('data/sitc.csv')
products_trade

Unnamed: 0,year,icode,importer,ecode,exporter,sitc4,unit,dot,value,quantity
0,2000,100000,World,100000,World,,,,5895564.0,
1,2000,100000,World,100000,World,0011,,,1201050.0,
2,2000,100000,World,100000,World,0011,N,,518532.0,1422555.0
3,2000,100000,World,100000,World,0011,W,,2179618.0,1261510.0
4,2000,100000,World,100000,World,0012,,,120856.0,
5,2000,100000,World,100000,World,0012,N,,66609.0,2286888.0
6,2000,100000,World,100000,World,0012,W,,510111.0,345008.0
7,2000,100000,World,100000,World,0013,,,34966.0,
8,2000,100000,World,100000,World,0013,N,,293097.0,2510376.0
9,2000,100000,World,100000,World,0013,W,,1070801.0,910991.0


Using this dataset I have created role equivalence measure that is previously used in the sociology and management literature. This measure calculates the similarities between nodes in a network by considering the correlations of valued ties. 

To calculate this measure I have first calculated an export product category share vector (EPSV) and an import product category share vector (IPSV) using the formulas below:

### $EPSV_{it} = \frac{exports_{ikt}}{\sum exports_{ikt}}$
### $IPSV_{it} = \frac{imports_{ikt}}{\sum imports_{ikt}}$

I concatenated these two vectors to get a product share vector (PSV) and finally I created a trade matrix based on the correlations of product share vectors for each country.

## Role Equivalence in Trade Matrices for each year:

### $r(PSV_{it}, PSV_{jt})$

Below is the symmetric trade matrix for 1962 where each cell reports for the correlation on the type of products trade between each pair of countries:

In [11]:
tradeyear1962 = pd.read_csv('data/1962_rev.csv')
tradeyear1962.ix[:, 2:]

Unnamed: 0,value_stacked117100,value_stacked135040,value_stacked137880,value_stacked138180,value_stacked141400,value_stacked162880,value_stacked163840,value_stacked165660,value_stacked211240,value_stacked218400,...,value_stacked552460,value_stacked553520,value_stacked555780,value_stacked557520,value_stacked557560,value_stacked583480,value_stacked586160,value_stacked586420,value_stacked710360,value_stacked715540
0,1.000000,0.580727,0.298044,0.470812,0.444656,0.142892,0.154735,0.264662,0.526961,0.261812,...,0.179566,0.171661,0.258835,0.420187,0.136996,0.253699,0.140088,0.262556,0.714803,0.466513
1,0.580727,1.000000,0.444529,0.281030,0.090011,0.110292,0.183642,0.111618,0.238687,0.137312,...,0.061212,0.187686,0.156467,0.175202,0.036515,0.255572,0.149251,0.080086,0.190184,0.088327
2,0.298044,0.444529,1.000000,0.376566,0.109824,0.091489,0.107405,0.409892,0.256704,0.152630,...,0.045924,0.083095,0.168233,0.132455,0.057890,0.204983,0.164935,0.498706,0.111423,0.062776
3,0.470812,0.281030,0.376566,1.000000,0.552737,0.022472,0.062017,0.169648,0.062096,0.103984,...,0.013675,0.034875,0.085897,0.010670,0.081922,0.194281,0.239873,0.210107,0.719614,0.534223
4,0.444656,0.090011,0.109824,0.552737,1.000000,0.649721,0.636258,0.492181,0.095077,0.123988,...,0.096211,0.050254,0.042241,0.101738,0.103024,0.057377,0.007624,0.059112,0.629279,0.507022
5,0.142892,0.110292,0.091489,0.022472,0.649721,1.000000,0.958899,0.502106,0.128596,0.016184,...,0.155786,0.027763,0.031853,0.152697,0.043021,0.059916,0.044805,0.117732,0.056770,0.052964
6,0.154735,0.183642,0.107405,0.062017,0.636258,0.958899,1.000000,0.513306,0.155802,0.027102,...,0.266134,0.038619,0.035831,0.152180,0.060850,0.110427,0.092189,0.204426,0.078699,0.073516
7,0.264662,0.111618,0.409892,0.169648,0.492181,0.502106,0.513306,1.000000,0.224008,0.150207,...,0.147793,0.070416,0.110475,0.151310,0.071882,0.112146,0.021339,0.287527,0.197014,0.168632
8,0.526961,0.238687,0.256704,0.062096,0.095077,0.128596,0.155802,0.224008,1.000000,0.375624,...,0.649362,0.128504,0.598085,0.666478,0.176563,0.098815,0.107257,0.484297,0.281046,0.122748
9,0.261812,0.137312,0.152630,0.103984,0.123988,0.016184,0.027102,0.150207,0.375624,1.000000,...,0.056082,0.045907,0.156476,0.476716,0.341011,0.125065,0.029067,0.203066,0.265478,0.123454


In [12]:
mat = np.array(tradeyear1962.ix[:, 2:])
mat

array([[ 1.       ,  0.5807274,  0.2980445, ...,  0.2625561,  0.7148025,
         0.4665128],
       [ 0.5807274,  1.       ,  0.4445291, ...,  0.0800861,  0.1901837,
         0.0883269],
       [ 0.2980445,  0.4445291,  1.       , ...,  0.4987055,  0.111423 ,
         0.0627757],
       ..., 
       [ 0.2625561,  0.0800861,  0.4987055, ...,  1.       ,  0.2308287,
         0.1192481],
       [ 0.7148025,  0.1901837,  0.111423 , ...,  0.2308287,  1.       ,
         0.8239651],
       [ 0.4665128,  0.0883269,  0.0627757, ...,  0.1192481,  0.8239651,
         1.       ]])

Below is a dataset from other variables I have collected in my research regarding crises of sovereignty. In this case I have observations regarding the number of revolutionary situations experienced in a country reported by New York Times. This data comes from Banks' Cross National Time Series Dataset. 

In [13]:
attributevectors1962 = pd.read_csv('data/1962_rev_vector.csv')
attributevectors1962

Unnamed: 0,FEENSTRAcode,FEENSTRAcountry,revolutions,rev1
0,330320,Argentina,5,1
1,710360,Australia,0,0
2,550400,Austria,0,0
3,583480,Hungary,0,0
4,530560,Belgium-Lux,0,0
5,330680,Bolivia,0,0
6,330760,Brazil,0,0
7,451040,Myanmar,1,1
8,211240,Canada,0,0
9,141400,Cent.Afr.Rep,0,0


In [15]:
rev = np.array(attributevectors1962.ix[:, -1:])
countries = np.array(attributevectors1962.ix[:, 1:2])
countries

array([['Argentina'],
       ['Australia'],
       ['Austria'],
       ['Hungary'],
       ['Belgium-Lux'],
       ['Bolivia'],
       ['Brazil'],
       ['Myanmar'],
       ['Canada'],
       ['Cent.Afr.Rep'],
       ['Sri Lanka'],
       ['Chile'],
       ['China'],
       ['Colombia'],
       ['Costa Rica'],
       ['Denmark'],
       ['Dominican Rp'],
       ['Ecuador'],
       ['El Salvador'],
       ['Finland'],
       ['France,Monac'],
       ['Fm German FR'],
       ['Ghana'],
       ['Greece'],
       ['Guatemala'],
       ['Honduras'],
       ['Iceland'],
       ['India'],
       ['Indonesia'],
       ['Italy'],
       ['Cote Divoire'],
       ['Japan'],
       ['Korea Rep.'],
       ['Malaysia'],
       ['Mexico'],
       ['Morocco'],
       ['Netherlands'],
       ['New Zealand'],
       ['Nicaragua'],
       ['Nigeria'],
       ['Panama'],
       ['Paraguay'],
       ['Peru'],
       ['Philippines'],
       ['Poland'],
       ['Portugal'],
       ['Romania'],
       ['Sout

To visualize this data in a meaningful way, I have used the manifold learning algorithms for metric multidimensional scaling from scikit-learn module in Python. Multidimensional scaling allows a two dimensional representation of the data by respecting the original distances of the high-dimensional space. In this case for my graphs, countries that have a higher correlation of trade products (stronger ties) are represented closer to each other. Countries that have a lower correlation are farther apart from each other. This is a nice way of visually representing this data because for my work my hypothesis that countries that have higher similarities in their trade behavior are more likely to experience crises in similar time periods

In [16]:
mds = manifold.MDS(n_components=2, dissimilarity="precomputed", random_state=2)
mds

MDS(dissimilarity='precomputed', eps=0.001, max_iter=300, metric=True,
  n_components=2, n_init=4, n_jobs=1, random_state=2, verbose=0)

Below is the coordinates for the countries calculated for 1962:

In [18]:
results = mds.fit_transform(mat)
results

array([[ -2.63806953e-01,   7.63528909e-02],
       [  7.79086445e-02,  -7.57110076e-02],
       [  9.60647042e-02,   9.87505842e-02],
       [ -1.62797639e-01,   4.97451318e-02],
       [ -6.98435339e-02,  -2.09911719e-01],
       [  7.83917513e-02,  -9.28217494e-03],
       [ -1.89631669e-01,  -5.70568575e-02],
       [ -1.33912084e-01,  -1.18867386e-02],
       [ -2.29887834e-02,  -1.98540823e-01],
       [  1.11529814e-01,  -2.63298325e-02],
       [ -1.25816382e-01,   1.53200233e-02],
       [ -4.31430630e-02,  -2.21884525e-02],
       [  2.06885973e-01,  -6.65261820e-02],
       [ -3.73238295e-02,   3.84234185e-02],
       [ -3.39950578e-02,   7.73383426e-02],
       [  7.52810090e-02,   1.15648135e-01],
       [ -6.35011941e-02,   2.55151986e-01],
       [  2.05583935e-01,   1.04633851e-01],
       [  1.79875107e-01,   3.64878544e-02],
       [  8.76883249e-02,   7.53831894e-02],
       [  1.50443175e-02,  -1.70377118e-04],
       [ -1.46417537e-01,   1.01854048e-01],
       [ -

Using matplotlib I created the figure below. In the figure, I color coded the countries that experienced a revolutionary situation with black dots. Other countries have white dots. I expect black dots to be in general closer to each other. I also expect that in the following year, the countries that are close to black dots should be more likely to experience a revolutionary situation 

In [None]:
fig = plt.figure(figsize=(28.0,16.5))
fig.suptitle('Revolutionary Situations in 1962', fontsize=14, fontweight='bold')


plt.subplots_adjust(bottom = 0.1)

plt.scatter(
    results[:, 0], results[:, 1], marker = 'o', c=rev
    )


for label, x, y, in zip(countries, results[:, 0], results[:, 1]):
    plt.annotate(
    label,
    xy = (x, y), xytext = (-20, 20),
    textcoords = 'offset points', ha = 'right', va='bottom',
    bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
    arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0'))

leg = plt.legend(('Revolutions', 'Stable'), loc = 'lower right', frameon=True)    
leg.get_frame().set_edgecolor('b')
plt.show()

Elsewhere I have written a program that has functions to automate the commands I have used above. With the loop below I have generated a plot for each year in my dataset

In [2]:
year = 1962
while year<2010:
    numbers = figures.inputs('data/'+str(year)+'_rev.csv', 'data/'+str(year)+'_rev_vector.csv')
    coor = figures.mdscoordinates(numbers['matrix'])
    figures.mdsfigure(coor, year, numbers['behavior'], numbers['country names'])
    year+=1
#    plt.pause(0.5)
    plt.close('all')

Below is a plot that displays how countries move across the graph from 1962 to 2009:

In [23]:
year = 1962
while year<2010:
    plt.figure(figsize=(28.0, 16.5))
    img=mpimg.imread('images/'+str(year)+'.png')
    imgplot = plt.imshow(img)
    year+=1
    plt.pause(1)
    plt.close('all')

KeyboardInterrupt: 