In [78]:
import pandas as pd 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.mixture import GaussianMixture as GMM
from sklearn.decomposition import PCA, FastICA
from sklearn import linear_model
from matplotlib import pyplot as plt, cm as cm, mlab as mlab
import seaborn as sns; sns.set()
import progressbar as pb
import time 
import math
from scipy.optimize import minimize              # for function minimization
import scipy.stats as scs
import statsmodels.formula.api as smf       
import statsmodels.tsa.api as smt
import statsmodels.api as sm

In [79]:
# read csv/excel data files 
pnas_data1 = pd.read_csv('/home/jaeweon/research/data/pnas_data1.csv')

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [80]:
# format data 

# extract 9 Complexity Characteristic variables 
features = ['PolPop', 'PolTerr', 'CapPop', 'levels', 'government','infrastr', 'writing', 'texts', 'money']
feature_time = ['Time'] + features

# take subset of original data table with 9 CCs and change it into numpy array 
data_mat = StandardScaler().fit_transform(pnas_data1.loc[:, features].values)

In [87]:
#Gaussian Mixture Model 
#fit GMM
gmm = GMM(n_components=2).fit(data_mat)
cov = gmm.covariances_
prob_distr = gmm.predict_proba(data_mat)

gauss1_idx = [i for i in range(len(prob_distr)) if prob_distr[i][0] >= prob_distr[i][1]]
gauss2_idx = [j for j in range(len(prob_distr)) if prob_distr[j][1] >= prob_distr[j][0]]
gauss1 = [data_mat[:,0][i] for i in gauss1_idx]
gauss2 = [data_mat[:,0][j] for j in gauss2_idx]

In [82]:
# Multiple linear regression over time
ols = linear_model.LinearRegression()
model = ols.fit()

TypeError: fit() missing 2 required positional arguments: 'X' and 'y'

In [83]:
# pca components 
pca = PCA(n_components=9)
pca.fit(data_mat)
components = pca.components_

In [84]:
# calculate angle between two vectors 
def angle(vec1, vec2):
    """
    Given two vectors, compute the angle between the vectors
    """
    assert vec1.shape == vec2.shape
    
    cos_vec = np.inner(vec1, vec2)/(np.linalg.norm(vec1)*np.linalg.norm(vec2))
    angle = math.acos(cos_vec)
    in_deg = math.degrees(angle)
    if in_deg >= 90:
        return (180-in_deg)
    return in_deg


In [85]:
# main eigenvectors for covariances of each gaussians
eigval1, eigvec1 = np.linalg.eig(cov[0])
eigval2, eigvec2 = np.linalg.eig(cov[1])

max_eigval1 = max(eigval1); max_eigvec1 = eigvec1[:, np.argmax(max_eigval1)]
max_eigval2 = max(eigval2); max_eigvec2 = eigvec2[:, np.argmax(max_eigval2)]

comp1 = np.matmul(max_eigvec1.T, components)
comp2 = np.matmul(max_eigvec2.T, components)

for i in range(1, len(components)): # angle using only some components
    print("angle using first %s components: " %(i+1), angle(comp1[:i+1], comp2[:i+1]))

angle using first 2 components:  12.495913311568557
angle using first 3 components:  30.86984463794994
angle using first 4 components:  34.137423612232794
angle using first 5 components:  43.81706490089974
angle using first 6 components:  45.12623621762637
angle using first 7 components:  45.621579754337915
angle using first 8 components:  39.658912681329156
angle using first 9 components:  33.56245604869352
