## Proximity Matrix

The code below creates the proximity matrix for the dataset for TimeStep = 0

#### Preprocessing the Data

Drop unncessary columns like 'SpeciesName' and binarize categorical columns.

In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("adap-ecosys-dataset.csv")
df = df.drop(["SpeciesName"], axis='columns')

df = pd.get_dummies(df, columns=['Diet'])
df = pd.get_dummies(df, columns=['Habitat'])

df_time_0 = df[df['TimeStep'] == 0]
df_time_0 = df_time_0.astype(float)

df_time_0

Unnamed: 0,Size,Speed,Color,Aggression,TimeStep,SpeciesID,Diet_Carnivore,Diet_Herbivore,Habitat_Desert,Habitat_Forest,Habitat_Mountain,Habitat_Ocean,Habitat_Plains,Habitat_Wetlands
0,5.175966,0.194755,45.326815,5.889688,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,4.870808,4.094815,43.352986,1.838275,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
2,4.327507,1.325123,54.829338,7.827112,0.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
3,9.014150,4.272313,1.046772,3.129817,0.0,3.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0
4,7.207142,7.804117,177.680053,6.387206,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.053717,1.833778,16.390798,9.321824,0.0,995.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0
996,4.976148,3.344396,193.711256,0.371852,0.0,996.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
997,6.527764,2.831408,232.306310,3.486010,0.0,997.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
998,1.452511,6.303037,150.228761,2.807099,0.0,998.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


#### Create the Matrix

Define function for distance. Initialize the proximity matrix (1000 by 1000) with zeros. Then using two 'for' loops, calculate distance between the row vectors of the dataframe and update the matrix.

In [2]:
def norm(x1, x2):
    return np.sqrt(np.sum((x1 - x2)**2))


def create_prox_matrix(para):
    matrix = np.zeros([len(para), len(para)])

    for i in np.arange(len(para)):
        for j in np.arange(len(para)):
            matrix[i, j] = norm(np.array(para.iloc[i].tolist()), np.array(para.iloc[j].tolist()))

    return matrix

proximity_matrix = create_prox_matrix(df_time_0)
print(proximity_matrix)
print(proximity_matrix.shape)

[ 5.17596612  0.19475456 45.3268155   5.88968834  0.          0.
  0.          1.          0.          0.          0.          0.
  1.          0.        ]
[ 4.87080775  4.09481532 43.35298632  1.83827451  0.          1.
  0.          1.          0.          1.          0.          0.
  0.          0.        ]
6.213980301410535
[[   0.            6.2139803    10.1019384  ... 1014.38990933
  1003.52931983 1019.61650846]
 [   6.2139803     0.           13.36185593 ... 1013.76940923
  1002.7227587  1019.04771505]
 [  10.1019384    13.36185593    0.         ... 1010.71807674
  1000.5894741  1015.79271874]
 ...
 [1014.38990933 1013.76940923 1010.71807674 ...    0.
    82.34072694   17.97413173]
 [1003.52931983 1002.7227587  1000.5894741  ...   82.34072694
     0.           99.53657537]
 [1019.61650846 1019.04771505 1015.79271874 ...   17.97413173
    99.53657537    0.        ]]
(1000, 1000)


#### Save the Matrix

Save the Proximity Matrix as a csv file so that it does not have to be computed repeatedly for this TimeStep.

In [None]:
matrix = pd.DataFrame(proximity_matrix)
matrix.to_csv("output.csv", index=False, header=False)