In [12]:
from osgeo import gdal, gdal_array, osr, ogr
from IPython.core.debugger import set_trace

import os
import sys
import fnmatch
import logging
import json
import numpy as np
import csv

In [13]:
""" The script is used to random sample the npy files of 26 years isolated pixels
   
   input: FIRE/INS/LOG/OTH: 4 csv per year
          var: db, dg, dw
          
   output: one csv per agent and year
          var: distance of db&dg, distance of dg&dw, distance of dw&db, agent id, year
"""

# Logger used by the cells below for progress and error messages.
log = logging.getLogger('export_image')

# Base folder of the isolated-pixel analysis; input and output live beneath it.
ROOT = "/projectnb/landsat/users/zhangyt/above/post_processing/analysis/isolated_pixels/"

# Agent names; the 1-based position in this list is used as the numeric agent id.
agents = ['FIRE', 'INS', 'LOG', 'OTH']

# ori: folder searched for the population csv files; des: folder for the sampled output.
ori, des = ROOT + "population/", ROOT +  "sampled/"

In [14]:
def get_files(path, pattern, recursive=True):
    """ find files whose name matches a shell-style pattern

    Args:
        path (str): folder to search in
        pattern (str): fnmatch pattern applied to the file name only
        recursive (bool): walk sub-folders as well when True, otherwise
            only list the entries directly inside path

    Returns:
        file_list (list): one [folder, name] pair per matching file

    """
    if not recursive:
        # Flat search: only the direct entries of `path` are considered.
        matches = fnmatch.filter(os.listdir(path), pattern)
        return [[path, name] for name in matches]

    found = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            if fnmatch.fnmatch(name, pattern):
                found.append([folder, name])
    return found


In [15]:
def get_year(x, end=-4, _format='YYYY'):
    """ extract year from filename
    Args:
        x (str): filename
        end (int): index just past the last character of the year
            (the default -4 skips a 4-character suffix such as '.csv')
        _format (str): format of the date, e.g. YYYY; only its length is
            used, so a longer format (e.g. YYYYMMDD) extracts a full date
    Returns:
        Year (int): year
    Raises:
        ValueError: if the characters at that position are not an integer
    """
    # The year occupies the len(_format) characters that end at `end`.
    start = end - len(_format)
    token = x[start:end]
    try:
        return int(token)
    except ValueError:
        raise ValueError(
            'Cannot extract a {} value from {!r}: found {!r}'.format(_format, x, token)
        ) from None

In [None]:
# 1.read files
## Load every per-agent, per-year population csv into memory before sampling.
## Result: dic[agent][table name] -> array of the csv columns plus an agent-id column,
##         year_avail -> list of every year seen, in order of first appearance.
if not os.path.exists(des):
    log.warning('{} does not exist, trying to create one.'.format(des))
    try:
        os.makedirs(des)
    except OSError:
        # The export step writes its csv files into `des`; without the folder it
        # cannot succeed, so report the problem and stop instead of carrying on.
        log.error('Cannot create output folder {}'.format(des))
        raise


log.info('Locating files in {}...'.format(ori))
dic = {}
year_avail = []
for agent in agents:
    agent_code = agents.index(agent) + 1  # 1-based numeric id of the agent
    tmp_dic = {}
    agent_list = get_files(ori, '{}*.csv'.format(agent))
    if not agent_list:
        log.warning('No csv files found for agent {} in {}'.format(agent, ori))

    for folder, file_name in sorted(agent_list):
        year = get_year(file_name)
        np_file = os.path.join(folder, file_name)
        # atleast_2d keeps the row slicing below valid when a file holds a single row
        np_array = np.atleast_2d(np.genfromtxt(np_file, delimiter=',', dtype='int32'))

        # Drop the first row (noted in the original as an all-zero row; presumably
        # the parsed header line - TODO confirm) and append the agent id as a column.
        data_rows = np_array[1:, ]
        agent_id = np.full((data_rows.shape[0], 1), agent_code, dtype=np.int16)
        dTC_array = np.hstack((data_rows, agent_id))

        tab_name = agent + "_dTC_POP_" + str(year)
        if year not in year_avail:
            year_avail.append(year)

        # one table per year, keyed by agent and year
        tmp_dic[tab_name] = dTC_array
        print(year)

    # Register the agent even when no file matched, so it always has an entry.
    dic[agent] = tmp_dic
    print(agent + " is completed.")
            

In [47]:
# Inspect the FIRE entry of dic: a dict mapping each table name to that year's array
dic['FIRE']

{'FIRE_dTC_POP_1987': array([[-4112, -1661, -4245,     1],
        [-3120, -2937, -4084,     1],
        [-1760,   157,   -10,     1],
        ...,
        [   15,   137,   291,     1],
        [  537,    67,  -110,     1],
        [ -466,    22,  -459,     1]], dtype=int32),
 'FIRE_dTC_POP_1988': array([[-509,   83,  513,    1],
        [1245, -715,  383,    1],
        [-808, -397,  -40,    1],
        ...,
        [ 293,  290, -282,    1],
        [ 524,  468, -180,    1],
        [-460, -392,   30,    1]], dtype=int32),
 'FIRE_dTC_POP_1989': array([[  18,  359, -125,    1],
        [  14,    7,    2,    1],
        [ 274,  301,  -17,    1],
        ...,
        [ 472,  114, -177,    1],
        [  19,  426,  570,    1],
        [   0,    1,    0,    1]], dtype=int32),
 'FIRE_dTC_POP_1990': array([[  -30,    17,   -16,     1],
        [    4,    -4,   -16,     1],
        [   16,    -4,   -46,     1],
        ...,
        [  175,   -48,  -573,     1],
        [  -32,    50,   -41,  

In [6]:
# 2.random sample
# Copy the per-agent dicts as well as the outer one: dic.copy() alone is shallow,
# so storing samples in dic2[agent] would also overwrite the population tables
# held in dic, and re-running this cell would then sample from the samples.
dic2 = {agent: dict(tables) for agent, tables in dic.items()}
np.random.seed(0)  # fixed seed so the drawn rows are reproducible
#samples = [1600, 500, 500, 800]  #for entire time series sampling
samples = [1000, 400, 400, 600]  # rows to draw per year, in the same order as `agents`
for agent in agents:
    idx = agents.index(agent)
    n_samples = samples[idx]
    for year in year_avail:
        tab_name = agent + "_dTC_POP_" + str(year)
        population = dic2[agent][tab_name]
        # np.random.choice draws WITH replacement by default, so a row can be
        # picked more than once.
        picked_rows = np.random.choice(population.shape[0], n_samples)
        dic2[agent][tab_name] = population[picked_rows, 0:4]
        print("Finished sample " + str(samples[idx]) + " " + agent + "in year " + str(year) + ".")

Finished sample 1000 FIREin year 1987.
Finished sample 1000 FIREin year 1988.
Finished sample 1000 FIREin year 1989.
Finished sample 1000 FIREin year 1990.
Finished sample 1000 FIREin year 1991.
Finished sample 400 INSin year 1987.
Finished sample 400 INSin year 1988.
Finished sample 400 INSin year 1989.
Finished sample 400 INSin year 1990.
Finished sample 400 INSin year 1991.
Finished sample 400 LOGin year 1987.
Finished sample 400 LOGin year 1988.
Finished sample 400 LOGin year 1989.
Finished sample 400 LOGin year 1990.
Finished sample 400 LOGin year 1991.
Finished sample 600 OTHin year 1987.
Finished sample 600 OTHin year 1988.
Finished sample 600 OTHin year 1989.
Finished sample 600 OTHin year 1990.
Finished sample 600 OTHin year 1991.


In [9]:
# Inspect dic2: per agent, a dict mapping each table name to the sampled rows
dic2

{'FIRE': {'FIRE_dTC_POP_1987': array([[  431,    88,  -225,     1],
         [  -67,   253,   279,     1],
         [  669,   551,  -656,     1],
         ...,
         [   92,  -276,  -514,     1],
         [-1548, -1195,  -352,     1],
         [  102,   165,     8,     1]], dtype=int32),
  'FIRE_dTC_POP_1988': array([[ 477,  553,  287,    1],
         [ 203,   74,  221,    1],
         [ 328,  184,  107,    1],
         ...,
         [-377, -553, -350,    1],
         [ 211,  112,  180,    1],
         [ 252,  185, -166,    1]], dtype=int32),
  'FIRE_dTC_POP_1989': array([[1249,  653,  -59,    1],
         [ 236,  159, -255,    1],
         [ 117,  296,  320,    1],
         ...,
         [ 419,  149,  -34,    1],
         [ 179,  196,  152,    1],
         [  18,  110,  109,    1]], dtype=int32),
  'FIRE_dTC_POP_1990': array([[ -17, -330, -534,    1],
         [ 237, -190,  462,    1],
         [   5,   11,    8,    1],
         ...,
         [ 178, -222, -373,    1],
         [ 70

In [10]:
# 3.calculate the distance of dTC
# For every agent/year table, compute the Euclidean distance for each pair of the
# first three columns, append the agent id and the year, and write one csv per table.
# Column pairs are (0,1), (1,2), (2,0); if the columns are ordered db, dg, dw
# (as the notebook header states) that is db&dg, dg&dw, dw&db.
dTC_distance = {}
column_pairs = [(0, 1), (1, 2), (2, 0)]
for agent in agents:
    idx = agents.index(agent)
    tmp_distance = {}
    for year in year_avail:
        tab_name = agent + "_dTC_POP_" + str(year)

        # Work in float64: squaring int32 values can overflow silently, and
        # taking the root in float32 can round a result up to the next integer
        # before the truncation below.
        sampled = dic2[agent][tab_name].astype(np.float64)
        nrows = sampled.shape[0]
        distances = np.column_stack([
            np.sqrt(np.square(sampled[:, a]) + np.square(sampled[:, b]))
            for a, b in column_pairs
        ])

        agent_id = np.full((nrows, 1), idx + 1, dtype=np.int16)
        ag_year = np.full((nrows, 1), year, dtype=np.int16)
        # astype(int) truncates the distances toward zero before export
        distance_array = np.hstack((distances.astype(int), agent_id, ag_year))

        out_name = des + agent + '_dTC_distance_sampled_'+ str(year) + '.csv'
        np.savetxt(out_name, distance_array, delimiter=",", fmt= '%5.0d')
        tmp_distance[tab_name] = distance_array

    dTC_distance[agent] = tmp_distance

    print("Finished calculated dTC distance matrix of " + agent + ".")

Finished calculated dTC distance matrix of FIRE.
Finished calculated dTC distance matrix of INS.
Finished calculated dTC distance matrix of LOG.
Finished calculated dTC distance matrix of OTH.


In [11]:
# with open('sampled_all.json', 'w') as f:
#     json.dump(tmp_distance, f)
## This needs to be modified before the results can be stored as a JSON file.
## For now the output is just csv files; the next step is to plot them with ggplot in R.