In [1]:
%load_ext rpy2.ipython

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

from tifffile import TiffFile

## Average Stable Lights

In [3]:
%%time
img_paths = [
    '../data/raw/Version_4_DMSP-OLS_Nighttime_Lights_Time_Series/F101992.v4b_web.stable_lights.avg_vis.tif.npz',
    '../data/raw/Version_4_DMSP-OLS_Nighttime_Lights_Time_Series/F182013.v4c_web.stable_lights.avg_vis.tif.npz',
]
t_years = len(img_paths)
n = 16801
m = 43201

images = np.zeros((len(img_paths),n,m))
                  
for i, p in enumerate(img_paths):
    images[i] = np.load(p)['arr_0']

CPU times: user 7.53 s, sys: 6.86 s, total: 14.4 s
Wall time: 18.6 s


### Extracting Subimages (Convolutional Style)

In [4]:
def convolve_coordinates(image, step_size = (200,200), sub_image_shape = (300,300)):
    passes = [int(image.shape[i]/step_size[i]-sub_image_shape[i]/step_size[i]+1) for i in range(2)]
    sub_image_coordinates = np.zeros((*passes,2,2))
    for i in range(passes[0]):
        step_i = i*step_size[0]
        for j in range(passes[1]):
            step_j = j*step_size[1]
            sub_image_coordinates[i,j] = [
                [step_i,step_i+sub_image_shape[0]],
                [step_j,step_j+sub_image_shape[1]]
            ]
            
    return sub_image_coordinates.reshape(passes[0]*passes[1],2,2)

In [6]:
%%time
sub_img_pxl = np.zeros((t_years, 17845, 2, 2))

for t, img in enumerate(images):
    sub_img_pxl[t] = convolve_coordinates(img)
    
sub_img_pxl.shape

CPU times: user 115 ms, sys: 7.18 ms, total: 123 ms
Wall time: 124 ms


Function using coordinates to retrieve information (faster)

In [7]:
def retrieve_sub_img(image,img_pxl):
    return image[img_pxl[0][0]:img_pxl[0][1],
                 img_pxl[1][0]:img_pxl[1][1]]

In [8]:
def get_pxl(df):
    return [
        [df['pxll'][0],df['pxlb'][0]],
        [df['pxll'][0],df['pxlr'][0]]
    ]

Construct dataframe

In [14]:
%%time
X_df = pd.DataFrame(columns=['year','i','f','t','mean_luminosity','pxlt','pxlb','pxll','pxlr'])

for t in range(t_years):
    for i, img_pxl in enumerate(sub_img_pxl[t]):
        X_df = X_df.append(pd.DataFrame([[
            img_paths[t][-42:-38],
            str(i) + ' ' + str(t),
            str(i),
            t,
            np.mean(retrieve_sub_img(images[t],img_pxl)),
            img_pxl[0,0],
            img_pxl[0,1],
            img_pxl[1,0],
            img_pxl[1,1],
        ]],
            columns=['year','i','f','t','mean_luminosity','pxlt','pxlb','pxll','pxlr'],
            index=[img_paths[t][-42:-38]+' '+str(i)]
        ))
        
X_df.to_csv('../data/processed/usgeo-disco-features.csv',index=False)
X_df = X_df.set_index(['i'])
X_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


CPU times: user 3min 5s, sys: 19 s, total: 3min 24s
Wall time: 3min 30s


In [17]:
X_df = pd.read_csv('../data/processed/usgeo-disco-features.csv', index_col='i')
X_df.head()

Unnamed: 0_level_0,year,f,t,mean_luminosity,pxlt,pxlb,pxll,pxlr
i,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0 0,1992,0,0.0,0.0,0.0,300.0,0.0,300.0
1 0,1992,1,0.0,0.0,0.0,300.0,200.0,500.0
2 0,1992,2,0.0,0.0,0.0,300.0,400.0,700.0
3 0,1992,3,0.0,0.0,0.0,300.0,600.0,900.0
4 0,1992,4,0.0,0.0,0.0,300.0,800.0,1100.0
