# Step 0: Initial Imports

In [1]:
import sys
import os
import torch
from torch import optim
from time import time
import gdal
import numpy as np

In [2]:
# Mapping from the integer class codes found in the label channel of each tile
# to their human-readable CDL class names. Codes are not contiguous, and a few
# names repeat under different codes (e.g. "Barren" for 65 and 131,
# "Shrubland" for 64 and 152).
CDL_dict = {
    0: "Background",
    1: "Corn",
    2: "Cotton",
    3: "Rice",
    4: "Sorghum",
    5: "Soybeans",
    6: "Sunflower",
    10: "Peanuts",
    11: "Tobacco",
    12: "Sweet Corn",
    13: "Pop or Orn Corn",
    14: "Mint",
    21: "Barley",
    22: "Durum Wheat",
    23: "Spring Wheat",
    24: "Winter Wheat",
    25: "Other Small Grains",
    26: "Dbl Crop WinWht/Soybeans",
    27: "Rye",
    28: "Oats",
    29: "Millet",
    30: "Speltz",
    31: "Canola",
    32: "Flaxseed",
    33: "Safflower",
    34: "Rape Seed",
    35: "Mustard",
    36: "Alfalfa",
    37: "Other Hay/Non Alfalfa",
    38: "Camelina",
    39: "Buckwheat",
    41: "Sugarbeets",
    42: "Dry Beans",
    43: "Potatoes",
    44: "Other Crops",
    45: "Sugarcane",
    46: "Sweet Potatoes",
    47: "Misc Vegs & Fruits",
    48: "Watermelons",
    49: "Onions",
    50: "Cucumbers",
    51: "Chick Peas",
    52: "Lentils",
    53: "Peas",
    54: "Tomatoes",
    55: "Caneberries",
    56: "Hops",
    57: "Herbs",
    58: "Clover/Wildflowers",
    59: "Sod/Grass Seed",
    60: "Switchgrass",
    61: "Fallow/Idle Cropland",
    63: "Forest",
    64: "Shrubland",
    65: "Barren",
    66: "Cherries",
    67: "Peaches",
    68: "Apples",
    69: "Grapes",
    70: "Christmas Trees",
    71: "Other Tree Crops",
    72: "Citrus",
    74: "Pecans",
    75: "Almonds",
    76: "Walnuts",
    77: "Pears",
    81: "Clouds/No Data",
    82: "Developed",
    83: "Water",
    87: "Wetlands",
    88: "Nonag/Undefined",
    92: "Aquaculture",
    111: "Open Water",
    112: "Perennial Ice/Snow",
    121: "Developed/Open Space",
    122: "Developed/Low Intensity",
    123: "Developed/Med Intensity",
    124: "Developed/High Intensity",
    131: "Barren",
    141: "Deciduous Forest",
    142: "Evergreen Forest",
    143: "Mixed Forest",
    152: "Shrubland",
    176: "Grassland/Pasture",
    190: "Woody Wetlands",
    195: "Herbaceous Wetlands",
    204: "Pistachios",
    205: "Triticale",
    206: "Carrots",
    207: "Asparagus",
    208: "Garlic",
    209: "Cantaloupes",
    210: "Prunes",
    211: "Olives",
    212: "Oranges",
    213: "Honeydew Melons",
    214: "Broccoli",
    216: "Peppers",
    217: "Pomegranates",
    218: "Nectarines",
    219: "Greens",
    220: "Plums",
    221: "Strawberries",
    222: "Squash",
    223: "Apricots",
    224: "Vetch",
    225: "Dbl Crop WinWht/Corn",
    # This literal was previously split across two physical lines.
    226: "Dbl Crop Oats/Corn",
    227: "Lettuce",
    229: "Pumpkins",
    230: "Dbl Crop Lettuce/Durum Wht",
    231: "Dbl Crop Lettuce/Cantaloupe",
    232: "Dbl Crop Lettuce/Cotton",
    233: "Dbl Crop Lettuce/Barley",
    234: "Dbl Crop Durum Wht/Sorghum",
    235: "Dbl Crop Barley/Sorghum",
    236: "Dbl Crop WinWht/Sorghum",
    237: "Dbl Crop Barley/Corn",
    238: "Dbl Crop WinWht/Cotton",
    239: "Dbl Crop Soybeans/Cotton",
    240: "Dbl Crop Soybeans/Oats",
    241: "Dbl Crop Corn/Soybeans",
    242: "Blueberries",
    243: "Cabbage",
    244: "Cauliflower",
    245: "Celery",
    246: "Radishes",
    247: "Turnips",
    248: "Eggplants",
    249: "Gourds",
    250: "Cranberries",
    254: "Dbl Crop Barley/Soybeans",
}

In [3]:
# One-time system setup, intentionally left commented out so that running the
# notebook never invokes sudo. Presumably this provides the `gdal` module that
# the first cell imports -- confirm for your platform before uncommenting.
#!sudo apt-get update
#!sudo apt-get install python-gdal -y

In [4]:
#Changing Path
# Root of the project checkout. Set the TILE2VEC_DIR environment variable to
# run on another machine without editing the notebook; the default is the
# original author's location.
tile2vec_dir = os.environ.get('TILE2VEC_DIR', '/home/agupta21/gcloud/231n_gitproject')

# Extend the import search path with the parent directory and the checkout
# (presumably so the `src` package imported in the following cells resolves).
# The membership test keeps the cell idempotent: re-running it does not pile
# duplicate entries onto sys.path.
for extra_path in ('../', tile2vec_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [5]:
from src.datasets import TileTripletsDataset, GetBands, RandomFlipAndRotate, ClipAndScale, ToFloatTensor, triplet_dataloader
from src.tilenet import make_tilenet

In [6]:
from src.training import prep_triplets, train_triplet_epoch

In [7]:
# Restrict this process to GPU device 0, then record (and show) whether
# PyTorch can actually see a usable CUDA device. `cuda` is a plain bool.
os.environ.update({'CUDA_VISIBLE_DEVICES': '0'})
cuda = torch.cuda.is_available()
print(cuda)

False


In [96]:
# Smoke test for reading one stored triplet (template for the loading loop).
# Each tile is indexed as (row, col, channel); channel 4 is looked up in
# CDL_dict, i.e. it carries the per-pixel CDL class code.

curNum = 25
part1 = "../data/triplets/" + str(curNum)
anchor = np.load(part1 + "anchor.npy")
neighbor = np.load(part1 + "neighbor.npy")
distant = np.load(part1 + "distant.npy")

print(anchor.shape)

# Class code of the last pixel (bottom-right corner) of each tile. Negative
# indices keep this independent of the tile size. Note a single pixel is not
# necessarily representative: labels can vary within one tile.
CDL_int1 = anchor[-1, -1, 4]
CDL_int2 = neighbor[-1, -1, 4]
CDL_int3 = distant[-1, -1, 4]

print(CDL_dict[CDL_int1])
print(CDL_dict[CDL_int2])
print(CDL_dict[CDL_int3])

# Majority class of the anchor tile: histogram the label channel and take the
# most frequent code. ravel() flattens whatever the tile size is, instead of
# assuming exactly 50 * 50 = 2500 pixels.
counts_anchor = np.bincount(anchor[:, :, 4].ravel())
CDL_int1 = np.argmax(counts_anchor)
print(CDL_int1)

(50, 50, 5)
Grassland/Pasture
Grapes
Open Water
176


In [97]:
# Separate a tile into its label and its image bands: channel 4 holds the
# class code, channels 0-3 are kept as the image.
# NOTE(review): the label is read from one pixel (row 2, column 0) only; it
# can differ from the tile's majority label computed elsewhere in the notebook.
anchor_label = anchor[2, 0, 4]
anchor_reshaped = anchor[..., :4]
print(anchor_label, "Label of Tile")
print(anchor_reshaped.shape, "New shape")

#visualize a tile

69 Label of Tile
(50, 50, 4) New shape


In [11]:
import random

# Reproducibility check: with a fixed seed, the first five triplet ids drawn
# from [0, 100000) are the same on every run.
random.seed(30)
print(*(random.randrange(0,100000) for _ in range(5)), sep="\n")

70658
37905
80085
3936
81510


In [98]:
#Set up pipeline to link triplet labels to the various tiles

# NOTE: `import time` rebinds the notebook-level name `time` to the module
# (the first cell bound it to the function via `from time import time`).
import random
import time

TRIPLET_DIR = "../data/triplets/"
NUM_SOURCE_TRIPLETS = 100000   # triplet ids are drawn from [0, 100000)
NUM_SAMPLED_TRIPLETS = 10000   # number of distinct triplets to load


def majority_label(tile):
    """Return the most frequent class code in the label channel of `tile`.

    The label channel is `tile[:, :, 4]`; it is flattened with ravel(), so
    any tile height/width works. np.bincount needs non-negative integer
    codes. On a tie np.argmax returns the smallest code.
    """
    return np.argmax(np.bincount(tile[:, :, 4].ravel()))


# Class name -> list of tiles whose majority label is that class.
labelToTiles = {}
start = time.time()

random.seed(30)
nums = set()  # triplet ids already used, so every triplet is loaded once
for i in range(NUM_SAMPLED_TRIPLETS):
    if i % 1000 == 0:
        print(i)
    # Rejection-sample an unused id. The draw sequence is kept exactly as
    # before so the same triplets are selected for the same seed.
    curNum = random.randrange(0, NUM_SOURCE_TRIPLETS)
    while curNum in nums:
        curNum = random.randrange(0, NUM_SOURCE_TRIPLETS)
    nums.add(curNum)

    part1 = TRIPLET_DIR + str(curNum)
    anchor = np.load(part1 + "anchor.npy")
    neighbor = np.load(part1 + "neighbor.npy")
    distant = np.load(part1 + "distant.npy")

    # File each tile under its own majority class. The anchor/neighbor/distant
    # order is preserved because later cells pop tiles from these lists.
    for tile in (anchor, neighbor, distant):
        labelToTiles.setdefault(CDL_dict[majority_label(tile)], []).append(tile)

end = time.time()
print(end - start)

# Per-class tile counts, followed by the grand total.
ctr = 0
for key, value in labelToTiles.items():
    ctr += len(value)
    print(key,len(value))
print(ctr)

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
9.731363534927368
Grapes 8687
Grassland/Pasture 4222
Almonds 4933
Open Water 247
Corn 174
Alfalfa 1201
Developed/Open Space 1590
Pistachios 679
Dbl Crop WinWht/Corn 263
Winter Wheat 346
Barley 69
Fallow/Idle Cropland 1932
Triticale 222
Cotton 300
Developed/Med Intensity 1667
Tomatoes 474
Developed/High Intensity 431
Walnuts 391
Developed/Low Intensity 1129
Other Hay/Non Alfalfa 18
Oranges 361
Barren 74
Garlic 69
Evergreen Forest 10
Dbl Crop WinWht/Sorghum 50
Other Tree Crops 7
Oats 71
Carrots 28
Dbl Crop Oats/Corn 15
Nectarines 1
Shrubland 35
Citrus 16
Onions 98
Pomegranates 35
Peaches 10
Cherries 10
Safflower 22
Sorghum 21
Durum Wheat 8
Lettuce 4
Olives 16
Pears 1
Woody Wetlands 34
Plums 6
Herbaceous Wetlands 4
Cantaloupes 1
Dry Beans 9
Dbl Crop Barley/Corn 2
Deciduous Forest 3
Blueberries 2
Honeydew Melons 1
Sod/Grass Seed 1
30000


In [67]:
#all the values are unique (ie all grape tiles = different in the list)
# NOTE(review): the pairwise comparison that would back the claim above is
# commented out below, so running this cell does not verify it.
# The only live statements load the anchor tile of triplet 99999, which
# rebinds the notebook-level `anchor` variable.
part1 = "../data/triplets/" + str(99999)
anchor = np.load(part1 + "anchor.npy")
# Disabled quadratic duplicate check over the first 5000 "Grapes" tiles:
# for i in range(5000):
#     for j in range(5000):
#         if i != j:
#             if np.array_equal(labelToTiles["Grapes"][i], labelToTiles["Grapes"][j]):
#                 print(i,j, "are equal")


In [99]:
#Need to create new triplets based off of our dict.
# Builds label-aware triplets: anchor and neighbor are two tiles filed under
# the same class, distant is a tile filed under a different class. Each tile
# is used at most once. Results are written next to the source triplets,
# numbered from 100000 upwards.
import copy

file_index = 100000
random.seed(50)
anch_n = []  # labels with >= 2 unused tiles (can supply anchor + neighbor)
d_list = []  # labels with >= 1 unused tile (can supply a distant tile)

# Only the per-label lists are consumed (via pop); the tile arrays themselves
# are never modified. Copying each list shallowly therefore protects
# labelToTiles without duplicating every array in memory the way a deep copy
# would. The popped tiles are the same array objects held by labelToTiles.
labelToTiles_copy = {label: copy.copy(tiles) for label, tiles in labelToTiles.items()}

for curKey, tiles in labelToTiles_copy.items():
    if len(tiles) > 1:
        anch_n.append(curKey)
    if len(tiles) >= 1:
        d_list.append(curKey)

# NOTE(review): the loop stops as soon as only one label can still supply an
# anchor/neighbor pair, even if that label has many tiles left and other
# labels could still supply distant tiles -- confirm this is intended.
while len(anch_n) > 1 and len(d_list) >1: #this is the bottleneck
    anLabIndex = random.randint(0,len(anch_n)-1) #label index
    distLabIndex = random.randint(0,len(d_list)-1)
    anLabel = anch_n[anLabIndex]
    distLabel = d_list[distLabIndex]
    # Redraw until the distant class differs from the anchor class. This
    # terminates because d_list holds at least two labels here.
    while distLabel == anLabel:
        distLabIndex = random.randint(0,len(d_list)-1)
        distLabel = d_list[distLabIndex]

    # Take two tiles of the anchor class and one of the distant class.
    anchor = labelToTiles_copy[anLabel].pop()
    neighbor = labelToTiles_copy[anLabel].pop()
    distant = labelToTiles_copy[distLabel].pop()

    np.save("../data/triplets/{}anchor.npy".format(file_index),anchor)
    np.save("../data/triplets/{}neighbor.npy".format(file_index),neighbor)
    np.save("../data/triplets/{}distant.npy".format(file_index),distant)

    # Keep both candidate lists in sync with the remaining tile counts.
    if len(labelToTiles_copy[anLabel]) < 2:
        anch_n.remove(anLabel)
    if len(labelToTiles_copy[anLabel]) == 0:
        d_list.remove(anLabel)
    if len(labelToTiles_copy[distLabel]) == 0:
        d_list.remove(distLabel)
    if distLabel in anch_n and len(labelToTiles_copy[distLabel])<2:
        anch_n.remove(distLabel)

    file_index += 1

# One past the last index written, i.e. 100000 + number of triplets created.
print(file_index)

108700


In [102]:
# Sanity check: read back the anchor tile of the last regenerated triplet and
# print it (numpy abbreviates the middle of large arrays with "...").
a = np.load("../data/triplets/108699anchor.npy")
print(a)


[[[102 114  96 187  69]
  [ 99 109  98 178  69]
  [ 92 105  95 177  69]
  ...
  [ 91 100  94 176  69]
  [ 82  89  86 163  69]
  [ 92  97  94 166  69]]

 [[100 114  95 191  69]
  [ 94 110  92 185  69]
  [ 85 102  88 179  69]
  ...
  [ 80  93  85 178  69]
  [ 71  81  77 166  69]
  [ 73  80  84 170  69]]

 [[ 97 116  90 193  69]
  [ 91 112  86 186  69]
  [ 85 102  80 177  69]
  ...
  [ 76  90  79 180  69]
  [ 66  76  74 168  69]
  [ 76  84  83 176  69]]

 ...

 [[ 89 101  89 188 212]
  [ 88 103  89 184 212]
  [ 86 101  85 182 212]
  ...
  [ 87 104  86 183  69]
  [ 79  94  83 177  69]
  [ 79  94  83 177  69]]

 [[ 86 100  86 187 212]
  [ 86  96  87 180 212]
  [ 79  93  83 177 212]
  ...
  [ 89 108  85 184  69]
  [ 77  92  81 177  69]
  [ 77  92  81 177  69]]

 [[ 80  96  85 179 212]
  [ 74  86  80 170 212]
  [ 73  85  78 168 212]
  ...
  [ 89 111  85 181  69]
  [ 77  92  80 174  69]
  [ 77  92  80 174  69]]]
