# Predicting Climate using Aerial Imagery

## Creating Directories and Loading Images
#### Here we create the required directories and download the images. Note that you must first create an account at https://scihub.copernicus.eu/dhus in order to access the satellite imagery.

In [None]:
from sentinelsat import SentinelAPI

# Copernicus Open Access Hub credentials: create an account at the hub URL
# used below and fill in both fields before running the download cells.
user = ''
password = ''
hub_url = 'https://scihub.copernicus.eu/dhus'
api = SentinelAPI(user, password, hub_url)

In [None]:
import os

path = ""  # update this with your directory and your folder name for this project. I recommend using PredictingClimate as a folder name.
pathTest = ""  # update this with the above directory
tempTest = ""  # update this with a temporary directory


def ensure_directory(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Each directory is handled on its own so that one that already exists
    neither counts as a failure nor prevents the next one from being created,
    and the status line always names the directory it is about.

    Returns True when the directory exists afterwards, False when it could
    not be created (for example while the placeholder is still empty).
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except OSError as error:
        print("Creation of the directory %s failed: %s" % (directory, error))
        return False
    print("Directory %s is ready" % directory)
    return True


for requiredDirectory in (pathTest, tempTest):
    ensure_directory(requiredDirectory)

latitude_lower_bound = 30
latitude_upper_bound = 50
# NOTE: the two longitude names are swapped relative to their values (-120 is
# the smaller, western bound). The download loop depends on these values, so
# they are intentionally left as they are here.
longitude_lower_bound = -70
longitude_upper_bound = -120

In [None]:
import os 
import time
import shutil
# Walk the bounding box one integer degree at a time and, for every point that
# has a nearly cloud-free Sentinel-2 Level-2A product, store its quicklook as
# "l<longitude>l<latitude>.jpeg" inside pathTest.
#
# The directories configured earlier (pathTest, tempTest) are used directly,
# so there are no further placeholders to fill in here and the working
# directory of the process is never changed.

# The longitude bounds may be given in either order; always iterate west to east.
first_longitude = min(longitude_lower_bound, longitude_upper_bound)
last_longitude = max(longitude_lower_bound, longitude_upper_bound)

for latitude in range(latitude_lower_bound, latitude_upper_bound):
    for longitude in range(first_longitude, last_longitude):
        products = api.query('POINT({0} {1})'.format(longitude, latitude), platformname='Sentinel-2', processinglevel='Level-2A', cloudcoverpercentage=(0, 10))
        if products:
            print("Product found")
            products_gdf = api.to_geodataframe(products)
            # The least cloudy product comes first; its index label is the product id.
            products_gdf_sorted = products_gdf.sort_values(['cloudcoverpercentage'], ascending=[True])
            name = products_gdf_sorted.iloc[0, :].name

            # Download into the (otherwise empty) temporary directory so the
            # new quicklook can be identified without knowing its file name,
            # then move it to its coordinate-based name in pathTest.
            api.download_quicklook(name, tempTest)
            dst = os.path.join(pathTest, "l" + str(longitude) + "l" + str(latitude) + ".jpeg")
            for filename in os.listdir(tempTest):
                if filename.endswith('.jpeg'):
                    shutil.move(os.path.join(tempTest, filename), dst)
            time.sleep(1)  # brief pause between downloads

        print((longitude, latitude))

In [None]:
import os
climateTestDir = ""  # update this with a ClimateTest directory folder name
climateTrainDir = ""  # update this with a ClimateTrain directory folder name
# Koppen climate classification codes; one sub-folder is created per code.
climates = ["Af", "Am", "Aw", "As",
            "BWh", "BWk", "BSh", "BSk",
            "Csa", "Csb", "Csc", "Cwa", "Cwb", "Cwc", "Cfa", "Cfb", "Cfc",
            "Dsa", "Dsb", "Dsc", "Dsd", "Dwa", "Dwb", "Dwc", "Dwd", "Dfa", "Dfb", "Dfc", "Dfd",
            "ET", "EF"]


def create_climate_directories(base_dir, climate_names):
    """Create ``base_dir`` and one sub-directory per entry of ``climate_names``.

    Directories that already exist are left untouched, so the cell can be
    re-run safely. Paths are joined with ``os.path.join`` so ``base_dir``
    works with or without a trailing separator.

    Raises OSError when a directory cannot be created (for example while
    ``base_dir`` is still the empty placeholder).
    """
    os.makedirs(base_dir, exist_ok=True)
    for climate in climate_names:
        os.makedirs(os.path.join(base_dir, climate), exist_ok=True)


for climateRoot in (climateTestDir, climateTrainDir):
    try:
        create_climate_directories(climateRoot, climates)
    except OSError as error:
        print("Creation of the directory %s failed: %s" % (climateRoot, error))
    else:
        print("Successfully created the directory %s " % climateRoot)

## Sorting Images by Climate Type
#### In the following code blocks, we sort the images according to their climate type. Note that this project uses latitude and longitude bounds covering the US; adjust them as needed for another region.

In [None]:
import numpy as np
import pickle
import math
import os
import time
import shutil

latitude_lower_bound = 30
latitude_upper_bound = 50
longitude_lower_bound = -120
longitude_upper_bound = -70

directory = ""  # update this with the directory location above (not the temporary one)
path = ".../data/koppen_1901-2010.tsv"  # update the ... with your directory location. Follow the download instructions at the website: http://hanschen.org/koppen
koppen = np.genfromtxt(path, dtype=None, names=True)

# The bounding-box filter does not depend on the image, so apply it once and
# keep the surviving grid points as arrays (coordinates converted to radians).
insideBox = (
    (koppen['latitude'] >= latitude_lower_bound)
    & (koppen['latitude'] <= latitude_upper_bound)
    & (koppen['longitude'] >= longitude_lower_bound)
    & (koppen['longitude'] <= longitude_upper_bound)
)
regionalKoppen = koppen[insideBox]
if regionalKoppen.size == 0:
    raise ValueError("No Koppen grid points fall inside the configured latitude/longitude bounds")
koppenLat = np.radians(regionalKoppen['latitude'])
koppenLon = np.radians(regionalKoppen['longitude'])
koppenClimate = regionalKoppen['p1901_2010']

# Maps each image file name to the climate code of the nearest Koppen grid point.
imageToClimate = {}

for filename in os.listdir(directory):
    if not filename.endswith(".jpeg"):
        continue
    # File names look like "l<longitude>l<latitude>.jpeg", so splitting on "l"
    # yields ['', '<longitude>', '<latitude>'].
    latLon = filename.replace(".jpeg", '').split("l")
    lat = np.radians(int(latLon[2]))
    lon = np.radians(int(latLon[1]))

    # Haversine term: sin^2(dlat/2) + cos(lat1) * cos(lat2) * sin^2(dlon/2).
    # The square applies to the sine, not to its argument. The term grows
    # monotonically with great-circle distance, so its minimum identifies the
    # nearest grid point without converting to an actual distance.
    haversine = (np.sin((koppenLat - lat) / 2) ** 2
                 + np.cos(koppenLat) * np.cos(lat) * np.sin((koppenLon - lon) / 2) ** 2)
    # np.argmin returns the first minimum, like list.index(min(...)).
    imageToClimate[filename] = koppenClimate[np.argmin(haversine)]

# Persist the mapping and read it back, closing the file handles promptly.
with open("imageToClimate.p", "wb") as pickleFile:
    pickle.dump(imageToClimate, pickleFile)
with open("imageToClimate.p", "rb") as pickleFile:
    imageToClimate = pickle.load(pickleFile)
print(imageToClimate)

dst = ""  # update this with your temporary directory location
# NOTE(review): the next cell copies each image into dst + <climate code>, so
# dst presumably has to be the folder that holds the climate sub-folders - confirm.

In [None]:
# Copy every image into the sub-folder of dst named after its climate code.
for imageName, climateValue in imageToClimate.items():
    print(imageName)
    # Depending on the NumPy version, genfromtxt yields the code as bytes or
    # as str; slicing the repr (str(value)[2:-1]) is only right for bytes.
    if isinstance(climateValue, bytes):
        newClim = climateValue.decode()
    else:
        newClim = str(climateValue)
    # os.path.join works whether or not dst ends with a path separator.
    moveToClimate = os.path.join(dst, newClim)
    # Without an existing folder shutil.copy would write a *file* with the
    # climate code as its name, so make sure the folder is there.
    os.makedirs(moveToClimate, exist_ok=True)
    source = os.path.join(directory, imageName)
    shutil.copy(source, moveToClimate)

In [None]:
import random, os, shutil, math
path = ""  # update this with the ClimateTest directory location
newPath = ""  # update this with the ClimateTrain directory location

# Pair every image under path/<climate>/ with its destination folder
# newPath/<climate>.
origList = []
climList = []

for climate in os.listdir(path):
    climateDir = os.path.join(path, climate)
    if not os.path.isdir(climateDir):
        continue  # ignore stray files that sit next to the climate folders
    for image in os.listdir(climateDir):
        origList.append(os.path.join(climateDir, image))
        climList.append(os.path.join(newPath, climate))

counter = len(origList)

# Shuffle sources and destinations together so the pairs stay aligned.
bigList = list(zip(origList, climList))
random.shuffle(bigList)

# zip(*[]) yields nothing, so guard the unpacking for an empty image tree.
origList, climList = zip(*bigList) if bigList else ((), ())

dataTrainAmount = math.ceil(counter * 0.8)
dataTestAmount = counter - dataTrainAmount  # images that stay behind in path

# Move the first 80% of the shuffled images into the training tree.
for source, target in bigList[:dataTrainAmount]:
    # If the folder were missing, shutil.move would rename the image to the
    # folder's name instead of placing it inside.
    os.makedirs(target, exist_ok=True)
    shutil.move(source, target)

In [None]:
import random, os, shutil, math
path = ""  # update this with the ClimateTrain directory location
newPath = ""  # update this with a ClimateVal directory location

climates = ["Af", "Am", "Aw", "As",
            "BWh", "BWk", "BSh", "BSk",
            "Csa", "Csb", "Csc", "Cwa", "Cwb", "Cwc", "Cfa", "Cfb", "Cfc",
            "Dsa", "Dsb", "Dsc", "Dsd", "Dwa", "Dwb", "Dwc", "Dwd", "Dfa", "Dfb", "Dfc", "Dfd",
            "ET", "EF"]

# The ClimateVal folders are created on demand right before an image is moved
# into them (see the final loop), so no separate setup step is required.

# Pair every image under path/<climate>/ with its destination folder
# newPath/<climate>.
origList = []
climList = []

for climate in os.listdir(path):
    climateDir = os.path.join(path, climate)
    if not os.path.isdir(climateDir):
        continue  # ignore stray files that sit next to the climate folders
    for image in os.listdir(climateDir):
        origList.append(os.path.join(climateDir, image))
        climList.append(os.path.join(newPath, climate))

counter = len(origList)

# Shuffle sources and destinations together so the pairs stay aligned.
bigList = list(zip(origList, climList))
random.shuffle(bigList)

# zip(*[]) yields nothing, so guard the unpacking for an empty image tree.
origList, climList = zip(*bigList) if bigList else ((), ())

dataValAmount = math.ceil(counter * 0.1)

# Move the first 10% of the shuffled training images into the validation tree.
for source, target in bigList[:dataValAmount]:
    # If the folder were missing, shutil.move would rename the image to the
    # folder's name instead of placing it inside.
    os.makedirs(target, exist_ok=True)
    shutil.move(source, target)

## CNN Training using Tensorflow
#### We can now use the images prepared above to train our model. We will use a TensorFlow CNN, modified to suit our needs.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
import pathlib
# Root folder of the image data set, wrapped in a pathlib.Path.
data_dir = pathlib.Path("")  # update this with Climates directory

In [None]:
# Images per batch, and the height/width (in pixels) every image is resized to.
batch_size, img_height, img_width = 32, 180, 180

In [None]:
# Training subset: the "training" side of a 0.2 validation split over data_dir,
# using a fixed seed.
train_options = dict(
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
train_ds = tf.keras.utils.image_dataset_from_directory(data_dir, **train_options)

In [None]:
# Validation subset: the "validation" side of the same 0.2 split (same seed).
val_options = dict(
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
val_ds = tf.keras.utils.image_dataset_from_directory(data_dir, **val_options)

In [None]:
import matplotlib.pyplot as plt

# Class labels in index order. They must be read here, while train_ds is still
# the object returned by image_dataset_from_directory: the datasets produced by
# the later cache()/shuffle()/prefetch() calls do not carry this attribute.
class_names = train_ds.class_names

# Preview up to nine images of the first training batch with their labels.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  # A batch can hold fewer than nine images when the data set is small.
  for i in range(min(9, len(images))):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, shuffle with a 1000-element buffer, then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache and prefetch only (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Augmentation stack: horizontal flip, vertical flip, rotation and zoom layers.
augmentation_input_shape = (img_height, img_width, 3)
data_augmentation = keras.Sequential([
  layers.RandomFlip("horizontal", input_shape=augmentation_input_shape),
  layers.RandomFlip("vertical", input_shape=augmentation_input_shape),
  layers.RandomRotation(0.1),
  layers.RandomZoom(0.1),
])

# Run the first training batch through the augmentation nine times and show
# the first image of each result.
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
  for position in range(1, 10):
    augmented_images = data_augmentation(images)
    ax = plt.subplot(3, 3, position)
    first_image = augmented_images[0].numpy().astype("uint8")
    plt.imshow(first_image)
    plt.axis("off")

In [None]:
# Number of output classes: one per class sub-folder of data_dir, which is how
# image_dataset_from_directory infers its labels. Hidden folders (for example
# .ipynb_checkpoints) are not counted.
num_classes = sum(
  1 for entry in data_dir.iterdir()
  if entry.is_dir() and not entry.name.startswith(".")
)

# CNN: augmentation and rescaling, three Conv2D/MaxPooling2D stages, dropout,
# then a dense head that outputs one raw logit per class (no softmax here).
model = Sequential([
  data_augmentation,
  layers.Rescaling(1./255),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Dropout(0.2),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])

In [None]:
# from_logits=True because the final Dense layer has no activation.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Train for a fixed number of epochs, evaluating on val_ds after each one; the
# returned history is used for the curves plotted afterwards.
epochs = 16
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

In [None]:
# Per-epoch metrics recorded by model.fit.
acc, val_acc = history.history['accuracy'], history.history['val_accuracy']
loss, val_loss = history.history['loss'], history.history['val_loss']

epochs_range = range(epochs)

# Two side-by-side panels: accuracy on the left, loss on the right.
plt.figure(figsize=(8, 8))
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)
for position, train_curve, train_label, val_curve, val_label, legend_loc, title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()

In [None]:
import os
from sklearn.metrics import confusion_matrix
path = ""  # update this with the Climates directory location

# Class labels in the model's output order: the class sub-folders of the
# training root in sorted order, mirroring the alphanumeric ordering that
# image_dataset_from_directory uses for inferred labels.
class_names = sorted(
    entry.name for entry in data_dir.iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)

y_actu = []  # true climate code of every evaluated image (its folder name)
y_pred = []  # climate code predicted by the model for the same image

arr = os.listdir(path)

for i in arr:
    newPath = os.path.join(path, i)
    if not os.path.isdir(newPath):
        continue  # ignore stray files that sit next to the class folders
    for j in os.listdir(newPath):
        cfa_image = os.path.join(newPath, j)
        img = tf.keras.utils.load_img(
            cfa_image, target_size=(img_height, img_width)
            )
        img_array = tf.keras.utils.img_to_array(img)
        img_array = tf.expand_dims(img_array, 0)  # add a batch dimension of one

        predictions = model.predict(img_array)
        score = tf.nn.softmax(predictions[0])  # logits -> class probabilities
        print(i)
        y_actu.append(i)
        y_pred.append(class_names[np.argmax(score)])

# Rows are true classes, columns are predicted classes.
confArr = confusion_matrix(y_actu, y_pred)

In [None]:
import matplotlib.pyplot as plt
import numpy

def plot_confusion_matrix(df_confusion, cmap=plt.cm.plasma):
    """Draw a confusion matrix as a colour-coded image with a colour bar.

    Parameters:
        df_confusion: either a pandas-style table exposing ``columns`` and
            ``index`` (their labels and names are used for the ticks and axis
            labels), or a plain 2-D array such as the one this notebook
            passes, which is drawn with numeric tick positions.
        cmap: matplotlib colour map used for the cells.
    """
    plt.matshow(df_confusion, cmap=cmap)
    plt.colorbar()

    if not hasattr(df_confusion, "columns"):
        # A plain array carries no label metadata, so only tick positions can
        # be set; reading .columns/.index here would raise AttributeError.
        matrix = np.asarray(df_confusion)
        plt.xticks(np.arange(matrix.shape[1]))
        plt.yticks(np.arange(matrix.shape[0]))
        return

    tick_marks = np.arange(len(df_confusion.columns))
    plt.xticks(tick_marks, df_confusion.columns)
    plt.yticks(tick_marks, df_confusion.index)
    plt.ylabel(df_confusion.index.name)
    plt.xlabel(df_confusion.columns.name)

# Normalise every row so that it sums to one (the share of each predicted
# class per true class). A row whose total is zero is left as all zeros
# instead of producing NaN through a division by zero.
row_sums = confArr.sum(axis=1)
row_totals = row_sums[:, numpy.newaxis]
new_matrix = numpy.divide(
    confArr,
    row_totals,
    out=numpy.zeros(confArr.shape, dtype=float),
    where=row_totals != 0,
)
plot_confusion_matrix(new_matrix)