<a href="https://colab.research.google.com/github/arathee2/black-mamba/blob/master/Solar_PV_in_Aerial_Imagery_IDS_705_JUAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
import sklearn.metrics as metrics
from sklearn.model_selection import train_test_split

# Mount Google Drive inside the Colab runtime; the data paths used by this
# notebook all live under the mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# List the Kaggle data folder on the mounted Drive so the expected inputs
# (label/submission CSVs and the image folders) can be checked by eye.
# This is an IPython shell escape, not Python: it only runs inside a notebook.

!ls "/content/drive/My Drive/kaggle" 

# Directory parameters.
# Every input lives under a single Kaggle folder on the mounted Drive, so the
# root is written once and the individual paths are derived from it.
dir_kaggle        = '/content/drive/My Drive/kaggle/'

# Image folders keep their trailing slash: load_data builds file names by
# plain string concatenation (dir + id + '.tif').
dir_train_images  = dir_kaggle + 'training/'
dir_test_images   = dir_kaggle + 'testing/'

# CSV files: training labels, and the sample submission that lists the test ids.
dir_train_labels  = dir_kaggle + 'labels_training.csv'
dir_test_ids      = dir_kaggle + 'sample_submission.csv'

labels_training.csv    solar_classification.py	training
sample_submission.csv  testing


In [0]:
def load_data(dir_data, dir_labels, training=True):
    ''' Load every image listed in a CSV file into one in-memory array

    The CSV at `dir_labels` is read with pandas and must contain an `id`
    column. Each id is turned into the file name `<dir_data><id>.tif` by
    plain string concatenation, so `dir_data` must end with a path separator.

    While this is feasible with a smaller dataset, for larger datasets,
    not all the images would be able to be loaded into memory

    Parameters
    ----------
    dir_data : str
        Folder holding the .tif images (with trailing separator).
    dir_labels : str
        Path of the CSV listing the ids (and, for training, a `label` column).
    training : bool, default True
        When True the `label` column is returned alongside the images;
        when False the ids themselves are returned instead.

    Returns
    -------
    (data, labels) if training else (data, ids)
        `data` is a NumPy array built from the loaded images, in CSV row
        order; `labels` / `ids` are the corresponding column values.
    '''
    labels_pd = pd.read_csv(dir_labels)
    ids       = labels_pd.id.values
    # Read the labels up front so a CSV without a `label` column fails
    # immediately instead of after every image has been loaded.
    labels    = labels_pd.label.values if training else None

    # str() handles NumPy scalars and plain Python values alike; the previous
    # identifier.astype(str) broke when the id column was read as strings.
    data = np.array([
        mpl.image.imread(dir_data + str(identifier) + '.tif')
        for identifier in ids
    ])

    if training:
        return data, labels
    return data, ids

In [0]:
# Split the labelled images into training (70%) and validation (30%) sets
images, labels = load_data(dir_train_images, dir_train_labels, training=True)

# A fixed seed keeps the split, and therefore every metric computed from it,
# identical across Restart & Run All; without random_state the shuffle
# differs on every execution.
split_seed = 42
training_images, validation_images, training_labels, validation_labels = train_test_split(
    images, labels, test_size=0.3, random_state=split_seed)

# Test images have no labels; their ids are listed in the sample submission CSV
test_images, test_ids = load_data(dir_test_images, dir_test_ids, training=False)

In [103]:
# Shape check for every array produced by the loading/splitting cell.
# The test identifiers are stored in `test_ids` (second value returned by
# load_data with training=False); `test_labels` is not defined anywhere in
# the notebook and raised NameError on a fresh kernel.
print(training_images.shape)
print(training_labels.shape)
print(validation_images.shape)
print(validation_labels.shape)
print(test_images.shape)
print(test_ids.shape)

(1050, 101, 101, 3)
(1050,)
(450, 101, 101, 3)
(450,)
(558, 101, 101, 3)
(558,)


In [0]:
# Divide all three image sets by 255.0 (presumably to bring 8-bit pixel
# values into [0, 1] -- confirm the dtype returned by the image loader).
# NOTE: the names are rebound to the scaled arrays, so running this cell a
# second time scales the data again.
training_images, validation_images, test_images = (
    image_set / 255.0
    for image_set in (training_images, validation_images, test_images)
)

In [0]:
# CNN: two convolution + max-pooling stages, then a dense classifier head.
# Layers are appended one at a time, in the same order as they are applied.

model = tf.keras.models.Sequential()

# Stage 1: 64 filters of size 3x3 over the 101x101 3-channel input, then 2x2 pooling
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(101, 101, 3)))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Stage 2: another 64 filters of size 3x3, then 2x2 pooling
model.add(tf.keras.layers.Conv2D(64, (3,3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2,2))

# Classifier head: flatten the feature maps, one hidden layer, and a
# 2-way softmax (one output per class)
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dense(2, activation='softmax'))

In [108]:
# Training

# Integer class labels with a 2-unit softmax output -> sparse categorical
# cross-entropy; accuracy is tracked as an additional metric.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()
model.fit(training_images, training_labels, epochs=15)

# Because the model was compiled with one metric, evaluate() returns
# [loss, accuracy]. Unpack both so `validation_loss` really is the loss
# (it used to be bound to the whole list).
validation_loss, validation_accuracy = model.evaluate(validation_images, validation_labels)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 99, 99, 64)        1792      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 49, 49, 64)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 47, 47, 64)        36928     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 23, 23, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 33856)             0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               4333696   
_________________________________________________________________
dense_3 (Dense)              (None, 2)                

In [0]:
# Run the trained network over the test images; each output row holds the
# softmax scores produced by the model's final 2-unit layer.

test_predictions = model.predict(x=test_images)

In [0]:
# Build the submission table: one row per test id, scored with the second
# softmax column (index 1) of the model output.
# The ids come from `test_ids`, returned by load_data for the test set;
# `test_labels` is not defined anywhere in the notebook.
submission = pd.DataFrame({"id":test_ids, "score":test_predictions[:,1]})

submission.to_csv("/content/drive/My Drive/kaggle/submission_02142020.csv", index = False)

# Only the last expression of a cell is rendered, so the preview goes here
# (a bare `submission` in the middle of the cell displayed nothing).
submission.head()