<a href="https://colab.research.google.com/github/ojetokun/electricity_stats/blob/master/hamoye_neural_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
api_token = {"username":"lanreojetokun","key":"verySecret"}
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json
import json
with open('/root/.kaggle/kaggle.json', 'w') as file:
    json.dump(api_token, file)
!chmod 600 ~/.kaggle/kaggle.json

In [1]:
!kaggle datasets download -d nikitarom/planets-dataset

planets-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
!unzip planets-dataset.zip

In [2]:
import pandas as pd
import numpy as np
train_df = pd.read_csv("planet/planet/train_classes.csv")
split = train_df['tags'].map(lambda x: x.split(' '))
labels = list(set([y for x in split.values for y in x]))

In [3]:
import cv2
def load_data(df_train, labels, resize):
    X_train = []
    y_train = []

    label_map = {l: i for i, l in enumerate(labels)}
    inv_label_map = {i: l for l, i in label_map.items()}

    for f, tags in df_train.values:
        img = cv2.imread('planet/planet/train-jpg/{}.jpg'.format(f))
        targets = np.zeros(17)
        for t in tags.split(' '):
            targets[label_map[t]] = 1 

        X_train.append(cv2.resize(img,resize))
        y_train.append(targets)
        
    y_train = np.array(y_train, np.uint8)
    X_train = np.array(X_train, np.float16) / 255.

    return X_train, y_train

In [4]:

end = 20000 # used only 20,000 because of RAM
train_df = train_df[:end]
X, y = load_data(train_df, labels, resize=(128, 128))


In [5]:
#free RAM
del train_df,split

In [6]:
import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPool2D
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(128, 128, 3)),
    MaxPool2D(pool_size=(2, 2)),


    
    Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.2),


    Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'),
    
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.2),
    

    Flatten(),

    Dense(100, activation='relu'),
    Dense(17, activation='sigmoid') 
])

model.compile(loss='binary_crossentropy', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 128, 128, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 128)       73856     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 128)       0

In [7]:
from sklearn.model_selection import train_test_split
import time
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state = 1)

In [8]:
# free RAM
del X,y


In [9]:
model_fit = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_data=(X_val, y_val)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [37]:
import os
all_answers =[]
file_names = []
batch_size = 4000

def predict_test(batch,dir):
    batch_img = []
    
    for file in batch:
      
      img = cv2.imread('{}{}'.format(dir,file))
      img = cv2.resize(img,(128,128))
      img = np.array(img, np.float16) / 255
      batch_img.append(img)
      file_names.append([file[:-4]]) # -4 removes the .jpg from filename
    batch_img = np.array(batch_img)
    
    
    batch_ans = np.rint(model.predict(batch_img))
    batch_ans = batch_ans.astype(np.int32)
    
    
    for each in batch_ans:
      ans_string = ""
      for index,answer in enumerate(each):  
        if answer==1:
          ans_string += str(labels[index])+" "
      all_answers.append([ans_string])


all_test_files = os.listdir("planet/planet/test-jpg")
num_batches = len(all_test_files)//batch_size # batch size 32
i = 0
for all in range(num_batches):
  i+=1
  start = batch_size * all
  end = start + batch_size
  batch = all_test_files[start:end]
  predict_test(batch,"planet/planet/test-jpg/")
  print(str(i)+" / "+ str(num_batches))


remainder = len(all_test_files)% batch_size
if remainder != 0:
  start = len(all_test_files) - remainder
  rest = all_test_files[start:]
  predict_test(rest,"planet/planet/test-jpg/")

assert len(file_names)==len(all_answers)==len(all_test_files)



1 / 10
2 / 10
3 / 10
4 / 10
5 / 10
6 / 10
7 / 10
8 / 10
9 / 10
10 / 10


In [38]:


all_test_files = os.listdir("test-jpg-additional/test-jpg-additional")
num_batches = len(all_test_files)//batch_size 
i = 0
for all in range(num_batches):
  i+=1
  start = batch_size * all
  end = start + batch_size
  batch = all_test_files[start:end]
  predict_test(batch,"test-jpg-additional/test-jpg-additional/")
  print(str(i)+" / "+ str(num_batches))


remainder = len(all_test_files)% batch_size
if remainder != 0:
  start = len(all_test_files) - remainder
  rest = all_test_files[start:]
  predict_test(rest,"test-jpg-additional/test-jpg-additional/")

assert len(file_names)==len(all_answers)


1 / 5
2 / 5
3 / 5
4 / 5
5 / 5


In [31]:
len(all_answers)

61191

In [39]:
ans2 = np.array(all_answers)
file2 = np.array(file_names)
both = np.concatenate((file2,ans2),axis = 1)
df = pd.DataFrame(both)
df.columns = ["image_name","tags"]
df.to_csv("lanre_hamoye.csv")



from google.colab import files
files.download("lanre_hamoye.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>