In [168]:
from google_drive_downloader import GoogleDriveDownloader as gdd

gdd.download_file_from_google_drive(file_id='1ahtCipPXsX9BVRWi6nDG13tr4b_rMY97',
dest_path='content/detect_trees_near_electric_wires.zip',
unzip=True)

In [169]:
import pandas as pd 
import numpy as np 
import tensorflow as tf 
import os 
import cv2 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score

In [170]:
labels = pd.read_csv("/content/content/detect_trees_near_electric_wires/train.csv") 
labels.head() 

Unnamed: 0,file_name,Intersection
0,Image_1.jpg,0
1,Image_2.jpg,1
2,Image_3.jpg,1
3,Image_4.jpg,1
4,Image_5.jpg,1


In [171]:
labels.tail()

Unnamed: 0,file_name,Intersection
1437,Image_1438.jpg,1
1438,Image_1439.jpg,0
1439,Image_1440.jpg,1
1440,Image_1441.jpg,0
1441,Image_1442.jpg,1


In [172]:
file_paths = [[fname, '/content/content/detect_trees_near_electric_wires/train/' + fname] for fname in labels['file_name']]

In [173]:
# Confirm if number of images is same as number of labels given
if len(labels) == len(file_paths):
   print('Number of labels i.e. ', len(labels), 'matches the number of filenames i.e. ', len(file_paths))
else:
   print('Number of labels does not match the number of filenames')

Number of labels i.e.  1442 matches the number of filenames i.e.  1442


In [174]:
images = pd.DataFrame(file_paths, columns=['file_name', 'filepaths'])
images.head()

Unnamed: 0,file_name,filepaths
0,Image_1.jpg,/content/content/detect_trees_near_electric_wi...
1,Image_2.jpg,/content/content/detect_trees_near_electric_wi...
2,Image_3.jpg,/content/content/detect_trees_near_electric_wi...
3,Image_4.jpg,/content/content/detect_trees_near_electric_wi...
4,Image_5.jpg,/content/content/detect_trees_near_electric_wi...


In [175]:
train_data = pd.merge(images, labels, how = 'inner', on = 'file_name')
train_data.head()

Unnamed: 0,file_name,filepaths,Intersection
0,Image_1.jpg,/content/content/detect_trees_near_electric_wi...,0
1,Image_2.jpg,/content/content/detect_trees_near_electric_wi...,1
2,Image_3.jpg,/content/content/detect_trees_near_electric_wi...,1
3,Image_4.jpg,/content/content/detect_trees_near_electric_wi...,1
4,Image_5.jpg,/content/content/detect_trees_near_electric_wi...,1


In [176]:
data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(train_data)):

    img_array = cv2.imread(train_data['filepaths'][i], cv2.IMREAD_GRAYSCALE) # converting the image to gray scale

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # resizing the image array
    data.append([new_img_array, train_data['Intersection'][i]])

In [177]:
data[0]

[array([[198, 197, 196, ...,  51, 236, 253],
        [198, 198, 199, ..., 212, 193, 154],
        [199, 199, 200, ..., 102, 108, 113],
        ...,
        [136, 190,  55, ..., 105, 117, 115],
        [137, 248,  94, ...,  28,  67,  85],
        [146, 143, 128, ...,  37,  40,  35]], dtype=uint8), 0]

In [178]:
np.random.shuffle(data)

In [179]:
x = []
y = []
for image in data:
    x.append(image[0])
    y.append(image[1])

# converting x & y to numpy array as they are list
x = np.array(x)
y = np.array(y)

In [180]:
np.unique(y, return_counts=True)

(array([0, 1]), array([616, 826]))

In [181]:
# split the data
X_train, X_val, y_train, y_val = train_test_split(x,y,test_size=0.3, random_state = 42)

In [185]:
# Defining the model
model = tf.keras.Sequential([
tf.keras.layers.Flatten(input_shape=(100, 100)), # flattening the image
tf.keras.layers.Dense(100, activation='relu'),
tf.keras.layers.Dense(90, activation='relu'),
tf.keras.layers.Dense(80, activation='relu'),
tf.keras.layers.Dense(70, activation='relu'),
tf.keras.layers.Dense(60, activation='relu'),
tf.keras.layers.Dense(50, activation='relu'),
tf.keras.layers.Dense(40, activation='relu'),
tf.keras.layers.Dense(30, activation='relu'),
tf.keras.layers.Dense(20, activation='relu'),
tf.keras.layers.Dense(10, activation='relu'),
tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])

model.fit(X_train, y_train, epochs=20, batch_size=10)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fa14d5181d0>

In [186]:
model.evaluate(X_val, y_val)



[0.7345910668373108, 0.5981523990631104]

In [187]:
# Loading the order of the image's name that has been provided
test_image_order = pd.read_csv("/content/content/detect_trees_near_electric_wires/Test.csv")
test_image_order.head()

Unnamed: 0,file_name
0,Image_1.jpg
1,Image_2.jpg
2,Image_3.jpg
3,Image_4.jpg
4,Image_5.jpg


In [188]:
file_paths = [[fname, '/content/content/detect_trees_near_electric_wires/test/' + fname] for fname in test_image_order['file_name']]

In [189]:
# Confirm if number of images is same as number of labels given
if len(test_image_order) == len(file_paths):
   print('Number of image names i.e. ', len(test_image_order), 'matches the number of file paths i.e. ', len(file_paths))
else:
   print('Number of image names does not match the number of filepaths')

Number of image names i.e.  723 matches the number of file paths i.e.  723


In [190]:
test_images = pd.DataFrame(file_paths, columns=['file_name', 'filepaths'])
test_images.head()

Unnamed: 0,file_name,filepaths
0,Image_1.jpg,/content/content/detect_trees_near_electric_wi...
1,Image_2.jpg,/content/content/detect_trees_near_electric_wi...
2,Image_3.jpg,/content/content/detect_trees_near_electric_wi...
3,Image_4.jpg,/content/content/detect_trees_near_electric_wi...
4,Image_5.jpg,/content/content/detect_trees_near_electric_wi...


In [191]:
test_pixel_data = [] # initialize an empty numpy array
image_size = 100 # image size taken is 100 here. one can take other size too
for i in range(len(test_images)):

    img_array = cv2.imread(test_images['filepaths'][i], cv2.IMREAD_GRAYSCALE) # converting the image to gray scale

    new_img_array = cv2.resize(img_array, (image_size, image_size)) # resizing the image array

    test_pixel_data.append(new_img_array)

In [192]:
test_pixel_data = np.array(test_pixel_data)

In [193]:
pred = model.predict(test_pixel_data)

In [194]:
# The predicted values are the probabilities value
pred[0]

array([0.7731006], dtype=float32)

In [195]:
prediction = []
for value in pred:
    if value < 0.5:
       prediction.append(0)
    else:
       prediction.append(1)

In [196]:
prediction[0]

1

In [197]:
res = pd.DataFrame({'file_name': test_images['file_name'], 'prediction': prediction}) # prediction is nothing but the final predictions of your model on input features of your new unseen test data
res.to_csv("submission.csv", index = False)

# To download the csv file locally
from google.colab import files
files.download('submission.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>