In [1]:
import tensorflow as tf

In [2]:
import numpy as np
import pandas as pd

### Import the dataset

In [3]:
# Load the training table. Column 0 is used as the target and every remaining
# column as an input feature.
dataset = pd.read_csv('train.csv')
y = dataset.iloc[:, 0].values
X = dataset.iloc[:, 1:].values

In [4]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(missing_values = np.nan, strategy = 'mean')
X = imputer.fit_transform(X)

### Split the dataset

In [5]:
from sklearn.model_selection import train_test_split
X_train_val, X_test,y_train_val,y_test = train_test_split(X,y,test_size=0.2,random_state = 0)

In [6]:
X_train, X_val, y_train, y_val = train_test_split(X_train_val,y_train_val,test_size=0.2,random_state=0)

### Feature Scaling

In [7]:
# Standardise the features. The scaler's statistics are learned from the
# training split only and then applied unchanged to the other two splits.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_val, X_test = sc.transform(X_val), sc.transform(X_test)

In [8]:
# Turn each flat feature row into a 20x20x3 tensor, the input shape the CNN
# below is declared with. -1 lets NumPy infer the number of samples per split.
X_train, X_val, X_test = (
    split.reshape(-1, 20, 20, 3) for split in (X_train, X_val, X_test)
)

In [9]:
# Sanity check: training inputs should now be (n_samples, 20, 20, 3).
X_train.shape

(3360, 20, 20, 3)

In [10]:
# Sanity check: test inputs should now be (n_samples, 20, 20, 3).
X_test.shape

(1050, 20, 20, 3)

In [11]:
# Sanity check: validation inputs should now be (n_samples, 20, 20, 3).
X_val.shape

(840, 20, 20, 3)

### Build the CNN

In [12]:
# Build the whole network in a single expression so that re-running this cell
# always produces the same fresh architecture. Adding layers with separate
# `cnn.add(...)` cells appends another copy of the layer every time one of
# those cells is re-executed, silently changing the model.
cnn = tf.keras.models.Sequential([
    # Two convolution + max-pooling stages over the 20x20x3 input.
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[20, 20, 3]),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Flatten the feature maps and classify with a fully connected head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=363, activation='relu'),
    # Single sigmoid unit: one probability per sample for the binary target.
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

### Train, evaluate and predict

In [34]:
# Configure training: Adam optimizer, binary cross-entropy loss (matching the
# single sigmoid output unit) and accuracy reported as the metric.
cnn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [35]:
# Print the layer-by-layer architecture and parameter counts.
# NOTE(review): the recorded output below lists a dense layer with 434 units,
# which does not match the units=363 in the code — the output appears stale
# and should be regenerated with Restart Kernel -> Run All.
cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 18, 18, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 9, 9, 32)         0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 32)          9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 3, 3, 32)         0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 288)               0         
                                                                 
 dense (Dense)               (None, 434)               1

In [36]:
# Inspect the training labels (NumPy abbreviates the display of long arrays).
y_train

array([[0],
       [0],
       [1],
       ...,
       [0],
       [0],
       [0]], dtype=int64)

In [37]:
# Check the label array's shape before turning it into a column vector.
# NOTE(review): y is built from a single column, so on a fresh kernel this is
# expected to be 1-D; the 2-D shape recorded below appears to come from
# re-running this cell after the reshape in the next cell.
y_train.shape

(3360, 1)

In [38]:
# Make the training labels a column vector, matching the model's single output
# unit. -1 lets NumPy infer the row count, so this no longer breaks when the
# dataset size or split ratio changes (the row count was hard-coded as 3360).
y_train = y_train.reshape(-1, 1)

In [39]:
# Check the test label array's shape before turning it into a column vector.
# NOTE(review): the 2-D shape recorded below appears to come from re-running
# this cell after the reshape in the next cell.
y_test.shape

(1050, 1)

In [40]:
# Make the test labels a column vector, matching the model's single output
# unit. -1 lets NumPy infer the row count, so this no longer breaks when the
# dataset size or split ratio changes (the row count was hard-coded as 1050).
y_test = y_test.reshape(-1, 1)

In [41]:
# Print the reshaped test labels to confirm they are a column of 0/1 values.
print(y_test)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [42]:
# Check the validation label array's shape before turning it into a column vector.
# NOTE(review): the 2-D shape recorded below appears to come from re-running
# this cell after the reshape in the next cell.
y_val.shape

(840, 1)

In [43]:
# Make the validation labels a column vector, matching the model's single
# output unit. -1 lets NumPy infer the row count, so this no longer breaks when
# the dataset size or split ratio changes (the row count was hard-coded as 840).
y_val = y_val.reshape(-1, 1)

In [44]:
from tensorflow.keras.callbacks import EarlyStopping

# Watch the validation loss with a patience of 3 epochs, and restore the
# weights from the best epoch when training is stopped early.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)


In [45]:
# Train for at most 25 epochs, validating on the held-out validation split
# after each epoch; the early-stopping callback may end training sooner.
history = cnn.fit(
    X_train,
    y_train,
    epochs=25,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)



Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25


In [46]:
# Score the trained network on the test split, which took no part in training
# or early stopping. The result is unpacked as (loss, accuracy).
test_loss, test_accuracy = cnn.evaluate(x=X_test, y=y_test)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)

Test Loss: 0.44302427768707275
Test Accuracy: 0.8304761648178101


In [47]:
# Load the data to predict on. Column 0 is skipped and the rest is used as features.
# NOTE(review): column 0 is presumably an id/index column — confirm against test.csv.
Test_data = pd.read_csv('test.csv')
X_data = Test_data.iloc[:, 1:].values

In [48]:
# Apply the preprocessing that was already fitted on data from train.csv.
# Fitting a second imputer on test.csv would fill NaNs with different column
# means than the ones the model was trained with, so the fitted `imputer` is
# reused (transform only), exactly as the fitted scaler `sc` already is.
X_data = imputer.transform(X_data)
X_data = sc.transform(X_data)

In [49]:
# Reshape the flat rows to the (n_samples, 20, 20, 3) layout the CNN expects.
X_data = np.reshape(X_data, (-1, 20, 20, 3))

In [50]:
# Run the network on the prepared data; the final layer is a single sigmoid
# unit, so each row of the result is one score per sample.
y_pred = cnn.predict(x=X_data)



In [51]:
# Inspect the raw sigmoid outputs before thresholding.
y_pred

array([[0.28315726],
       [0.22459006],
       [0.29123026],
       ...,
       [0.80876106],
       [0.22338219],
       [0.47208193]], dtype=float32)

In [52]:
# Convert scores to boolean class predictions: True where the score exceeds 0.5.
# Note that this overwrites the raw scores held in y_pred.
y_pred = y_pred > 0.5

In [53]:
# Inspect the thresholded (boolean) predictions.
y_pred

array([[False],
       [False],
       [False],
       ...,
       [ True],
       [False],
       [False]])

In [54]:
# Wrap the boolean predictions in a one-column DataFrame for export.
df = pd.DataFrame(data=y_pred)

In [55]:
# Write the predictions to disk. With pandas' defaults the row index and the
# column header are written to the file as well.
df.to_csv(path_or_buf='CNN_Output.csv')