<a href="https://colab.research.google.com/github/YousefMohammad/clean_hydro_extraction/blob/main/hydrogen_project_cnn_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

> # Setups and Imports

> ## Setup kaggle

In [None]:
!pip install -q kaggle

In [None]:
from google.colab import files
files.upload()

Saving kaggle.json to kaggle.json


{'kaggle.json': b'{"username":"yousefmohammad","key":"8244e35b2e05f3ff05a24822e4a6900f"}'}

In [None]:
!mkdir ~/.kaggle

In [None]:
!cp kaggle.json ~/.kaggle/

In [None]:
!chmod 600 ~/.kaggle/kaggle.json

> ## Downloading Waste Pictures Dataset

In [1]:
!pip install gdown



In [2]:
!gdown --id 180XbhhXRFfei0NGreNO8pmAdBprNqEqw -O waste-pictures.zip

Downloading...
From (original): https://drive.google.com/uc?id=180XbhhXRFfei0NGreNO8pmAdBprNqEqw
From (redirected): https://drive.google.com/uc?id=180XbhhXRFfei0NGreNO8pmAdBprNqEqw&confirm=t&uuid=f84b583f-44f6-4517-aac8-7862ecb03723
To: /content/waste-pictures.zip
100% 2.20G/2.20G [00:31<00:00, 69.9MB/s]


In [3]:
!unzip waste-pictures.zip

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: train/plasticbag/plasticbag (186).jpg  
  inflating: train/plasticbag/plasticbag (187).jpg  
  inflating: train/plasticbag/plasticbag (188).jpg  
  inflating: train/plasticbag/plasticbag (189).jpg  
  inflating: train/plasticbag/plasticbag (19).jpg  
  inflating: train/plasticbag/plasticbag (190).jpg  
  inflating: train/plasticbag/plasticbag (191).jpg  
  inflating: train/plasticbag/plasticbag (192).jpg  
  inflating: train/plasticbag/plasticbag (193).jpg  
  inflating: train/plasticbag/plasticbag (194).jpg  
  inflating: train/plasticbag/plasticbag (195).jpg  
  inflating: train/plasticbag/plasticbag (196).jpg  
  inflating: train/plasticbag/plasticbag (197).jpg  
  inflating: train/plasticbag/plasticbag (198).jpg  
  inflating: train/plasticbag/plasticbag (199).jpg  
  inflating: train/plasticbag/plasticbag (2).jpg  
  inflating: train/plasticbag/plasticbag (2).png  
  inflating: train/plasticbag/plasticba

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!mkdir ./waste-pictures

In [6]:
!rm -r ./sample_data
!mv ./train ./waste-pictures/
!mv ./test ./waste-pictures/

In [7]:
!cp -r ./waste-pictures/ ./drive/MyDrive

> # Used Libraries

In [8]:
# General Liberaries
import os
import pandas as pd
import numpy as np

# Image Processing Liberaries
from PIL import Image

# Visualization Liberaries
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

# Machine learning Liberaries
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from skopt import BayesSearchCV
from sklearn.preprocessing import LabelEncoder

# Deep learning Liberaries
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.layers import Conv2D, BatchNormalization, MaxPool2D, Dropout, Flatten, Dense
from tensorflow.keras.preprocessing import image

> # Data Wrangling

In [10]:
main_path='waste-pictures'
os.listdir(main_path)

['test', 'train']

In [11]:
train_dir = os.path.join(main_path, 'train')
test_dir = os.path.join(main_path, 'test')

In [13]:
def count_file(dir=None, labels=None):
  for label in labels:
    num_data= len(os.listdir(os.path.join(dir, label)))
    print(f'{label} : {num_data}')

labels= os.listdir(train_dir) # train and test directories have the same cateogories
print('Train Set: \n' + '='*50)
count_file(train_dir,labels)

print('\nTest Set: \n' + '='*50)
count_file(test_dir,labels)

Train Set: 
glassbottle : 422
tabletcapsule : 701
plasticene : 755
cans : 596
bandaid : 292
plasticbag : 751
nut : 192
rag : 440
penholder : 882
nailpolishbottle : 986
thermometer : 700
leaflet : 683
bowlsanddishes : 610
pesticidebottle : 737
cigarettebutt : 54
milkbox : 128
traditionalChinesemedicine : 35
medicinebottle : 526
toothpastetube : 253
diapers : 567
carton : 173
napkin : 442
watermelonrind : 810
newspaper : 763
battery : 840
leftovers : 930
bulb : 681
facialmask : 601
XLight : 755
toothbrush : 362
plasticbottle : 184
bread : 732
toothpick : 98
chopsticks : 195

Test Set: 
glassbottle : 201
tabletcapsule : 206
plasticene : 219
cans : 209
bandaid : 120
plasticbag : 219
nut : 75
rag : 134
penholder : 210
nailpolishbottle : 327
thermometer : 208
leaflet : 198
bowlsanddishes : 205
pesticidebottle : 203
cigarettebutt : 44
milkbox : 79
traditionalChinesemedicine : 74
medicinebottle : 198
toothpastetube : 114
diapers : 211
carton : 134
napkin : 142
watermelonrind : 216
newspaper : 

In [14]:
def get_files(directory):
    file_sizes = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            file_sizes.append({'file': file_path})
    return file_sizes

In [15]:
get_files(train_dir)[:5]

[{'file': 'waste-pictures/train/glassbottle/glassbottle (223).jpg'},
 {'file': 'waste-pictures/train/glassbottle/glassbottle (313).jpg'},
 {'file': 'waste-pictures/train/glassbottle/glassbottle (329).jpg'},
 {'file': 'waste-pictures/train/glassbottle/glassbottle (380).jpg'},
 {'file': 'waste-pictures/train/glassbottle/glassbottle (440).jpg'}]

In [16]:
def create_dataframe(path=None, labels=None):
    data = []
    for label in labels:
        category_path = os.path.join(path, label)
        files = get_files(category_path)
        data.extend(files)
    df = pd.DataFrame(data)
    return df

In [17]:
train_df = create_dataframe(train_dir, labels)
test_df = create_dataframe(test_dir, labels)

In [18]:
print('shape of train_dir',train_df.shape)
print('shape of test_dir',test_df.shape)

shape of train_dir (17891, 1)
shape of test_dir (5756, 1)


In [19]:
train_df.head()

Unnamed: 0,file
0,waste-pictures/train/glassbottle/glassbottle (...
1,waste-pictures/train/glassbottle/glassbottle (...
2,waste-pictures/train/glassbottle/glassbottle (...
3,waste-pictures/train/glassbottle/glassbottle (...
4,waste-pictures/train/glassbottle/glassbottle (...


In [23]:
train_df.iloc[0]

Unnamed: 0,0
file,waste-pictures/train/glassbottle/glassbottle (...
label,glassbottle


In [25]:
def extract_label(file_path):
    for label in labels:
        if label in file_path:
            return label

dict_dir_df = {'train': train_df,
               'test': test_df}

In [26]:
for df in dict_dir_df.values():
    df['label'] = df['file'].apply(extract_label)

In [31]:
train_df.sample(3)

Unnamed: 0,file,label
13150,waste-pictures/train/battery/battery (638).jpg,battery
12579,waste-pictures/train/battery/battery (194).jpg,battery
3152,waste-pictures/train/plasticbag/plasticbag (24...,plasticbag


In [33]:
train_df.isnull().sum()

Unnamed: 0,0
file,0
label,0


In [34]:
test_df.isnull().sum()

Unnamed: 0,0
file,0
label,0


In [35]:
print('shape of train_dir',train_df.shape)
print('shape of test_dir',test_df.shape)

shape of train_dir (17891, 2)
shape of test_dir (5756, 2)


In [36]:
test_df.shape[0] / train_df.shape[0]

0.32172600748979935

> # Data Exploration

> # Data Pre-Processing

> # Data Augmentation

> # Building Model

> # Hyperparameter Tunning and Cross-Validation

> # Training Final Model

> # Model Evaluation

> # Post Processing