In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
from sklearn.model_selection import train_test_split

In [2]:
positive_dir = Path("./dataset/Positive")
negative_dir = Path("./dataset/Negative")

## Generate our dataframe 

####  getting path for each of our image

In [3]:
import os

list(map(lambda x: os.path.split(x), list(positive_dir.glob(r'*.jpg'))))[:5]


[('dataset/Positive', '14257_1.jpg'),
 ('dataset/Positive', '14993_1.jpg'),
 ('dataset/Positive', '17075_1.jpg'),
 ('dataset/Positive', '14629_1.jpg'),
 ('dataset/Positive', '13888_1.jpg')]

#### function that will create our dataframe

In [4]:
def generate_df(image_dir, label):
    filepaths = pd.Series(list(image_dir.glob(r'*.jpg')), name='Filepath').astype(str)
    labels = pd.Series(label, name='Label', index=filepaths.index)
    df = pd.concat([filepaths, labels], axis=1)
    return df
    

In [5]:
positive_df = generate_df(positive_dir, label="POSITIVE")
negative_df = generate_df(negative_dir, label="NEGATIVE")

print("Positive dataframe is like")
positive_df.head()



Positive dataframe is like


Unnamed: 0,Filepath,Label
0,dataset/Positive/14257_1.jpg,POSITIVE
1,dataset/Positive/14993_1.jpg,POSITIVE
2,dataset/Positive/17075_1.jpg,POSITIVE
3,dataset/Positive/14629_1.jpg,POSITIVE
4,dataset/Positive/13888_1.jpg,POSITIVE


In [6]:
print("Negative dataframe is like")
negative_df.head()

Negative dataframe is like


Unnamed: 0,Filepath,Label
0,dataset/Negative/11972.jpg,NEGATIVE
1,dataset/Negative/18917.jpg,NEGATIVE
2,dataset/Negative/11868.jpg,NEGATIVE
3,dataset/Negative/01101.jpg,NEGATIVE
4,dataset/Negative/08060.jpg,NEGATIVE


### concating our dataframe to have randomness


In [7]:
all_df = pd.concat([positive_df, negative_df], axis=0).sample(frac=1.0, random_state=1).reset_index(drop=True)

print("Our final dataframe is: ")
all_df

Our final dataframe is: 


Unnamed: 0,Filepath,Label
0,dataset/Positive/05967.jpg,POSITIVE
1,dataset/Positive/00475.jpg,POSITIVE
2,dataset/Positive/03754.jpg,POSITIVE
3,dataset/Negative/12901.jpg,NEGATIVE
4,dataset/Positive/18037_1.jpg,POSITIVE
...,...,...
39995,dataset/Positive/12955_1.jpg,POSITIVE
39996,dataset/Negative/12127.jpg,NEGATIVE
39997,dataset/Positive/17015_1.jpg,POSITIVE
39998,dataset/Positive/06034.jpg,POSITIVE


### spliting our dataset into test and train

In [8]:
train_df, test_df = train_test_split(
    all_df.sample(6000, random_state=1),
    train_size = 0.8,
    shuffle=True,
    random_state = 1
)


Unnamed: 0,Filepath,Label
69,dataset/Positive/17771_1.jpg,POSITIVE
39015,dataset/Negative/18716.jpg,NEGATIVE
12880,dataset/Positive/06058.jpg,POSITIVE
12875,dataset/Negative/00069.jpg,NEGATIVE
9754,dataset/Positive/17810_1.jpg,POSITIVE
...,...,...
2090,dataset/Positive/19274_1.jpg,POSITIVE
35101,dataset/Positive/15365_1.jpg,POSITIVE
8720,dataset/Positive/03388.jpg,POSITIVE
9955,dataset/Positive/04125.jpg,POSITIVE


In [9]:
print("Our train_dataframe looks like:")
train_df

Our train_dataframe looks like:


Unnamed: 0,Filepath,Label
69,dataset/Positive/17771_1.jpg,POSITIVE
39015,dataset/Negative/18716.jpg,NEGATIVE
12880,dataset/Positive/06058.jpg,POSITIVE
12875,dataset/Negative/00069.jpg,NEGATIVE
9754,dataset/Positive/17810_1.jpg,POSITIVE
...,...,...
2090,dataset/Positive/19274_1.jpg,POSITIVE
35101,dataset/Positive/15365_1.jpg,POSITIVE
8720,dataset/Positive/03388.jpg,POSITIVE
9955,dataset/Positive/04125.jpg,POSITIVE


In [10]:
print("Our test_dataframe looks like:")
test_df

Our test_dataframe looks like:


Unnamed: 0,Filepath,Label
15731,dataset/Negative/15108.jpg,NEGATIVE
34272,dataset/Negative/01251.jpg,NEGATIVE
39532,dataset/Positive/11450_1.jpg,POSITIVE
10818,dataset/Positive/06278.jpg,POSITIVE
35421,dataset/Negative/10905.jpg,NEGATIVE
...,...,...
2899,dataset/Positive/12709_1.jpg,POSITIVE
29588,dataset/Negative/03366.jpg,NEGATIVE
14103,dataset/Negative/14778.jpg,NEGATIVE
13976,dataset/Negative/05463.jpg,NEGATIVE
