In [1]:
import pandas as pd
import os, re
import numpy as np
import cv2
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide seaborn theme: "dark" style, seaborn color codes enabled,
# and fonts scaled by 1.5.
sns.set_theme(style="dark", color_codes=True, font_scale=1.5)

In [2]:
def load_partition_folder(partition="train"):
    """Load every image CSV of one dataset partition into a DataFrame.

    Files are read from ``<cwd>/image_csv/<partition>`` and must be named
    ``<img_index>_<label>.csv`` (both parts decimal digits). Each file is
    parsed with ``np.loadtxt`` as comma-separated ``float32`` values.

    Directory entries whose names do not follow that pattern exactly (for
    example ``.DS_Store``, ``.ipynb_checkpoints`` or ``1_2.csv.bak``) are
    skipped instead of aborting the whole load.

    Parameters
    ----------
    partition : str, default "train"
        Name of the sub-folder of ``image_csv`` to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``image`` (one array per file) and ``label`` (int), ordered
        by ascending image index, with a fresh ``RangeIndex``.

    Raises
    ------
    FileNotFoundError
        If the partition folder does not exist (raised by ``os.listdir``).
    """
    partition_folder = os.path.join(os.getcwd(), 'image_csv', partition)
    fname_pattern = re.compile(r'([0-9]+)_([0-9]+)\.csv')

    image_indexes, images, labels = [], [], []

    # sorted() makes the read order independent of the filesystem.
    for image_fname in sorted(os.listdir(partition_folder)):
        # fullmatch (rather than match) also rejects trailing junk after ".csv".
        image_re = fname_pattern.fullmatch(image_fname)
        if image_re is None:
            # Not an image file (hidden file, checkpoint folder, backup, ...).
            continue
        image_indexes.append(image_re.group(1))
        labels.append(image_re.group(2))
        images.append(
            np.loadtxt(
                os.path.join(partition_folder, image_fname),
                delimiter=',',
                dtype=np.float32,
            )
        )

    image_df = pd.DataFrame({'img_index': image_indexes, 'image': images, 'label': labels})

    # The regex groups are strings; convert so that sorting is numeric
    # (2 before 10) and labels are integers.
    image_df["img_index"] = image_df["img_index"].astype(int)
    image_df["label"] = image_df["label"].astype(int)

    # A stable sort keeps the (sorted) file order for any equal indexes.
    return (
        image_df
        .sort_values(by="img_index", kind="mergesort")
        .reset_index(drop=True)
        .drop(['img_index'], axis=1)
    )

In [3]:
# Read the training partition and preview its first rows.
train_df = load_partition_folder(partition="train")
train_df.head()

Unnamed: 0,image,label
0,"[[3.0, 3.0, 3.0, 4.0, 3.0, 3.0, 4.0, 4.0, 4.0,...",3
1,"[[2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 1.0,...",0
2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",2
3,"[[1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0,...",1
4,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",0


In [4]:
# Summarize the training frame: row count, columns, dtypes and non-null counts.
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10618 entries, 0 to 10617
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   image   10618 non-null  object
 1   label   10618 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 166.0+ KB


In [5]:
# Read the held-out test partition the same way and preview its first rows.
test_df = load_partition_folder("test")
test_df.head()

Unnamed: 0,image,label
0,"[[2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",0
1,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",0
2,"[[1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,...",0
3,"[[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",0
4,"[[1.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0, 1.0,...",0


In [6]:
# Summarize the test frame: row count, columns, dtypes and non-null counts.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1405 entries, 0 to 1404
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   image   1405 non-null   object
 1   label   1405 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 22.1+ KB


In [7]:
# Split each partition into inputs (the 'image' column) and targets (the
# 'label' column).
X_train, y_train = train_df["image"], train_df["label"]
X_test, y_test = test_df["image"], test_df["label"]

In [8]:
# Number of training inputs (X_train is a single column, so the shape is 1-D).
X_train.shape

(10618,)

In [9]:
# Number of training labels; both come from train_df, so this matches X_train.
y_train.shape

(10618,)

In [10]:
# Number of test inputs (X_test is a single column, so the shape is 1-D).
X_test.shape

(1405,)

In [11]:
# Number of test labels; both come from test_df, so this matches X_test.
y_test.shape

(1405,)

In [12]:
def convert_image_to_rgb(image):
    """Return the result of OpenCV's gray-to-RGB conversion of ``image``.

    Thin wrapper around ``cv2.cvtColor`` with ``cv2.COLOR_GRAY2RGB`` so it can
    be passed directly to ``Series.apply``.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    return rgb_image
 
# Convert from the source frames rather than from X_train / X_test themselves:
# re-running this cell then never feeds already-converted 3-channel images
# back into the COLOR_GRAY2RGB conversion.
X_train = train_df['image'].apply(convert_image_to_rgb)
X_test = test_df['image'].apply(convert_image_to_rgb)

In [13]:
# Inspect the training images after the RGB conversion.
X_train

0        [[[3.0, 3.0, 3.0], [3.0, 3.0, 3.0], [3.0, 3.0,...
1        [[[2.0, 2.0, 2.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
2        [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
3        [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
4        [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
                               ...                        
10613    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
10614    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
10615    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
10616    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
10617    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
Name: image, Length: 10618, dtype: object

In [14]:
# Inspect the test images after the RGB conversion.
X_test

0       [[[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [1.0, 1.0,...
1       [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
2       [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
3       [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
4       [[[1.0, 1.0, 1.0], [2.0, 2.0, 2.0], [2.0, 2.0,...
                              ...                        
1400    [[[0.0, 0.0, 0.0], [1.0, 1.0, 1.0], [0.0, 0.0,...
1401    [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0,...
1402    [[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0,...
1403    [[[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [2.0, 2.0,...
1404    [[[2.0, 2.0, 2.0], [2.0, 2.0, 2.0], [3.0, 3.0,...
Name: image, Length: 1405, dtype: object