# Loading the dataset

### *Purpose: Importing the data required to interpret equations and storing it in a CSV file*

In [1]:
import numpy as np
import cv2
from PIL import Image
from matplotlib import pyplot as plt
%matplotlib inline
import os
from os import listdir
from os.path import isfile, join
import pandas as pd

In [2]:
def load_images_from_folder(folder):
    """Load every image in *folder* as a flattened 28x28 thresholded sample.

    Each file is read as grayscale, inverted and thresholded at 127. The
    external contour with the largest bounding rectangle is located, the
    thresholded image is cropped to that rectangle (plus 10 extra pixels to
    the right and bottom), resized to 28x28 and reshaped to (784, 1).

    Files that OpenCV cannot decode and images in which no contour is found
    are skipped instead of crashing or reusing the previous image's box.

    Args:
        folder: Path of the directory whose files are loaded.

    Returns:
        A list with one (784, 1) array per usable image.
    """
    train_data = []
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename), cv2.IMREAD_GRAYSCALE)
        # imread returns None for unreadable files; check before inverting,
        # because ~None raises TypeError.
        if img is None:
            continue
        img = ~img
        _, thresh = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)

        # findContours returns 2 or 3 values depending on the OpenCV version.
        found = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
        contours = found[0] if len(found) == 2 else found[1]
        if len(contours) == 0:
            # Nothing to crop around (e.g. a blank image).
            continue

        # Largest bounding box by area; iterating the sorted boxes in reverse
        # keeps the original tie-break (the last box in sorted order wins).
        boxes = sorted(cv2.boundingRect(contour) for contour in contours)
        x, y, w, h = max(reversed(boxes), key=lambda box: box[2] * box[3])

        # Slicing clips at the image border, so the +10 padding is safe.
        im_crop = thresh[y:y + h + 10, x:x + w + 10]
        im_resize = cv2.resize(im_crop, (28, 28))
        train_data.append(np.reshape(im_resize, (784, 1)))
    return train_data

In [3]:
data = []  # placeholder only; rebound by the next cell when the '-' images are loaded

In [4]:
# Load the '-' (minus) images and tag every sample with the label 10.
# np.append flattens each sample and adds the label as its last element.
data = [np.append(sample, ['10']) for sample in load_images_from_folder('-')]

print(len(data))

33997


In [7]:
# Load the '+' (plus) images and tag every sample with the label 11.
# np.append flattens each sample and adds the label as its last element.
data11 = [np.append(sample, ['11']) for sample in load_images_from_folder('+')]

print(len(data11))

25112


In [8]:
# Load the digit '0' images; the label appended to each sample is 0.
data0 = [np.append(sample, ['0']) for sample in load_images_from_folder('0')]

print(len(data0))

6914


In [9]:
# Load the digit '1' images; the label appended to each sample is 1.
data1 = [np.append(sample, ['1']) for sample in load_images_from_folder('1')]

print(len(data1))

26520


In [10]:
# Load the digit '2' images; the label appended to each sample is 2.
data2 = [np.append(sample, ['2']) for sample in load_images_from_folder('2')]

print(len(data2))

26141


In [11]:
# Load the digit '3' images; the label appended to each sample is 3.
data3 = [np.append(sample, ['3']) for sample in load_images_from_folder('3')]

print(len(data3))

10909


In [12]:
# Load the digit '4' images; the label appended to each sample is 4.
data4 = [np.append(sample, ['4']) for sample in load_images_from_folder('4')]

print(len(data4))

7396


In [13]:
# Load the digit '5' images; the label appended to each sample is 5.
data5 = [np.append(sample, ['5']) for sample in load_images_from_folder('5')]

print(len(data5))

3545


In [14]:
# Load the digit '6' images; the label appended to each sample is 6.
data6 = [np.append(sample, ['6']) for sample in load_images_from_folder('6')]

print(len(data6))

3118


In [15]:
# Load the digit '7' images; the label appended to each sample is 7.
data7 = [np.append(sample, ['7']) for sample in load_images_from_folder('7')]

print(len(data7))

2909


In [16]:
# Load the digit '8' images; the label appended to each sample is 8.
data8 = [np.append(sample, ['8']) for sample in load_images_from_folder('8')]

print(len(data8))

3068


In [17]:
# Load the digit '9' images; the label appended to each sample is 9.
data9 = [np.append(sample, ['9']) for sample in load_images_from_folder('9')]

print(len(data9))

3737


In [18]:
# The 'times' folder holds the '*' (multiplication) sign; its label is 12.
# np.append flattens each sample and adds the label as its last element.
data12 = [np.append(sample, ['12']) for sample in load_images_from_folder('times')]

print(len(data12))

3251


In [21]:
# The 'div' folder holds the '/' (division) sign as named in the dataset; its label is 13.
# np.append flattens each sample and adds the label as its last element.
data13 = [np.append(sample, ['13']) for sample in load_images_from_folder('div')]

print(len(data13))

868


In [22]:
# Stack the labelled samples of every class into one array, in the order
# '-', '+', digits 0-9, 'times', 'div'.
all_classes = (
    data, data11,
    data0, data1, data2, data3, data4,
    data5, data6, data7, data8, data9,
    data12, data13,
)
data = np.concatenate(all_classes)

In [24]:
# Wrap the combined samples in a DataFrame (default integer index) so they
# can be written out as a CSV file.
df = pd.DataFrame(data)

In [25]:
# Write the DataFrame to train_data.csv without the row index column.
df.to_csv('train_data.csv',index=False)