# Data Cleansing


In [437]:
# Root folders holding each camera's original images; processImages walks them recursively.
MIKE_ORIGINALS = r'C:\DS Project\Images\Mike\Original'
# Alternative location for Mike's images (presumably a smaller trial folder - confirm);
# uncomment to override the path above.
#MIKE_ORIGINALS = r'C:\DS Project\Images\Mike\test'
NEIL_ORIGINALS = r'C:\DS Project\Images\Neil\Original'
# Folder tree searched by theImg: an image whose file name appears anywhere under it
# is flagged as a manually identified tractor.
MANUALLY_IDENTIFIED_TRACTORS =  r'C:\DS Project\Images\Tractors'

# An image taken more than this many seconds after the previous one starts a new set.
SET_SECONDS = 2

## Imports

In [438]:
import psycopg2
import db_config as creds
import os
import time
import datetime
from stat import * 

In [439]:
class theImg:
    """A single image file together with the metadata written to the database.

    Attributes:
        filename: bare file name of the image (no folder part).
        createdDate: timestamp supplied by the caller.
        cameraName: display name of the camera that took the image.
        cameraId: numeric identifier of that camera.
        folderLocation: folder the image was found in.
        MWIdentified: True when a file with exactly the same name exists
            anywhere under the manually-identified-tractors folder.
        imgSet: number of the set the image belongs to; None until the
            caller assigns it.
    """

    def __init__(self, filename, createdDate, cameraName, cameraId, folderLocation, tractorsDir=None):
        """Store the metadata and look the file name up among the tractor images.

        Args:
            filename: bare file name of the image.
            createdDate: timestamp to record for the image.
            cameraName: display name of the camera.
            cameraId: numeric identifier of the camera.
            folderLocation: folder the image lives in.
            tractorsDir: root folder to search for a same-named file; defaults
                to the module-level MANUALLY_IDENTIFIED_TRACTORS.
        """
        self.createdDate = createdDate
        self.filename = filename
        self.cameraName = cameraName
        self.cameraId = cameraId
        self.folderLocation = folderLocation

        # Defined up front so the attribute exists even before a set is assigned.
        self.imgSet = None

        if tractorsDir is None:
            tractorsDir = MANUALLY_IDENTIFIED_TRACTORS

        # See if the image has previously been identified as a tractor.
        # any() stops walking the tree as soon as the first match is found.
        self.MWIdentified = any(
            filename in files for _subdir, _dirs, files in os.walk(tractorsDir)
        )
    

In [440]:
def processImages(imgLocation, cameraName, cameraId):
    """Group the images under a folder into time-based sets and store each set.

    Every file below imgLocation (searched recursively) is stamped with its
    modification time. Files are then processed in chronological order; an
    image whose timestamp is more than SET_SECONDS after the previous image
    starts a new set. Each completed set is handed to addSetToDB.

    Args:
        imgLocation: root folder to scan.
        cameraName: camera name recorded on every image.
        cameraId: camera id recorded on every image.

    Returns:
        None. Nothing is written when no files are found.
    """
    # Collect everything first: os.walk yields entries in arbitrary order, and
    # the set boundaries below are only meaningful in chronological order.
    found = []
    for subdir, _dirs, files in os.walk(imgLocation):
        for filename in files:
            filepath = os.path.join(subdir, filename)
            # Indexing with ST_MTIME gives whole seconds (the .st_mtime attribute
            # is a float), which keeps the gap comparison on integer seconds.
            found.append((os.stat(filepath)[ST_MTIME], subdir, filename))

    # Sort by time; folder and file name break ties so runs are repeatable.
    found.sort()

    imgSet = 1
    imagesInSet = []
    previousStamp = None

    for modifiedStamp, subdir, filename in found:
        # Gaps are measured on the raw timestamps rather than on the local
        # datetimes, so a clock change between two images cannot distort them.
        if imagesInSet and (modifiedStamp - previousStamp) > SET_SECONDS:
            # The gap closes the current set: store it and start the next one.
            addSetToDB(imagesInSet)
            imgSet += 1
            imagesInSet = []

        createdDate = datetime.datetime.fromtimestamp(modifiedStamp)
        newImg = theImg(filename, createdDate, cameraName, cameraId, subdir + os.sep)
        newImg.imgSet = imgSet
        imagesInSet.append(newImg)
        previousStamp = modifiedStamp

    # Store the trailing set; skipped when the folder held no files at all.
    if imagesInSet:
        addSetToDB(imagesInSet)

## Database functions

In [441]:
def connect(port="5432"):
    """Open a connection to the PostgreSQL server and create a cursor on it.

    Host, database, user and password are read from the db_config module
    (imported as creds).

    Args:
        port: server port; defaults to "5432".

    Returns:
        A (connection, cursor) tuple.
    """
    # Keyword arguments let psycopg2 handle quoting, so credentials containing
    # spaces or quotes cannot corrupt a hand-built connection string.
    conn = psycopg2.connect(
        host=creds.PGHOST,
        port=port,
        dbname=creds.PGDATABASE,
        user=creds.PGUSER,
        password=creds.PGPASSWORD,
    )

    # Create a cursor object; do not leave the connection open if that fails.
    try:
        cursor = conn.cursor()
    except Exception:
        conn.close()
        raise

    print ("connected!")
    return conn, cursor

def disconnect(conn,cursor):
    """Close the cursor and then the connection.

    Args:
        conn: open database connection.
        cursor: cursor created from conn.

    The connection is closed even when closing the cursor raises; in that case
    the exception propagates and the confirmation message is not printed.
    """
    try:
        cursor.close()
    finally:
        conn.close()
    print ("disconnected!")
    
def addSetToDB(imgSet):
    """Insert every image of one set into the images table.

    Args:
        imgSet: iterable of image objects exposing filename, cameraId,
            cameraName, createdDate, imgSet, MWIdentified and folderLocation.

    The whole set is written in one transaction: it is committed once after
    all rows have been executed, so a failure part-way through leaves none of
    the set's rows behind. The connection is always released, and no
    connection is opened for an empty set.

    Raises:
        Whatever connect() or the database driver raises; the error propagates
        after the connection has been closed.
    """
    images = list(imgSet)

    # Nothing to store: skip the connection round-trip entirely.
    if not images:
        return

    sql = """INSERT INTO images ("fileName", "cameraId", "cameraName", "dateTaken", "setId", "MW_ManuallyIdentifiedTractor", "folderLocation") VALUES (%s, %s, %s, %s, %s, %s, %s)"""

    # Connecting to DB
    conn, cursor = connect()

    try:
        for img in images:
            cursor.execute(sql, (img.filename,img.cameraId,img.cameraName, img.createdDate, img.imgSet, img.MWIdentified, img.folderLocation,))

        # One commit per set; closing the connection without reaching this
        # line discards the uncommitted rows.
        conn.commit()

        # Report only once the rows are actually committed.
        for img in images:
            print ("Inserted..",img.filename,img.cameraId,img.cameraName, img.createdDate, img.imgSet, img.MWIdentified, img.folderLocation)
    finally:
        # Disconnect from DB, whether or not the inserts succeeded.
        disconnect(conn, cursor)

# Process Original Images from Mike and Neil

In [442]:
# Scan Mike's image folder (camera id 1) and insert every image set into the database.
# NOTE(review): the recorded output below lists the ...\Mike\test\ folder, so it appears
# to come from a run made with the alternative MIKE_ORIGINALS path - confirm before relying on it.
processImages(MIKE_ORIGINALS, "Mike", 1) 
# Neil's images (camera id 2) are currently not processed; uncomment to include them.
#processImages(NEIL_ORIGINALS, "Neil", 2) 

connected!
Inserted.. 01Mk0005.JPG 1 Mike 2020-03-11 07:10:52 1 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0006.JPG 1 Mike 2020-03-11 07:10:54 1 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0007.JPG 1 Mike 2020-03-11 07:10:56 1 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0008.JPG 1 Mike 2020-03-11 07:10:58 1 False C:\DS Project\Images\Mike\test\
disconnected!
connected!
Inserted.. 01Mk0009.JPG 1 Mike 2020-03-11 07:11:02 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0010.JPG 1 Mike 2020-03-11 07:11:02 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0011.JPG 1 Mike 2020-03-11 07:11:02 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0012.JPG 1 Mike 2020-03-11 07:11:02 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0013.JPG 1 Mike 2020-03-11 07:11:04 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0014.JPG 1 Mike 2020-03-11 07:11:04 2 False C:\DS Project\Images\Mike\test\
Inserted.. 01Mk0015.JPG 1 Mike 2020-03-11 07:11:04 2 F