In [1]:
import exifread
import os
import datetime
from rtree import index
import pyproj
import csv

In [2]:
#converGPStoNYLISP
#converts GPS DMS coordinates to coordinates
#in the New York - Long Island State Plane Projection Coordinates
def convertGPStoNYLISPC (GPSLong,GPSLat):
    Degs,Mins,Secs = (0,1,2)
    Long = []
    Lat=[]
    #data Degs Mins and Secs are Ratios
    #convert them to floats
    Lat.append(float(GPSLat[Degs].num))
    Lat.append(float(GPSLat[Mins].num))
    Lat.append(float(GPSLat[Secs].num / GPSLat[Secs].den))
    
    Long.append(float(GPSLong[Degs].num))
    Long.append(float(GPSLong[Mins].num))
    Long.append(float(GPSLong[Secs].num / GPSLong[Secs].den))
    
    degLat  = Lat[Degs] + (60*Lat[Mins] + Lat[Secs]) / (60*60)
    degLong = Long[Degs] + (60*Long[Mins] + Long[Secs]) / (60*60)
    
    #convert to New York - Long Island State Plane Cordinates (ESRI:102718)
    projectstr = "+proj=lcc +lat_1=40.66666666666666 +lat_2=41.03333333333333 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000 +y_0=0 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs"
    p = pyproj.Proj(projectstr,preserve_units=True)
    
    xcoord,ycoord = p(-degLong,degLat)  
    return (xcoord,ycoord)

In [3]:
#create rtree index for project list
#PRE: project_keys is a list of (Borough,Block,Lot) pairs
#Borough, Block and Lot are strings

#Post: Returns an rtree index for project_lst
#

def createProjectIndex(keys_coords,radius=10):
    
    proj_index = index.Rtree()
    
    idx=0
    for key_coord in keys_coords:
        
        key = key_coord[0]
        xcoord,ycoord = key_coord[1]
        
        #find bounding box around lot
        left,right = xcoord - radius, xcoord + radius 
        bottom,top = ycoord - radius, ycoord + radius
        
        #insert key in the rtree index
        #print("inserting idx=" + str(idx) + " left= " + str(left) + " bottom = " + str(bottom) +" right= " + str(right) + "top= " + str(top))
        proj_index.insert(idx,(left,bottom,right,top),obj=(key,(xcoord,ycoord)))
        
        idx+=1
    
    return proj_index
    


def getProj_Keys_n_Coords(project_keys):
    plutofiles = ['BK.csv',  'BX.csv',  'MN.csv',  'QN.csv',  'SI.csv']
    
    keys_coords = []
    idx =0
    for file in plutofiles:
        with open(file,'r') as borough_file:
            borough_reader = csv.DictReader(borough_file)
            for lot in borough_reader:
                key = (lot['Borough'],lot['Block'],lot['Lot'])
                if key in project_keys:
                    #get lot coordinates
                    xcoord = int(lot['XCoord'])
                    ycoord = int(lot['YCoord'])
                    
                    #add project keys and coordinates
                    keys_coords.append((key,(xcoord,ycoord)))
                    
                    #remove index from the list
                    project_keys.remove(key)
                    if len(project_keys) == 0:
                        return keys_coords
                    
                    #increment index
                    idx+=1
                    
    #WARNING if this stage is reached there must beARE's  a wrong entry
    #in the project key or the pluto let the user know
    print("the following projects were not found:")
    for pr in project_keys:
        print(pr)
        
    return keys_coords

In [36]:
#insert images in the project folder
#PRE thre is a folder named 'proj_index' that
#with all the projcets
def movePicturesToFolders(proj_index,file_list,imgDict = {}):
    
    for image in file_list:
        try:
            #try extrating the exif info
            with open(image,'rb') as f:
                #read the exif tags
                tags = exifread.process_file(f)
                xcoord,ycoord = convertGPStoNYLISPC (tags['GPS GPSLongitude'].values,\
                                                    tags['GPS GPSLatitude'].values)
                
                #filterout pictures that are too far
                if xcoord < 905245 or xcoord > 1057756 or\
                   ycoord < 120018 or ycoord > 287281:
                        #put in the too far category
                        directory = "outside_nyc"
                        imgDict[directory] = imgDict.get(directory,[]) + [image]
                        file_list.remove(image)
                        continue
                
                #find collitions
                intersections = [lot.object for lot in proj_index.intersection((xcoord,ycoord,xcoord,ycoord),objects=True)]
                if len(intersections) != 1:
                    if len(intersections) > 1:
                        #if multiple candidates put it in the folder of the colsest property
                        mindist=1000000000000000000000000
                        minkey=0
                        for obj in intersections:
                            #compute distance between img location and project location 
                            #obj[1] is (lot xcoord,lot ycoord)
                            dist = distSqrd((xcoord,ycoord),obj[1])
                            if  dist < mindist:
                                mindist = dist
                                minkey = obj[0]
                    
                        #move to special folder for images that must be moved manually
                        #os.rename(image,os.path.join('insert_manually',image))
                        directory = minkey[0] + "_" +\
                                    minkey[1] + "_" +\
                                    minkey[2]len(img_dict['outside_nyc'])
                    
                        #make directory to move the images
                        #if not os.path.exists(directory):
                        #       os.mkdir(directory)
                        #put image in image dictionary
                        imgDict[directory] = imgDict.get(directory,[]) + [image]
                        file_list.remove(image)
                    
                    else:
                        pass
                        #print("could not find xcoord: " + str(xcoord) + " ycoord: " + str(ycoord))

                else:
                    #intersections is a list [((Borough,Block,Lot),(xcoord,ycoord)),.....]
                    directory = intersections[0][0][0] + "_" +\
                                intersections[0][0][1] + "_" +\
                                intersections[0][0][2]
                    #put image in the dictionary
                    imgDict[directory] = imgDict.get(directory,[]) + [image]
                    file_list.remove(image)
                    
        except Exception as e:
            print(e)
            print("EXIF could not be extracted from " + image)
            file_list.remove(image)
            
    
    return imgDict, file_list

#computes the euclidian distance squared between to points
def distSqrd(p1,p2):
    return sum([(i[0]-i[1])**2 for i in zip(p1,p2)])

In [5]:
#read projcets from file
#assume file project is named proj_file and is in csv format
#proj_file most be a csv file with required fields Borough, Block, Lot
def readProjectKeys():
    keys = []
    with open('proj_file','r') as proj_file:
        reader = csv.DictReader(proj_file)
        for project in reader:
            #create keys
            key = (project['Borough'],project['Block'],project['Lot'])
            keys.append(key)
    
    return keys

In [None]:
#pluto dataset
#keys of interest: Borough,Block,Lot,Address,XCoord,YCoord
#XCoord and YCoord

#image EXIF
#keys of interest: 

#os.rename(image,os.path.join(directory,image))

In [6]:
mykeys = readProjectKeys()
mykeys
projects = getProj_Keys_n_Coords(mykeys)


In [37]:
img_dict={}
file_lst= os.listdir()
radius=200
myindx = createProjectIndex(projects,radius)
img_dict,file_lst = movePicturesToFolders(myindx,file_lst)
myindx.close()

'GPS GPSLongitude'
EXIF could not be extracted from MN.csv
'GPS GPSLongitude'
EXIF could not be extracted from proj_index.dat
[Errno 21] Is a directory: 'QN_6314_1'
EXIF could not be extracted from QN_6314_1
'GPS GPSLongitude'
EXIF could not be extracted from proj_file
'GPS GPSLongitude'
EXIF could not be extracted from IMG_1795.JPG
[Errno 21] Is a directory: 'insert_manually'
EXIF could not be extracted from insert_manually
[Errno 21] Is a directory: '.git'
EXIF could not be extracted from .git
'GPS GPSLongitude'
EXIF could not be extracted from .gitignore
[Errno 21] Is a directory: 'QN_148_20'
EXIF could not be extracted from QN_148_20
'GPS GPSLongitude'
EXIF could not be extracted from IMG_1804.JPG
'GPS GPSLongitude'
EXIF could not be extracted from ImgsByBlockLot.ipynb
[Errno 21] Is a directory: '.ipynb_checkpoints'
EXIF could not be extracted from .ipynb_checkpoints
'GPS GPSLongitude'
EXIF could not be extracted from SI.csv
[Errno 21] Is a directory: 'QN_1452_44'
EXIF could not be

In [35]:
#for testing it may be helpful
#to look at what happens when the pictures

img_dict={}
file_lst= os.listdir()
myindx = createProjectIndex(projects)
img_dict,file_lst = movePicturesToFolders(myindx,file_lst)
myindx.close()

#refine the search interatively
delta = 10
radius=20
for i in range(100):
    print("Iterating with radius=" + str(radius))
    myindx = createProjectIndex(projects,radius)
    img_dict,file_lst = movePicturesToFolders(myindx,file_lst,img_dict)
    radius += delta
    print("Remaining pictures: " + str(len(file_lst)))
    myindx.close()

'GPS GPSLongitude'
EXIF could not be extracted from MN.csv
'GPS GPSLongitude'
EXIF could not be extracted from proj_index.dat
[Errno 21] Is a directory: 'QN_6314_1'
EXIF could not be extracted from QN_6314_1
'GPS GPSLongitude'
EXIF could not be extracted from proj_file
'GPS GPSLongitude'
EXIF could not be extracted from IMG_1795.JPG
[Errno 21] Is a directory: 'insert_manually'
EXIF could not be extracted from insert_manually
[Errno 21] Is a directory: '.git'
EXIF could not be extracted from .git
'GPS GPSLongitude'
EXIF could not be extracted from .gitignore
'GPS GPSLongitude'
EXIF could not be extracted from QN.csv
[Errno 21] Is a directory: 'QN_148_20'
EXIF could not be extracted from QN_148_20
'GPS GPSLongitude'
EXIF could not be extracted from IMG_1804.JPG
'GPS GPSLongitude'
EXIF could not be extracted from ImgsByBlockLot.ipynb
[Errno 21] Is a directory: '.ipynb_checkpoints'
EXIF could not be extracted from .ipynb_checkpoints
'GPS GPSLongitude'
EXIF could not be extracted from SI.cs

110

In [41]:
file_lst

['IMG_1638.JPG',
 'IMG_1536.JPG',
 'IMG_1963.JPG',
 'IMG_1434.JPG',
 'IMG_1600.JPG',
 'IMG_1490.JPG',
 'IMG_1529.JPG',
 'IMG_1598.JPG',
 'IMG_1444.JPG',
 'IMG_1676.JPG',
 'IMG_1548.JPG',
 'IMG_1516.JPG',
 'IMG_1567.JPG',
 'IMG_1483.JPG',
 'IMG_1542.JPG',
 'IMG_1452.JPG',
 'IMG_1472.JPG',
 'IMG_1778.JPG',
 'IMG_1655.JPG',
 'IMG_1625.JPG',
 'IMG_1513.JPG',
 'IMG_1474.JPG',
 'IMG_1605.JPG',
 'IMG_1906.JPG',
 'IMG_1711.JPG',
 'IMG_1586.JPG',
 'IMG_1515.JPG',
 'IMG_1646.JPG',
 'IMG_1786.JPG',
 'IMG_1591.JPG',
 'IMG_1458.JPG',
 'IMG_1766.JPG',
 'IMG_1611.JPG',
 'IMG_1716.JPG',
 'IMG_1714.JPG',
 'IMG_1546.JPG',
 'IMG_1858.JPG',
 'IMG_1862.JPG',
 'IMG_1752.JPG',
 'IMG_1837.JPG',
 'IMG_1774.JPG',
 'IMG_1855.JPG',
 'IMG_1447.JPG',
 'IMG_1432.JPG',
 'IMG_1519.JPG',
 'IMG_1448.JPG',
 'IMG_1553.JPG',
 'IMG_1685.JPG',
 'IMG_1654.JPG',
 'IMG_1832.JPG',
 'IMG_1726.JPG',
 'IMG_1721.JPG',
 'IMG_1450.JPG',
 'IMG_1585.JPG',
 'IMG_1410.JPG',
 'IMG_1885.JPG',
 'IMG_1510.JPG',
 'IMG_1441.JPG',
 'IMG_1728.JPG