# Convert a shapefile to one PNG mask file per record

The code below creates PNG mask files that can be used with the wordcloud library for data visualization

Import libraries

In [1]:
import shapefile #Provides support to handle ESRI Shapefiles in pure Python.
from PIL import Image, ImageDraw #Create and draw images
import unidecode #Remove special characters and symbols of non-English Languages

Define a function that calculates the image width, height and scaling factors, so that the images of all the neighbourhoods have a consistent size

In [2]:
def getSizeAndScaling(points, image_max_dimension, image_padding):
    """Compute the image size and the coordinate scaling factors for a shape.

    The bounding box of ``points`` is fitted into an image whose longest side
    is ``image_max_dimension`` pixels; the other side is shrunk according to
    the bounding-box aspect ratio.

    Args:
        points: Iterable of coordinates; only the first two items of each
            coordinate (x and y) are used.
        image_max_dimension: Length in pixels of the longest image side.
        image_padding: Frame in pixels left free on every side of the image.

    Returns:
        A list ``[iwidth, iheight, maxX, maxY, xratio, yratio]`` where
        ``iwidth``/``iheight`` are the image size in pixels, ``maxX``/``maxY``
        are the largest x and y coordinates, and ``xratio``/``yratio`` are the
        pixels-per-coordinate-unit factors for the padded drawing area.

    Raises:
        ValueError: If ``points`` is empty or its bounding box has no width
            or no height (the scaling factors would be undefined).
    """
    xy_pairs = [(coords[0], coords[1]) for coords in points]
    if not xy_pairs:
        raise ValueError("points must contain at least one coordinate")

    # Get the min and max values for the coordinates
    xs = [x for x, _ in xy_pairs]
    ys = [y for _, y in xy_pairs]
    minX, maxX = min(xs), max(xs)
    minY, maxY = min(ys), max(ys)

    # Bounding box extents along each axis
    xdist = maxX - minX
    ydist = maxY - minY
    if xdist == 0 or ydist == 0:
        raise ValueError("points must span a non-zero width and height")

    # An xyratio greater than one means a landscape image, an xyratio of less
    # than one means a portrait image.
    xyratio = xdist / ydist

    if xyratio >= 1:
        iwidth = image_max_dimension
        iheight = int(image_max_dimension / xyratio)
    else:
        # Portrait: the height is the longest side, so the width has to be
        # scaled DOWN by the ratio (multiplying, since xyratio < 1).
        iwidth = int(image_max_dimension * xyratio)
        iheight = image_max_dimension

    # Pixels per coordinate unit once the padding frame has been removed
    xratio = (iwidth - 2 * image_padding) / xdist
    yratio = (iheight - 2 * image_padding) / ydist

    return [iwidth, iheight, maxX, maxY, xratio, yratio]


Loop through the shapes in the file

In [3]:
# Open shapefile with Python Shapefile Library
shapefile_name = 'raw_data/BCN_Barri_ED50_SHP' # e.g. england_oa_2001

# Encoding for Western languages such as Catalan. It is passed to the
# constructor so that it is already in effect while the reader opens the file.
shp_r = shapefile.Reader(shapefile_name, encoding='Latin-1')
records = shp_r.records()

image_max_dimension = 600 # Change this to desired max dimension of generated PNG
image_padding = 10 # Frame to add around the picture

# Shapes and records are stored in the same order, so walk them in lockstep
for feature, record in zip(shp_r.shapes(), records):

    iwidth, iheight, maxX, maxY, xratio, yratio = getSizeAndScaling(
        feature.points, image_max_dimension, image_padding
    )

    # Name of the file to save, built from two record fields with accents
    # and other special characters transliterated
    filename = (
        'outputs/masks/neighbourhoods/'
        + unidecode.unidecode(record[2])
        + "-"
        + unidecode.unidecode(record[3])
        + ".png"
    )
    image = Image.new(mode="RGB", size=(iwidth, iheight), color="white")
    draw = ImageDraw.Draw(image)

    # feature.parts holds the index of the first point of every part. Adding
    # the total number of points as a final boundary lets consecutive pairs
    # delimit each part; a single-part shape is simply the one-pair case.
    # A new list is built so that feature.parts itself is left untouched.
    part_bounds = list(feature.parts) + [len(feature.points)]

    for start, end in zip(part_bounds, part_bounds[1:]):

        # Convert every coordinate of the part to a pixel position, measured
        # from the right/top edge of the padded drawing area
        pixels = []
        for coords in feature.points[start:end]:
            x, y = coords[0], coords[1]

            px = int(iwidth - image_padding - ((maxX - x) * xratio))
            py = int(image_padding + (maxY - y) * yratio)
            pixels.append((px, py))

        draw.polygon(pixels, fill="black")

    # Save the file
    image.save(filename)