In [1]:
# basic data importing
from google.colab import files
import io
import numpy as np
import pandas

# Ask the user to upload the listings CSV; `uploaded` maps file name -> raw bytes.
uploaded = files.upload()
# Side length (in pixels) that every listing photo is resized to later on.
imgSize = 144

# Prefer the expected file name, but do not fail with a bare KeyError when the
# upload arrived under a different key (e.g. the file was renamed, or the
# runtime stored it under a suffixed name because a copy already existed).
csvName = 'SF_SOLD_AUG_2016.csv'
if csvName not in uploaded:
  if not uploaded:
    raise ValueError('No file was uploaded; expected ' + csvName)
  csvName = next(iter(uploaded))
data = pandas.read_csv(io.BytesIO(uploaded[csvName]))

Saving SF_SOLD_AUG_2016.csv to SF_SOLD_AUG_2016.csv


In [2]:
# utilities

def nonzeros(arr):
  """Return a sparse, dict-based view of `arr` that keeps only non-zero entries.

  Each element of `arr` is visited together with its position. Elements that
  are exactly `np.ndarray` instances are converted recursively (and are always
  kept, even when the nested result is empty); any other element is kept only
  if it compares unequal to 0.

  Returns:
    dict mapping position -> value (or nested dict for sub-arrays).
  """
  return {
    position: (nonzeros(value) if type(value) is np.ndarray else value)
    for position, value in enumerate(arr)
    # `or` short-circuits, so sub-arrays are never compared against 0.
    if type(value) is np.ndarray or value != 0
  }

In [9]:
# loading the photos

from PIL import Image
import urllib.request
import cv2

# Only the first five listings are processed here; widen the slice to cover more.
tempdata = data.iloc[:5]

features = []

for url in tempdata.PHOTOURL:
  # The listing's MLS number is taken from the last 8 characters of the photo URL.
  temp = {'MLSNUM' : url[len(url)-8 : len(url)]}
  # Read the bytes inside a `with` block so the HTTP connection is closed
  # immediately instead of being leaked for every photo.
  with urllib.request.urlopen(url) as response:
    imageBytes = response.read()
  # Force 3-channel RGB: grayscale, palette or RGBA photos would otherwise
  # yield arrays with the wrong number of channels for the VGG19 model.
  image = Image.open(io.BytesIO(imageBytes)).convert('RGB')
  imgArray = np.asarray(image, dtype=np.uint8)
  # Every photo is brought to the same square size.
  imgArray = cv2.resize(imgArray, (imgSize, imgSize))
  temp['IMAGE'] = imgArray
  features.append(temp)

# Wrapping the list of dicts yields a 1-D array with one entry per photo.
features = np.array(features)
print(features.shape)

(5,)


In [17]:
# model imports

from tensorflow.keras.applications.vgg19 import VGG19
#from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg19 import preprocess_input
#from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.models import Model
# base_model = VGG19(weights='imagenet')

# Convolutional part of VGG19 with ImageNet weights (classifier head left out).
base_model = VGG19(include_top=False, weights='imagenet')
# Feature extractor: same input as VGG19, output taken at the 'block4_pool' layer.
model = Model(
  inputs=base_model.input,
  outputs=base_model.get_layer('block4_pool').output,
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg19/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5


In [23]:
# predict feature map of each image, import into dataframe

dataArray = []

# `enumerate(..., start=1)` supplies the 1-based image number, and the loop
# variable is no longer called `map`, which used to shadow the builtin for the
# rest of the kernel session.
for count, entry in enumerate(features, start=1):
  imgDictionary = {'MLSNUM': entry['MLSNUM'], 'IMGNUM': count}
  # Add a leading batch axis so a single image can be passed to the model.
  img = np.expand_dims(entry['IMAGE'], axis = 0)
  img = preprocess_input(img)

  predictedFeatures = model.predict(img)
  # Store only the non-zero activations, as nested dicts keyed by position.
  imgDictionary['FEATURES'] = nonzeros(predictedFeatures)
  dataArray.append(imgDictionary)

# One row per image: MLSNUM, IMGNUM, FEATURES.
dataDataFrame = pandas.DataFrame(dataArray)

dataDataFrame

Unnamed: 0,MLSNUM,IMGNUM,FEATURES
0,71733847,1,"{0: {0: {0: {2: 896.57556, 6: 446.66565, 9: 88..."
1,71746032,2,"{0: {0: {0: {2: 73.08906, 6: 1296.667, 9: 838...."
2,71798931,3,"{0: {0: {0: {2: 330.06494, 3: 139.03076, 6: 95..."
3,71842445,4,"{0: {0: {0: {6: 370.16992, 9: 554.1953, 10: 83..."
4,71873314,5,"{0: {0: {0: {0: 27.816212, 1: 19.66652, 2: 499..."


In [28]:
# dataframe -> csv
import csv

# Write every entry of dataArray as one CSV row; the header is taken from the
# keys of the first entry.
with open('condo_images.csv', mode='w', newline='', encoding='utf8') as output_file:
  fc = csv.DictWriter(output_file, fieldnames=list(dataArray[0]))
  fc.writeheader()
  for record in dataArray:
    fc.writerow(record)