-
Notifications
You must be signed in to change notification settings - Fork 2
/
fabfile.py
366 lines (309 loc) · 13.3 KB
/
fabfile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
import os,sys,logging,time,shutil
import getpass, socket, platform
from fabric.state import env
from fabric.api import env,local,run,sudo,put,cd,lcd,puts,task,get,hide
import requests, json, sqlite3, urllib
from settings import BUCKET_NAME,DATA_PATH,INDEX_PATH,SQLITE_PATH,DONE_DATA_PATH
from settings import RESULT_STEPS, MAX_ITER, MAX_COLLECTION, BATCH_SIZE,INDEX_RUN,CRAWL_RUN,SQLDB_NAME
from settings import START_COLLECTION,ONETIME_COUNTSTART,ITER_START_COUNT
try:
import inception
except ImportError:
print "could not import main module limited to boostrap actions"
pass
from settings import USER,private_key,HOST,LOCALUSER,localhost_private_key,LOCALHOST
# Default Fabric connection settings: the local user, key file and host taken
# from settings. The `live` task overwrites these same three env attributes.
env.user = LOCALUSER
env.key_filename = localhost_private_key
env.hosts = [LOCALHOST,]
# Send INFO-and-above log records to logs/fab.log, appending to the file
# (filemode='a') rather than truncating it on each run.
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
datefmt='%m-%d %H:%M',
filename='logs/fab.log',
filemode='a')
@task
def live():
    """
    Select live environment

    Replaces the default connection settings on ``env`` with the USER,
    private_key and HOST values imported from settings.
    """
    # for gce - no sudo password is requested here, authentication always
    # goes through the rsa key file
    # env.password = getpass.getpass('sudo password: ')
    env.hosts = [HOST]
    env.key_filename = private_key
    env.user = USER
@task
def platformdetails():
    """
    Print platform details of the machine executing this fabfile

    Each line is a fixed label followed by one value reported by the
    ``platform`` module (or ``sys.platform``).
    """
    details = (
        ('uname:', platform.uname()),
        ('sys.system:', sys.platform),
        ('system :', platform.system()),
        ('node :', platform.node()),
        ('release :', platform.release()),
        ('version :', platform.version()),
        ('machine :', platform.machine()),
        ('processor :', platform.processor()),
    )
    for label, value in details:
        # label and value separated by one space, one pair per line
        print('%s %s' % (label, value))
@task
def getusername():
    """
    Print and return the login name reported by getpass.getuser()
    """
    username = getpass.getuser()
    print(username)
    return username
@task
def getip():
    """
    Print and return the local IP address of this machine

    The address is read from a UDP socket after connecting it towards
    10.255.255.255; when that fails with a socket error the loopback
    address '127.0.0.1' is used instead.

    :return: the IP address as a string
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        # doesn't even have to be reachable
        s.connect(('10.255.255.255', 0))
        IP = s.getsockname()[0]
    except socket.error:
        # only network failures trigger the fallback; KeyboardInterrupt and
        # SystemExit are no longer swallowed
        IP = '127.0.0.1'
    finally:
        s.close()
    print(IP)
    return IP
@task
def notebook():
    """
    Run an IPython notebook on an Cloud/AWS server

    The notebook password value comes from IPython's ``passwd()`` helper,
    called here without arguments. The full command line is printed before
    it is executed on the host with ``run``.
    """
    from IPython.lib.security import passwd
    template = ("ipython notebook --ip=0.0.0.0 --certfile=mycert.pem "
                "--NotebookApp.password={} --no-browser")
    command = template.format(passwd())
    print(command)
    run(command)
@task
def gen_ssl():
    """
    Generate a self-signed certificate on the host

    Runs ``openssl req`` to write the private key to mycert.key and a
    certificate valid for 365 days to mycert.pem.
    """
    # 2048-bit RSA: 1024-bit keys are considered too weak and are rejected by
    # current TLS clients
    run("openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout mycert.key -out mycert.pem")
def yes_or_no(question):
    """
    Ask a yes/no question on the console until a usable answer is given

    The first prompt is ``question + ' (y/n): '``; after an unusable answer
    the prompt becomes ``'please enter your choice (y/n): '``.

    :param question: text of the question to show
    :return: True when the reply starts with 'y', False when it starts with
             'n' (case-insensitive, surrounding whitespace ignored)
    """
    try:
        read_reply = raw_input  # Python 2
    except NameError:
        read_reply = input  # raw_input was renamed to input in Python 3
    prompt = question
    # loop instead of recursing so repeated bad answers cannot grow the stack
    while True:
        reply = str(read_reply(prompt + ' (y/n): ')).lower().strip()
        # startswith copes with an empty reply, where reply[0] raised IndexError
        if reply.startswith('y'):
            return True
        if reply.startswith('n'):
            return False
        prompt = "please enter your choice"
@task
def hostsetup():
    """
    Set up the host, then fetch a fresh copy of the code

    Runs setup(), removes any existing ~/TensorFlowSearch checkout, clones
    the repository again and copies its sqllite3 files into
    /home/deep/shopsite/sqllite3/.
    """
    # A False result from setup() means the confirmation was declined; stop
    # before deleting and re-cloning the checkout.
    if setup() is False:
        return
    # start setting up code .........................
    sudo("rm -rf ~/TensorFlowSearch/")
    run("git clone https://github.com/dataspring/TensorFlowSearch")
    sudo("cp ~/TensorFlowSearch/sqllite3/*.* /home/deep/shopsite/sqllite3/")
@task
def setup():
    """
    Task for initial set up of Cloud/AWS instance.
    Used AMI modified for Python2.7 https://gist.github.com/AlexJoz/1670baf0b32573ca7923
    Following commands show other packages/libraries installed while setting up the AMI

    Prints the target user and hosts and asks for confirmation first, because
    /home/deep/ is deleted and recreated.

    :return: False when the confirmation is declined (nothing is changed),
             True after all commands have been issued
    """
    print("running server setup...")
    print(env.user)
    print(env.hosts)
    if not yes_or_no('About to setup the above enviroment,will remove any existing code/data, do you want to proceed'):
        return False

    commands = [
        # ---- recreate the working directory tree ----
        "rm -rf /home/deep/",
        "mkdir /home/deep/",
        "mkdir /home/deep/shopsite/",
        "mkdir /home/deep/shopsite/images/",
        "mkdir /home/deep/shopsite/index/",
        "mkdir /home/deep/shopsite/sqllite3/",
        "mkdir /home/deep/shopsite/done/",
        # issued twice on purpose: sometimes the first one will fail due to
        # time out and in any case this is idempotent
        "chmod 700 -R /home/deep/",
        "chmod 700 -R /home/deep/",
        # ---- system packages (-y so no step waits for a keypress) ----
        "apt-get install -y build-essential",
        "apt-get install -y python-dev",  # for python2.x installs
        "apt-get install -y git",
        "add-apt-repository -y ppa:kirillshkrogalev/ffmpeg-next",
        "apt-get update",
        "apt-get install -y ffmpeg",
        "apt-get install -y libffi-dev libssl-dev",
        # ---- pip itself ----
        ### https://bugs.launchpad.net/ubuntu/+source/python-pip/+bug/1658844
        "apt-get install -y python-pip",
        "pip install -U pip setuptools",
        # ---- python packages ----
        ### http://stackoverflow.com/questions/29134512/insecureplatformwarning-a-true-sslcontext-object-is-not-available-this-prevent
        "pip install requests[security]",
        "pip install pycrypto",
        "pip install fabric",
        "pip install --upgrade fabric",
        "pip install --upgrade flask",
        "pip install --upgrade ipython",
        "pip install --upgrade jupyter",
        "apt-get install -y python-scipy",
        "apt-get install -y libblas-dev liblapack-dev libatlas-base-dev gfortran",
        "pip install --upgrade nearpy",
        "apt-get install -y sqlite3 libsqlite3-dev",
        "pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.0.1-cp27-none-linux_x86_64.whl",
    ]
    # the commands run strictly in the order listed above
    for command in commands:
        sudo(command)
    return True
@task
def localdevsetup():
    """
    Task for initial set up of local instance.
    call the setup() followed by tools installation locally

    Asks for the sudo password, runs setup(), copies the sqllite3 files from
    ~/TensorFlowSearch and installs Visual Studio Code (through ubuntu-make),
    FileZilla and sqlitebrowser.
    """
    env.password = getpass.getpass('sudo password: ')
    # A False result from setup() means the confirmation was declined; do not
    # go on installing tools in that case.
    if setup() is False:
        return
    sudo("cp ~/TensorFlowSearch/sqllite3/*.* /home/deep/shopsite/sqllite3/")
    print("running local development tools setup")
    tool_commands = [
        #----- Visual studio code setup ----------------------
        "add-apt-repository -y ppa:ubuntu-desktop/ubuntu-make",
        "apt-get update",
        "apt-get install -y ubuntu-make",
        "umake ide visual-studio-code",
        #----- FileZilla Setup ----------------------
        "apt-get install -y filezilla",
        #----- Valentina Studio /sqlite3 browser Setup ----------------------
        "apt-get install -y sqlitebrowser",
    ]
    # -y keeps the apt steps from waiting for a keypress
    for command in tool_commands:
        sudo(command)
@task
def connect():
    """
    Creates connect.sh for the current host

    The script is written to the current working directory and holds a
    single ssh command using env.key_filename as the identity file. The
    login name is fixed to "ubuntu" and the target is HOST from settings.
    :return:
    """
    # Build the text first so a failure here leaves an existing connect.sh
    # untouched instead of truncated.
    script = "#!/bin/bash\n" + "ssh -i " + env.key_filename + " " + "ubuntu" + "@" + HOST + "\n"
    # the with-block closes the file even when the write fails
    with open("connect.sh", 'w') as fh:
        fh.write(script)
@task
def server():
    """
    start server

    Launches server.py with the local python interpreter via ``local``.
    """
    launch_command = 'python server.py'
    local(launch_command)
@task
def index():
    """
    Index images

    Creates INDEX_PATH when it is missing, loads the inception network and
    then, for every batch of BATCH_SIZE images yielded from DATA_PATH,
    extracts features and stores them under INDEX_PATH. Load and processing
    times per batch are printed and logged.

    :raises ValueError: when INDEX_PATH cannot be created
    """
    # NOTE(review): this binds a function-local name only; the INDEX_RUN
    # imported from settings is not changed by it.
    INDEX_RUN = str(time.time())  # reset the batch run with latest tick every time index batch is run
    logging.info("Starting with images present in {} storing index in {}".format(DATA_PATH, INDEX_PATH))
    try:
        if not os.path.isdir(INDEX_PATH):
            os.mkdir(INDEX_PATH)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C is not converted
        # into a ValueError; the message now travels with the exception.
        message = "Could not created {}, if its on /mnt/ have you set correct permissions?".format(INDEX_PATH)
        print(message)
        raise ValueError(message)
    inception.load_network()
    count = 0
    start = time.time()
    with inception.tf.Session() as sess:
        for image_data in inception.get_batch(DATA_PATH, BATCH_SIZE):
            loaded = "Batch with {} images loaded in {} seconds".format(len(image_data), time.time() - start)
            print(loaded)
            logging.info(loaded)
            start = time.time()
            count += 1  # batch number handed to store_index
            features, files = inception.extract_features(image_data, sess)
            processed = "Batch with {} images processed in {} seconds".format(len(features), time.time() - start)
            print(processed)
            logging.info(processed)
            # restart the clock so the next "loaded" figure covers only the
            # time spent storing this batch and reading the next one
            start = time.time()
            inception.store_index(features, files, count, INDEX_PATH)
@task
def clear():
    """
    delete logs

    Removes logs/*.log through a local shell command started in the
    background (trailing '&').
    """
    print("Clearing log files now...")
    remove_logs = 'rm logs/*.log &'
    local(remove_logs)
def _fetch_listing_page(url):
    """
    GET one listing page with a 10 second timeout.

    A timeout is retried once after a 10 second pause; any other failure is
    printed and not retried.

    :param url: full request URL
    :return: the requests response, or None when no response was obtained
    """
    try:
        return requests.get(url, timeout=10)
    except requests.exceptions.ConnectionError as e:
        print(e)
    except requests.exceptions.Timeout as e:
        print(e)
        #------------------ retry once here -----------------------
        time.sleep(10)
        try:
            return requests.get(url, timeout=10)
        except Exception as e:
            print(e)
    except Exception as e:
        print(e)
    return None


@task
def ShopSiteImages():
    """
    Get Carosuell Images by Scrapping

    For every collection id from START_COLLECTION up to (not including)
    MAX_COLLECTION, requests up to MAX_ITER pages of RESULT_STEPS products
    each. Every product is inserted into the SellImages table of the sqlite
    database at SQLITE_PATH + SQLDB_NAME and, when the insert succeeds and
    the file exists in neither DONE_DATA_PATH nor DATA_PATH, its primary
    photo is downloaded into DATA_PATH.

    :return: None (also when the database cannot be opened)
    """
    crawl_run = str(time.time())  # identifies this run in the crawlrun column
    print('Crawl Batch Run : ' + crawl_run)
    logging.info('Crawl Batch Run : ' + crawl_run)
    print('................................')
    print('Runing Scrapping Now')
    print('................................')
    print('establishing sqlite3 connection')
    #----------------------- sqllite3 connection ---------------------------
    sqlitePath = SQLITE_PATH + SQLDB_NAME
    try:
        conn = sqlite3.connect(sqlitePath)
        print('opened ' + sqlitePath + ' successfully')
    except sqlite3.Error as connError:
        print(connError)
        print('Exiting App')
        return None
    #-------------------------------------------------------------------------
    service = 'https://carousell.com/ui/iso/api'
    collection = '/products/collections/{0}/'
    query = '{{"count":{0},"start":{1}}}'
    sqlInsert = "Insert into SellImages (id, imgurl, title, price, collection, country, currency, imgpath, imgfilename, crawlrun) Values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
    downloadCount = 0
    processCount = 0
    try:
        for collec in range(START_COLLECTION, MAX_COLLECTION):
            print('executing : ' + service)
            #--------------------------- a fix for restarting midway ------
            # only the first collection may resume from a later page
            if collec == START_COLLECTION and ONETIME_COUNTSTART:
                iterStartCount = ITER_START_COUNT
            else:
                iterStartCount = 0
            #--------------------------- end: a fix for restarting midway ------
            for count in range(iterStartCount, MAX_ITER):
                queryString = ';path=' + collection.format(collec) + ';query=' + query.format(RESULT_STEPS, count * RESULT_STEPS + 1)
                url = service + queryString
                print('for query : ' + queryString)
                response = _fetch_listing_page(url)
                # A failed request yields None: skip the page instead of
                # hitting an unbound name or re-reading the previous response.
                if response is None or response.status_code != requests.codes.ok:
                    continue
                for each in response.json()['result']['products']:
                    print("%s\r\n%s\r\n%s\r\n%s" % (each['id'], each['primary_photo_url'], each['title'], each['price']))
                    #----- prep variables ---------------------------------
                    imgId = each['id']
                    imgUrl = each['primary_photo_url']
                    imgTitle = each['title']
                    imgPrice = each['price']
                    imgCurrencySymbol = each['currency_symbol']
                    imgCollection = each['collection']['name']
                    imgCountry = each['marketplace']['country']['name']
                    imgPath = DATA_PATH
                    extension = ".png" if ".png" in imgUrl else ".jpg"
                    imgFileName = str(imgId) + extension
                    #------------------------- insert into sqllite ------------------------------------
                    processCount += 1  # counts every product seen, inserted or not
                    try:
                        # the connection as context manager commits on
                        # success and rolls back on error
                        with conn:
                            conn.execute(sqlInsert, (imgId, imgUrl, imgTitle, imgPrice, imgCollection, imgCountry, imgCurrencySymbol, imgPath, imgFileName, crawl_run))
                    except sqlite3.IntegrityError as inte:
                        print('%s %s' % (inte, imgId))
                        continue
                    #------- if insertion is ok, download the image -------------------
                    try:
                        if os.path.isfile(DONE_DATA_PATH + imgFileName) or os.path.isfile(DATA_PATH + imgFileName):
                            print('File already there...')
                        else:
                            urllib.urlretrieve(imgUrl, DATA_PATH + imgFileName)
                            # counted only once the download has succeeded
                            downloadCount += 1
                            print('File downloaded from : ' + imgFileName)
                    except Exception as e:
                        # best effort: a failed download must not stop the crawl
                        print(e)
    finally:
        # release the database handle however the crawl ends
        conn.close()
    summary = 'ShopSiteImages completed, Total processed : ' + str(processCount) + ', downloaded : ' + str(downloadCount)
    print(summary)
    logging.info(summary)