# Integrating with the SpaceKnow Google Cloud Storage Bucket

We want to integrate with a Google Cloud Storage bucket, more specifically the one used by SpaceKnow (the code is written for their data structure).

The code assumes you have the right permission file on your computer (the Google Cloud credentials supplied by SpaceKnow) - without it you won't be able to access the bucket.

Before you start, you will have to install the Google Cloud Storage plugin with pip:
pip install --upgrade google-cloud-storage

We then import the required modules:

In [None]:
from google.cloud import storage
from google.oauth2 import service_account
import json
import pymongo
import analona
from dateutil import parser
from datetime import datetime

Now we can try accessing the bucket.
First we need to set up our credentials and the deployment-specific information:

In [None]:
# Deployment-specific settings; replace every placeholder before running.
bucket_name = '<add_bucket_name_here>'                     # bucket to read from
credentials_file = '<add_path_to_credentials_here>.json'   # service-account key file
project_name = '<add_storage_name_here>'                   # project passed to the storage client
# This notebook only lists and downloads blobs, so request the read-only
# Cloud Storage scope instead of full_control (least privilege).
scope_url = 'https://www.googleapis.com/auth/devstorage.read_only'

Now we can initiate a client with our credentials:

In [None]:
# Load the service-account key, then derive a copy carrying the configured scope.
credentials = service_account.Credentials.from_service_account_file(credentials_file)
scoped_credentials = credentials.with_scopes([scope_url])

# Storage client bound to the configured project and the scoped credentials.
storage_client = storage.Client(project=project_name, credentials=scoped_credentials)

After doing so we can fetch our bucket and list the blobs of our two product feeds - one for weekly products and one for daily products:

In [None]:
# Fetch the bucket, then materialise the blob listing of each product feed
# (blob names starting with "weekly" and "daily" respectively).
bucket = storage_client.get_bucket(bucket_name)
weekly_blobs_list = list(bucket.list_blobs(prefix="weekly"))
# The original line was a chained assignment (`daily_blobs_list = blobs=...`)
# that also bound an unused global `blobs`; only the intended name is kept.
daily_blobs_list = list(bucket.list_blobs(prefix="daily"))

Now we can start parsing all the available products into our MongoDB database.
First we have to set up the connection to our MongoDB instance:

In [None]:
# change the uri based on the one given in our Azure cloud
mongo_uri = ''
client = pymongo.MongoClient(mongo_uri)

# Handle on the "louvre" database, plus one collection per detection class.
db = client["louvre"]
ships_collection = db["ships"]
airplanes_collection = db["airplanes"]
buildings_collection = db["buildings"]
roads_collection = db["roads"]
vegetation_collection = db["vegetation"]

Now we can fetch the actual data, starting with the weekly products:

In [None]:
def parse_weekly_to_mongo(wrunc_file, aoi, tile, week_string, date_string, record_file):
    """Validate the features of one weekly product and upsert them into MongoDB.

    Every feature becomes one document whose ``_id`` is
    ``SpaceKnow_<class>_<tile>_<week_string>_<n>``, where ``n`` counts the
    features of this file starting at 1. Features of class 'roads' go to
    ``roads_collection``, 'urban' to ``buildings_collection`` and every other
    class to ``vegetation_collection``. A document that fails validation is
    reported on stdout and not written.

    Args:
        wrunc_file: Parsed GeoJSON dict with a "features" list; each feature
            must provide "geometry" and ``properties.class``.
        aoi: Area-of-interest path component, used in the stored gs:// URL.
        tile: Tile path component, used in the ``_id`` and the gs:// URL.
        week_string: Week path component, used in the ``_id`` and the gs:// URL.
        date_string: Path component of the form ``<timestamp>_<suffix>``
            (exactly one underscore); the timestamp part is parsed as a date.
        record_file: Parsed record.json dict; ``image.sceneId`` and
            ``tilePosition`` are read from it.

    Raises:
        ValueError: If ``date_string`` does not split into exactly two parts
            on '_'.
        KeyError: If an expected key is missing from the inputs.
    """
    # Known classes mapped to their (validator, target collection) pair.
    targets = {
        'roads': (analona.Road, roads_collection),
        'urban': (analona.Building, buildings_collection),
    }
    # Every other class is stored as vegetation.
    # NOTE(review): vegetation is validated with analona.Building, exactly as
    # the previous code did - confirm whether a dedicated validator exists.
    fallback_target = (analona.Building, vegetation_collection)

    features = wrunc_file["features"]

    date_timestamp, _ = date_string.split('_')
    features_date = parser.parse(date_timestamp)

    # Values shared by every document of this product, computed once.
    # NOTE(review): timestamp() treats a naive datetime as local time, so the
    # stored value depends on the machine's timezone - confirm this is intended.
    delivery_time = datetime.utcfromtimestamp(int(round(features_date.timestamp())))
    analytics_url = "gs://{}/weekly/{}/{}/{}/{}".format(
        bucket_name, aoi, tile, week_string, date_string)
    original_images = [record_file['image']['sceneId']]

    for index, feature in enumerate(features, start=1):
        item_class = feature['properties']['class']
        item = {
            '_id': "SpaceKnow_{}_{}_{}_{}".format(item_class, tile, week_string, index),
            'company': "SpaceKnow",
            'geometry': feature['geometry'],
            'analyticsDeliveryTime': delivery_time,
            'analyticsInfo': {
                'storage': "GoogleCloud",
                'url': analytics_url,
            },
            'tileId': record_file['tilePosition'],
            'sourceImagesIds': original_images,
        }
        print(item)

        validator, collection = targets.get(item_class, fallback_target)
        is_valid = validator(item).validate()
        # Compared against True explicitly: the non-True result is printed as
        # the failure reason, so it is presumably not a plain boolean.
        if is_valid == True:
            # Collection.update() was removed in PyMongo 4; replace_one with
            # upsert=True performs the same whole-document upsert.
            collection.replace_one({"_id": item["_id"]}, item, upsert=True)
        else:
            print("didn't pass varification- {}".format(is_valid))
            

# Walk every weekly blob; each detections GeoJSON is parsed together with the
# record.json stored in the same folder and handed to parse_weekly_to_mongo.
for blob in weekly_blobs_list:
    print(blob.name)
    if not blob.name.endswith("wrunc_detections.geojson"):
        continue
    print(blob.name)
    # Blob names must have exactly six '/'-separated components.
    prod_type, aoi, tile, week_string, date_string, file_type = blob.name.split('/')
    record_path = "/".join(
        (prod_type, aoi, tile, week_string, date_string, "record.json"))
    record_blob = bucket.blob(record_path)
    record_file = json.loads(record_blob.download_as_string())
    wrunc_file = json.loads(blob.download_as_string())
    parse_weekly_to_mongo(wrunc_file, aoi, tile, week_string, date_string, record_file)
        

In [None]:
# Download the true-colour imagery of one weekly product to a local file.
weekly_image_path = "weekly/aoi-1/r00c06/2018W45/20181115T072837_LANc/imagery_truecolor.geotiff"
record_blob = bucket.blob(weekly_image_path)
record_blob.download_to_filename("image.tiff")

Now we would like to go over the daily products and parse them:

In [None]:
def parse_daily_to_mongo(detection_file, aoi, tile, day_date_string, specific_date_string, record_file):
    """Validate the features of one daily product and upsert them into MongoDB.

    Every feature becomes one document whose ``_id`` is
    ``SpaceKnow_<class>_<tile>_<day_date_string>_<n>``, where ``n`` counts the
    features of this file starting at 1. Features of class "ships" go to
    ``ships_collection`` and "airplanes" to ``airplanes_collection``; any other
    class is reported on stdout and skipped. A document that fails validation
    is reported on stdout and not written.

    Args:
        detection_file: Parsed GeoJSON dict with a "features" list; each
            feature must provide "geometry" and the properties "class",
            "area" and "orientation".
        aoi: Area-of-interest path component; not used by this function.
        tile: Tile path component, used in the ``_id``.
        day_date_string: Day path component, used in the ``_id``.
        specific_date_string: Path component of the form
            ``<timestamp>_<suffix>`` (exactly one underscore); the timestamp
            part is parsed and stored as the observation time.
        record_file: Parsed record.json dict; ``image.sceneId`` is read.

    Raises:
        ValueError: If ``specific_date_string`` does not split into exactly
            two parts on '_'.
        KeyError: If an expected key is missing from the inputs.
    """
    # Known classes mapped to their (validator, target collection) pair.
    targets = {
        "ships": (analona.Ship, ships_collection),
        "airplanes": (analona.Plane, airplanes_collection),
    }

    features = detection_file["features"]

    date_timestamp, _ = specific_date_string.split('_')
    features_date = parser.parse(date_timestamp)

    original_image = record_file['image']['sceneId']

    for index, feature in enumerate(features, start=1):
        print(feature)
        item_class = feature['properties']['class']
        item_id = "SpaceKnow_{}_{}_{}_{}".format(item_class, tile, day_date_string, index)
        print(item_id)
        item = {
            '_id': item_id,
            'company': "SpaceKnow",
            'geometry': feature['geometry'],
            'originalImageId': original_image,
            'observed': features_date,
            'area': feature['properties']['area'],
            'direction': feature['properties']['orientation'],
        }

        target = targets.get(item_class)
        if target is None:
            print("unknown object type: {}".format(item_class))
            continue

        validator, collection = target
        is_valid = validator(item).validate()
        # Compared against True explicitly: the non-True result is printed as
        # the failure reason, so it is presumably not a plain boolean.
        if is_valid == True:
            # Collection.update() was removed in PyMongo 4; replace_one with
            # upsert=True performs the same whole-document upsert.
            collection.replace_one({"_id": item["_id"]}, item, upsert=True)
        else:
            print("didn't pass varification- {}".format(is_valid))

# Walk every daily blob; each detection GeoJSON (anything ending in ".geojson"
# whose name does not contain "grid") is parsed together with the record.json
# stored in the same folder and handed to parse_daily_to_mongo.
for blob in daily_blobs_list:
    print(blob.name)
    if not blob.name.endswith(".geojson") or "grid" in blob.name:
        continue
    # Blob names must have exactly six '/'-separated components.
    file_type, aoi, tile, day_date, file_timestamp, file_name = blob.name.split('/')
    record_path = "/".join(
        (file_type, aoi, tile, day_date, file_timestamp, "record.json"))
    record_blob = bucket.blob(record_path)
    record_file = json.loads(record_blob.download_as_string())
    detection_file = json.loads(blob.download_as_string())
    parse_daily_to_mongo(detection_file, aoi, tile, day_date, file_timestamp, record_file)

In [None]:
# Download the true-colour imagery of one daily product to a local file.
daily_image_path = "daily/1-coastline/r00c03/20181120/20181120T075358_5zgP/imagery_truecolor.geotiff"
record_blob = bucket.blob(daily_image_path)
record_blob.download_to_filename("shipImage.tiff")