In [39]:
import pandas as pd
import geopandas as gp
import numpy as np
import json
import csv
from shapely.geometry import box, Polygon, LineString, Point
import math

In [2]:
## Load both data files into a GeoPandas GeoSeries spatial index. -done
## Calculate the distance from each city to every other city and store those values in either a CSV or JSON file for later use. -done
## Determine a metric or threshold to "assign" a UFO sighting to a particular city. Maybe average the distance to the 100 closest UFOs as a start.
## Your files should be in JSON format.

In [52]:
def readFile(file):
    """Load a data file, choosing the parser from the file extension.

    Params:
        file (str or path-like) : path to a ``.geojson`` or ``.csv`` file
    Returns:
        list         : for ``.geojson`` input, the value stored under the
                       document's "features" key
        GeoDataFrame : for ``.csv`` input, the CSV rows plus a point geometry
                       built from the ``lon`` and ``lat`` columns
    Raises:
        ValueError : if the extension is neither ``.geojson`` nor ``.csv``
    """
    # Compare against the actual suffix, case-insensitively. A substring test
    # would treat e.g. "export.geojson.csv" as GeoJSON and reject "X.CSV".
    name = str(file).lower()

    if name.endswith(".geojson"):
        with open(file, "r") as f:
            data = json.load(f)
        return data["features"]

    if name.endswith(".csv"):
        df = pd.read_csv(file)
        return gp.GeoDataFrame(df, geometry=gp.points_from_xy(df.lon, df.lat))

    # Fail loudly instead of handing the caller an implicit None.
    raise ValueError(
        "Unsupported file type for %r: expected a .geojson or .csv file" % (file,)
    )

    
def closestUfo(cities, ufos, n_closest=100):
    """Record, for every city, the mean distance to its nearest UFO sightings.

    Params:
        cities    (list)         : GeoJSON-style feature dicts; each must have
                                   ["geometry"]["coordinates"] as (lon, lat)
                                   and a ["properties"] dict
        ufos      (GeoDataFrame) : sightings; only its "geometry" column is used
        n_closest (int)          : how many of the nearest sightings to average
                                   (default 100); if fewer sightings exist,
                                   all of them are averaged
    Returns:
        None. Each city gains properties["avg_distance"], a one-element list
        holding the mean, and the updated cities are written to
        "ufos_distance.json".
    Raises:
        ValueError : if n_closest is smaller than 1

    NOTE(review): GeoSeries.distance is a planar measure in the units of the
    coordinates, so for lon/lat points the stored value is in degrees, not the
    miles/km produced by haversineDistance. Confirm this is intended.
    """
    if n_closest < 1:
        raise ValueError("n_closest must be at least 1")

    ufo_points = gp.GeoSeries(ufos["geometry"])

    for city in cities:
        lon, lat = city["geometry"]["coordinates"]

        # Distance from this city to every sighting, nearest first.
        distances = np.sort(np.array(ufo_points.distance(Point(lon, lat)).values))

        # Kept as a one-element list so the output layout stays unchanged;
        # float() stores a plain Python number rather than a numpy scalar.
        city["properties"]["avg_distance"] = [float(np.average(distances[:n_closest]))]

    writeToFile("ufos_distance.json", cities)
    
      
def haversineDistance(lon1, lat1, lon2, lat2, units="miles"):
    """Calculate the great circle distance between two points on the earth
        (start and end) where each point is specified in decimal degrees.
    Params:
        lon1  (float)  : decimal degrees longitude of start (x value)
        lat1  (float)  : decimal degrees latitude of start (y value)
        lon2  (float)  : decimal degrees longitude of end (x value)
        lat2  (float)  : decimal degrees latitude of end (y value)
        units (string) : "miles" (default) or "km", the unit of the result
    Returns:
        distance (float) : distance in whichever units chosen
    Raises:
        KeyError : if units is neither "miles" nor "km"
    """
    radius = {"km": 6371, "miles": 3956}
    r = radius[units]  # choose miles or km for results

    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(math.radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2

    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/asin raise a math domain error.
    a = min(1.0, max(0.0, a))

    c = 2 * math.asin(math.sqrt(a))
    return c * r


def calcDistances(data):
    """Attach to every feature its distance to each feature in the list.

    Params:
        data (list) : GeoJSON-style feature dicts; each must have
                      ['geometry']['coordinates'] as (lon, lat) and a
                      ['properties'] dict
    Returns:
        list : the same `data` object, where each feature's
               ['properties']['distances'] lists the haversineDistance
               (default units) to every feature, itself included, in list order

    The updated features are also written to "cities_distance.json".
    """
    for origin in data:
        lon1, lat1 = origin['geometry']['coordinates']
        row = origin['properties']['distances'] = []
        for target in data:
            lon2, lat2 = target['geometry']['coordinates']
            row.append(haversineDistance(lon1, lat1, lon2, lat2))

    writeToFile("cities_distance.json", data)
    return data

def writeToFile(outfile, distances):
    """Serialize `distances` as JSON (2-space indent) into `outfile`.

    Params:
        outfile   (string) : path of the file to create or overwrite
        distances (object) : any JSON-serializable value
    """
    with open(outfile, mode="w") as handle:
        json.dump(distances, handle, indent=2)
        

In [53]:
if __name__ == "__main__":
    # Parse the city features, then add each city's distance to every city.
    # calcDistances returns the list it was given and also writes it to
    # "cities_distance.json".
    citiesDat = readFile("cities.geojson")
    cities_dist = calcDistances(citiesDat)
    
    # Load the UFO sightings and record, per city, the average distance to its
    # closest sightings; closestUfo writes the result to "ufos_distance.json".
    ufoDat = readFile("ufoSubset.csv")
    closestUfo(cities_dist, ufoDat)