# Data crawling and processing TEST

In [1]:
import urllib.request
import time
import json
import datetime
import pickle
from weather import Weather
from pathlib import Path
# Live-data endpoint of the nextbike maps API that is crawled below.
# NOTE(review): `city=14` presumably limits the response to a single city
# (the notebook only ever reads the first city of the response) - confirm which one.
nextBikeUrl = 'https://api.nextbike.net/maps/nextbike-live.json?city=14'

# Get bike and weather data

In [2]:
def getWeather():
    """Look up the current weather for the hard-coded lookup id 667931.

    Returns:
        dict: The keys "humidity", "text", "temp" and "wind". The reported
        temperature is passed through the Fahrenheit-to-Celsius formula
        ``(t - 32) / 1.8`` before it is stored (assumes the service reports
        Fahrenheit - TODO confirm).
    """
    location = Weather().lookup(667931)
    current = location.condition()
    return {
        "humidity": location.atmosphere()["humidity"],
        "text": current.text(),
        "temp": (float(current.temp()) - 32) / 1.8,
        "wind": location.wind(),
    }

In [5]:
# Take one UTC timestamp for this crawl; it is stamped on the weather record
# here and read by the GeoJSON builder functions further down.
# datetime.utcnow() is deprecated since Python 3.12. Taking an aware "now" in
# UTC and dropping the tzinfo yields the same naive value, so the stored
# string keeps its "YYYY-MM-DD HH:MM:SS.ffffff" format.
currentTime = str(datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None))
weather = getWeather()
weather["time"] = currentTime

In [6]:
# Download the live nextbike feed. The context manager closes the HTTP
# response even if reading it fails.
with urllib.request.urlopen(nextBikeUrl) as page:
    data = page.read()

# Process the crawled information.
# The payload is decoded and parsed unchanged: blindly rewriting every "'"
# to '"' breaks otherwise valid JSON as soon as a string value (e.g. a
# station name) contains an apostrophe.
dataString = data.decode('utf8')
dataJson = json.loads(dataString)

In [7]:
# Take the "places" list of the first city of the first country in the response.
# Each entry is later classified as either a free-standing bike or a station.
bikedata = dataJson["countries"][0]["cities"][0]["places"]

# Data Splitting

In [8]:
def splitData(bikedata):
    """Partition the crawled places into free-standing bikes and stations.

    An entry whose "bike" value is exactly ``True`` goes into the first
    list; every other entry goes into the second. Input order is preserved.

    Args:
        bikedata: Iterable of place dicts, each carrying a "bike" key.

    Returns:
        tuple: ``(bikes, stations)`` as two lists.
    """
    free_bikes, docked_stations = [], []
    for place in bikedata:
        target = free_bikes if place["bike"] is True else docked_stations
        target.append(place)
    return free_bikes, docked_stations

In [9]:
# Separate the crawled places into free-standing bikes and stations.
bikes, stations = splitData(bikedata)

# Report how many of each were found (labels are German: "Räder" = bikes, "Stationen" = stations).
print("Räder", len(bikes))
print("Stationen", len(stations))

Räder 617
Stationen 21


# GeoJSON Format (example `Feature`; `coordinates` are ordered [longitude, latitude])
{
  "type": "Feature",
  "geometry": {
    "type": "Point",
    "coordinates": [125.6, 10.1]
  },
  "properties": {
    "name": "Dinagat Islands"
  }
}

In [10]:
def createGeoJsonBikes(bikes, stationid, timestamp=None):
    """Build a GeoJSON ``Feature`` for one free-standing bike.

    Args:
        bikes: A single place dict describing one bike. Its first
            ``bike_list`` entry supplies the state, bike type, board
            computer and number.
        stationid: Value stored under both ``stationid`` and
            ``stationiduid``.
        timestamp: Value stored under ``timestamp``. When omitted, the
            notebook-level ``currentTime`` is used.

    Returns:
        dict: A feature with a ``Point`` geometry and the bike's properties.

    Raises:
        KeyError: If an expected key is missing from the place or bike dict.
        IndexError: If ``bike_list`` is empty.
    """
    if timestamp is None:
        # Fall back to the crawl timestamp taken earlier in the notebook.
        timestamp = currentTime

    bike = bikes["bike_list"][0]
    return {
        # GeoJSON (RFC 7946) positions are [longitude, latitude]; emitting
        # [lat, lng] places the point at the wrong location in GeoJSON tools.
        "geometry": {"type": "Point", "coordinates": [bikes["lng"], bikes["lat"]]},
        "type": "Feature",
        "properties": {
            "address": bikes["address"],
            "name": bikes["name"],
            "maintenance": bikes["maintenance"],
            "state": bike["state"],
            "bike_type": bike["bike_type"],
            "boardcomputer": bike["boardcomputer"],
            "uid": bikes["uid"],
            "timestamp": timestamp,
            "stationid": stationid,
            "stationiduid": stationid,
            "number": bike["number"],
        },
    }

In [11]:
def createGeoJsonStations(stations, timestamp=None):
    """Build GeoJSON features for stations and for the bikes docked at them.

    For every station one station feature is produced, plus one bike feature
    per entry of the station's ``bike_list``. Docked bikes take the station's
    position and use the station name as both ``address`` and ``name``; their
    ``maintenance`` and ``uid`` properties are empty strings.

    Args:
        stations: Iterable of station dicts.
        timestamp: Value stored under ``timestamp`` in every feature. When
            omitted, the notebook-level ``currentTime`` is used.

    Returns:
        tuple: ``(bikes, places)`` - the docked-bike features first, then the
        station features.

    Raises:
        KeyError: If an expected key is missing from a station or bike dict.
    """
    if timestamp is None:
        # Fall back to the crawl timestamp taken earlier in the notebook.
        timestamp = currentTime

    places = []
    bikes = []
    for station in stations:
        # GeoJSON (RFC 7946) positions are [longitude, latitude]; emitting
        # [lat, lng] places the point at the wrong location in GeoJSON tools.
        places.append({
            "geometry": {"type": "Point", "coordinates": [station["lng"], station["lat"]]},
            "type": "Feature",
            "properties": {
                "name": station["name"],
                "maintenance": station["maintenance"],
                "bike_racks": station["bike_racks"],
                "bike_types": station["bike_types"],
                "bikes": station["bikes"],
                "free_racks": station["free_racks"],
                "bike_numbers": station["bike_numbers"],
                "rack_locks": station["rack_locks"],
                "spot": station["spot"],
                "terminal_type": station["terminal_type"],
                "timestamp": timestamp,
                "number": station["number"],
                "uid": station["uid"],
            },
        })

        for bike in station["bike_list"]:
            bikes.append({
                # Each docked bike gets its own coordinate list (no aliasing
                # between features), again in [longitude, latitude] order.
                "geometry": {"type": "Point", "coordinates": [station["lng"], station["lat"]]},
                "type": "Feature",
                "properties": {
                    "address": station["name"],
                    "name": station["name"],
                    "maintenance": "",
                    "state": bike["state"],
                    "bike_type": bike["bike_type"],
                    "boardcomputer": bike["boardcomputer"],
                    "uid": "",
                    "timestamp": timestamp,
                    "stationid": station["number"],
                    "stationiduid": station["uid"],
                    "number": bike["number"],
                },
            })

    return bikes, places

In [12]:
def createFeatureCollection(bikes):
    """Wrap an iterable of features in a ``FeatureCollection`` dict.

    The features are copied into a fresh list, so the returned collection
    does not alias the list that was passed in (the feature objects
    themselves are shared, not copied).

    Args:
        bikes: Iterable of feature dicts (not limited to bike features).

    Returns:
        dict: ``{"type": "FeatureCollection", "features": [...]}``.
    """
    return {"type": "FeatureCollection", "features": list(bikes)}

In [13]:
# Build a GeoJSON feature for every bike that is not parked at a station;
# these bikes are given 0 as their station id.
bikelist = [createGeoJsonBikes(freeBike, 0) for freeBike in bikes]

In [14]:
# Build the GeoJSON features for all stations (places) and for the bikes docked at them (Sbikes).
Sbikes,places =  createGeoJsonStations(stations)

In [15]:
# Add the bikes docked at stations to the list of free-standing bikes.
# This mutates bikelist in place, so re-running the cell appends them again.
bikelist.extend(Sbikes)

In [16]:
# Wrap each list in a FeatureCollection so all three can be persisted the same way.
bikeCollection = createFeatureCollection(bikelist)
stationCollection = createFeatureCollection(places)
# NOTE(review): the weather record is the plain dict built by getWeather() plus a
# "time" key, not a GeoJSON Feature; it is only wrapped so it has a "features" list.
weatherCollection = createFeatureCollection([weather])

# Save data

In [18]:
# check if pickle already exists
def pickleCollections(collection, filename):
    """Append a collection's features to ``data/<filename>.p`` and save it.

    If the pickle already exists, the stored collection is loaded, the new
    features are appended to its "features" list and the merged collection
    is written back. Otherwise ``collection`` is written as-is. The
    ``collection`` argument itself is never modified.

    Args:
        collection: Dict with a "features" list.
        filename: Base name of the pickle file, without directory or the
            ".p" extension.

    Raises:
        KeyError: If either collection lacks a "features" key.
        OSError: If the file cannot be read or written.
    """
    my_file = Path("data/" + filename + ".p")
    # Create the target directory on first use; without it the very first
    # dump fails with FileNotFoundError.
    my_file.parent.mkdir(parents=True, exist_ok=True)

    if my_file.exists():
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file. Only pickles written by this notebook should live here.
        with my_file.open("rb") as handle:
            collectionOld = pickle.load(handle)

        collectionOld["features"].extend(collection["features"])
        collection = collectionOld

    # Context managers guarantee the handles are flushed and closed, instead
    # of relying on garbage collection of the anonymous open() results.
    with my_file.open("wb") as handle:
        pickle.dump(collection, handle)

In [19]:
# Persist every collection under data/<name>.p: stations first, then bikes, then weather.
for _name, _collection in (
    ("stations", stationCollection),
    ("bikes", bikeCollection),
    ("weather", weatherCollection),
):
    pickleCollections(_collection, _name)