In [1]:
import json
import os
import sched, time

import numpy as np
import requests
from scipy import optimize

# downloading the data

Create a class to download data from a URL, using an API key.

In [2]:
class download_data(object):
    """
    Download JSON data from a URL at periodic time intervals.

    Each successful download is decoded from JSON and appended to
    ``self.data_list``, so after ``run()`` the list holds one entry per
    completed download, in chronological order.
    """

    def __init__(self, url, api_key, delta_t=1800, total_t=1800, timeout=30):
        """
        Store the download settings and create an empty result list.

        Parameters
        ----------
        url : str
            Address to query. It must already contain a query string,
            because the key is appended as ``'&apiKey=' + api_key``.
        api_key : str
            API key appended to ``url`` on every request.
        delta_t : number
            Delay between two consecutive downloads (scheduler time units;
            seconds with the default ``sched.scheduler()``).
        total_t : number
            Total duration over which downloads are scheduled.
        timeout : number
            Timeout handed to ``requests.get`` so a stalled connection
            cannot block the remaining scheduled downloads indefinitely.
        """
        self.url = url
        self.api_key = api_key
        self.delta_t = delta_t
        self.total_t = total_t
        self.timeout = timeout
        # One list per instance: a class-level list would be shared by every
        # download_data object and would mix the results of unrelated runs.
        self.data_list = []

    def run(self):
        """
        Run the download task for the configured duration and periodicity.

        Downloads are scheduled at ``0, delta_t, 2*delta_t, ...`` up to
        ``int(total_t / delta_t) * delta_t``, i.e. ``int(total_t / delta_t) + 1``
        downloads in total. The call blocks until all of them have run.

        Raises
        ------
        requests.exceptions.RequestException
            If a request fails, times out, or returns an HTTP error status.
        """

        def download():
            """
            Fetch the url with the api key and store the decoded JSON.
            """
            # set the request parameters
            my_url = self.url + '&apiKey=' + self.api_key
            # NOTE(review): these basic-auth values look like placeholders;
            # confirm whether the endpoint needs them at all.
            user = 'admin'
            pwd = 'admin'

            # set proper headers
            headers = {"Accept": "application/json"}

            # perform the HTTP request
            response = requests.get(my_url, auth=(user, pwd), headers=headers,
                                    timeout=self.timeout)
            # fail loudly instead of storing an error payload as data
            response.raise_for_status()

            # append downloaded json data to data_list
            self.data_list.append(response.json())

        def comment():
            """
            Print the monotonic clock value once a step has finished.
            """
            print(time.monotonic(), 'step complete')

        # initialize scheduler
        sch = sched.scheduler()
        # truncate number of time intervals to integer
        no_time_intervals = int(self.total_t / self.delta_t)
        # execute download at given time periods in total time
        for step in range(no_time_intervals + 1):
            delay = step * self.delta_t
            # For events due at the same time a lower number runs first, so
            # the download (1) finishes before "step complete" (2) is printed.
            sch.enter(delay, 1, download)
            sch.enter(delay, 2, comment)

        # run scheduled tasks
        sch.run()

Run the task.

In [3]:
# Station feed for the Dublin contract of the JCDecaux bike-share API.
STATIONS_URL = 'https://api.jcdecaux.com/vls/v1/stations?contract=Dublin'

# The API key is a credential: read it from the environment so that it is
# never stored in (or shared with) the notebook itself.
API_KEY = os.environ.get('JCDECAUX_API_KEY')
if not API_KEY:
    raise RuntimeError('Set the JCDECAUX_API_KEY environment variable before running this cell.')

# Poll once a minute for two minutes (three downloads: t = 0, 60, 120 s).
dd = download_data(STATIONS_URL, API_KEY, delta_t=60 * 1, total_t=60 * 1 * 2)
dd.run()

242027.468 step complete
242087.468 step complete
242147.468 step complete


Save the data.

In [4]:
# Write every collected snapshot to data.json as a single JSON array
# (one element per completed download).
with open('data.json', mode='w') as outfile:
    outfile.write(json.dumps(dd.data_list))

# loading the data 

In [2]:
# Read the saved snapshots back from data.json into `data`.
with open('data.json') as infile:
    data = json.loads(infile.read())

# displaying the data on a map

# analyzing the data

## inferring routes via a loss function

In [261]:
# number of station records in the first and second snapshot
length0, length1 = len(data[0]), len(data[1])

# define list of available bikes at each station, in payload order.
# Comprehensions are used on purpose: a module-level `for n in ...` loop
# would overwrite the global `n` that the loss and constraint cells rely on.
# assumes each snapshot is a list of station dicts -- TODO confirm
bike_list0 = [station['available_bikes'] for station in data[0]]
bike_list1 = [station['available_bikes'] for station in data[1]]

### initialize

In [262]:
# define initial and final vectors
xf = bike_list1
xi = bike_list0
# store length of the (two) list(s)
n = len(bike_list0)
# initialize random probabilties
p0 = np.random.rand(n, n)
p0_rshp = p0.reshape(-1)

In [263]:
def loss_fxn(p_fi, x_init=None, x_final=None):
    """
    Squared-error loss between the final vector and the initial vector
    propagated through a probability matrix.

    Parameters
    ----------
    p_fi : array_like
        Flattened square matrix with ``len(x_init)**2`` entries.
    x_init : array_like, optional
        Initial vector. Defaults to the notebook-level ``xi``.
    x_final : array_like, optional
        Final vector. Defaults to the notebook-level ``xf``.

    Returns
    -------
    float
        ``sum((x_final - x_init . P)**2)`` where ``P`` is ``p_fi`` reshaped
        to a square matrix.

    Raises
    ------
    ValueError
        If ``p_fi`` cannot be reshaped to ``(len(x_init), len(x_init))``.
    """
    # Fall back to the notebook globals only when no explicit vector is given,
    # so the function can also be evaluated independently of notebook state.
    start = np.asarray(xi if x_init is None else x_init)
    target = np.asarray(xf if x_final is None else x_final)
    # take the matrix side from the vector itself rather than from a global
    side = start.shape[0]
    p_fi_reshaped = np.asarray(p_fi).reshape(side, side)
    residual = target - np.dot(start, p_fi_reshaped)
    return np.sum(residual**2)

In [264]:
def fun_constr0(p0_rshp_var):
    """
    Equality-constraint value for the column sums of the matrix.

    The flat vector is reshaped to a square matrix and the Euclidean norm of
    ``1 - column_sums`` is returned; it is zero exactly when every column
    sums to one.

    NOTE(review): the norm is not differentiable at zero; returning the
    residual vector instead may suit SLSQP better, but that would change
    the return shape, so it is left unchanged here.
    """
    flat = np.asarray(p0_rshp_var)
    # infer the matrix side from the vector length instead of a global `n`
    side = int(round(np.sqrt(flat.size)))
    p0_tmp = flat.reshape(side, side)
    return np.linalg.norm(1 - np.sum(p0_tmp, axis=0))

def fun_constr1(p0_rshp_var):
    """
    Equality-constraint value for the row sums of the matrix.

    The flat vector is reshaped to a square matrix and the Euclidean norm of
    ``1 - row_sums`` is returned; it is zero exactly when every row sums
    to one.

    NOTE(review): the norm is not differentiable at zero; returning the
    residual vector instead may suit SLSQP better, but that would change
    the return shape, so it is left unchanged here.
    """
    flat = np.asarray(p0_rshp_var)
    # infer the matrix side from the vector length instead of a global `n`
    side = int(round(np.sqrt(flat.size)))
    p0_tmp = flat.reshape(side, side)
    return np.linalg.norm(1 - np.sum(p0_tmp, axis=1))

In [None]:
# equality constraints: the column-sum and row-sum residuals must both be zero
cons = ({'type': 'eq', 'fun': fun_constr0}, {'type': 'eq', 'fun': fun_constr1})

# test loss function
print(loss_fxn(p0_rshp))

# define bounds on each entry in the probability matrix
bnd = (0, 1)
# one (0, 1) pair per matrix entry; built without a loop variable so the
# global `n` is never shadowed
bnd_array = (bnd,) * n**2

# minimize the loss function
opt = optimize.minimize(loss_fxn, p0_rshp, method='SLSQP', bounds=bnd_array, constraints=cons)
# do not present a failed run as a result without saying so
if not opt.success:
    print('optimization did not converge:', opt.message)
# reshape the data
opt_rshp = opt.x.reshape(n, n)
# row sums, then column sums; both should be close to one if the constraints hold
print(np.sum(opt_rshp, axis=1), np.sum(opt_rshp, axis=0))

39207093.7633


In [None]:
# display the full result object returned by optimize.minimize above
opt

# what I want to do

* put stations on a map
* regularly pull data
* perform time series analysis
* loss function
* comparative study
* most frequent route