In [1]:
import datetime
import json
from pathlib import Path

import requests

# How to get the content of a resource?

Tools such as Insomnia or Postman can automatically generate a snippet of code that performs a GET request and returns the content of a page.

In [2]:
# Public endpoint listing the bike-sharing stations of Trento.
url = "https://os.smartcommunitylab.it/core.mobility/bikesharing/trento"

# A plain GET needs no request body. The timeout keeps the cell from
# hanging indefinitely if the server never answers.
response = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (4xx/5xx) instead of trying to parse an error page.
response.raise_for_status()

# Decode the JSON body into Python objects.
bike_stations = response.json()

In [3]:
# Display the parsed response to inspect its structure.
bike_stations

[{'name': '10.01 Bren Center',
  'address': 'Via Guardini fr. civ. 75',
  'id': '10.01 Bren Center - Trento',
  'bikes': 3,
  'slots': 12,
  'totalSlots': 18,
  'position': [46.094653868146885, 11.117693389024225]},
 {'name': '10.02 Top Center',
  'address': 'Via Pranzelores / Via Giuseppe Gilli',
  'id': '10.02 Top Center - Trento',
  'bikes': 4,
  'slots': 19,
  'totalSlots': 24,
  'position': [46.09010372321067, 11.118583717279193]},
 {'name': '10.03 Piazza di Centa',
  'address': 'P.le di Centa / Via Vannetti',
  'id': '10.03 Piazza di Centa - Trento',
  'bikes': 0,
  'slots': 0,
  'totalSlots': 0,
  'position': [46.07538723420181, 11.12382177686004]},
 {'name': '10.04 Piazza Venezia',
  'address': 'via Grazioli / piazza Venezia',
  'id': '10.04 Piazza Venezia - Trento',
  'bikes': 2,
  'slots': 20,
  'totalSlots': 23,
  'position': [46.06753791402558, 11.126987115869156]},
 {'name': '10.06 Stazione Autocorriere',
  'address': 'Via Pozzo / Via Torre Vanga',
  'id': '10.06 Stazione 

# Slide Challenges
## How many total slots are there?

In [4]:
# Approach 1: accumulate the total with an explicit loop.
tot_slots = 0
for station in bike_stations:
    tot_slots += station["totalSlots"]

print(tot_slots)

# Approach 2: collect every station's capacity in a list, then sum it.
total_slots_list = [station["totalSlots"] for station in bike_stations]

print(sum(total_slots_list))


910
910


## Add timestamp and city to the dataset
Here we prefer to attach timestamp and city to each entry, instead of putting the data in a separate place

In [5]:
#NOTE: This is a design choice, are you sure you want to format it like that?
ts = datetime.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
# The city name is the last path segment of the endpoint URL.
city = url.split("/")[-1]

# Tag every station with the city and the time of this fetch.
for station in bike_stations:
    station["city"] = city
    # alternative
    #station["city"] = station["id"].split(" - ")[1]

    station["timestamp"] = ts

# Preview entry 70 when it exists; fall back to the last entry if the API
# returned fewer stations, and skip the preview if it returned none.
if bike_stations:
    preview_idx = min(70, len(bike_stations) - 1)
    print(f"Preview of an entry: {bike_stations[preview_idx]}")

# open(..., "w") does not create missing directories, so make sure the
# output folder exists before writing.
Path("./fetched_data").mkdir(parents=True, exist_ok=True)
with open(f"./fetched_data/stations_{city}.json", "w") as f:
    json.dump(bike_stations, f, indent=2)

Preview of an entry: {'name': '20.13 Quercia', 'address': 'via Palestrina 3', 'id': '20.13 Quercia - Trento', 'bikes': 2, 'slots': 4, 'totalSlots': 6, 'position': [45.90013008440864, 11.037017111949822], 'city': 'trento', 'timestamp': '2022-03-18, 14:16:16'}


Note that the commented-out alternative gets the city directly from the data (the `id` field). This might not be a good idea, as we would be relying on the current representation of the data: what if it changes? What if the standard is not kept homogeneous within the source (e.g. some cities might be written in caps)?



Redoing the above point by considering more cities

In [6]:
# Maps each city name to the endpoint listing its bike-sharing stations.
scrape_dict = {
    "trento": "https://os.smartcommunitylab.it/core.mobility/bikesharing/trento",
    "rovereto": "https://os.smartcommunitylab.it/core.mobility/bikesharing/rovereto"
}

# open(..., "w") does not create missing directories, so make sure the
# output folder exists before the first write.
Path("./fetched_data").mkdir(parents=True, exist_ok=True)

for city, url in scrape_dict.items():
    # One timestamp per city, taken right before its request.
    ts = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')

    # The timeout avoids hanging on an unresponsive server; raise_for_status
    # stops on an HTTP error instead of parsing an error page as station data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    bike_stations = response.json()

    # Tag every station with the city and the time of this fetch.
    for station in bike_stations:
        station["city"] = city
        station["timestamp"] = ts

    # The file name carries both the fetch time and the city.
    # NOTE(review): ts contains ':' characters, which are not valid in Windows
    # file names; the format is kept unchanged so existing file names still match.
    with open(f"./fetched_data/{ts}_stations_{city}.json", "w") as f:
        json.dump(bike_stations, f, indent=2)