In [1]:
import requests
from datetime import datetime, timedelta

# Plotting libraries, used to visualise trends in the data

import matplotlib.pyplot as plt
import numpy as np

import json

import calendar
import os

# Work from the parent of the directory the kernel started in, so that the
# relative 'data/' and 'outputs/' paths used below resolve there.
# A bare os.chdir('..') is relative to the *current* directory, so every re-run
# of this cell would climb one more level. Remembering the starting directory
# the first time the cell runs makes re-running it harmless.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))


In [2]:

# EMEL open-data endpoints for the GIRA trip and usage statistics
url_trips = "https://opendata.emel.pt/cycling/gira/statistics/trips"
url_usage = "https://opendata.emel.pt/cycling/gira/statistics/usage"

# Create every folder the notebook writes to.
# exist_ok=True keeps the cell safe to re-run and, unlike a single check on
# 'data', still creates the sub-folders when 'data' already exists without them.
for folder in ('outputs', os.path.join('data', 'trips'), os.path.join('data', 'usage')):
    os.makedirs(folder, exist_ok=True)

In [67]:
# Years to process: from the first year requested up to the current calendar year
start_year, current_year = 2016, datetime.now().year


In [3]:
# Download the trip statistics, one request and one JSON file per year.
# NOTE: the window always ends on 31 December, so for the current year the
# requested end date may still lie in the future.
for year in range(start_year, current_year + 1):
    # The query covers the whole calendar year, with dates written day-month-year
    dateFrom = f"01-01-{year}"
    dateTo = f"31-12-{year}"
    params = {"dateFrom": dateFrom, "dateTo": dateTo}

    try:
        # The timeout keeps the cell from hanging forever on an unresponsive server
        response = requests.get(url_trips, params=params, timeout=60)
    except requests.RequestException as error:
        # Treat a network failure like a bad status code: report it and move on
        print(f"Failed to get statistics for {year}. Error: {error}")
        continue

    if response.status_code == 200:
        # Store the raw response body so later cells can work offline
        with open(f"data/trips/statistics_{year}.json", "w") as file:
            file.write(response.text)
        print(f"Got trips for {year}")
    else:
        print(f"Failed to get statistics for {year}. Status code: {response.status_code}")
        


NameError: name 'start_year' is not defined

In [None]:
# Download the usage statistics, one request and one JSON file per year.
# NOTE: the window always ends on 31 December, so for the current year the
# requested end date may still lie in the future.
for year in range(start_year, current_year + 1):
    # The query covers the whole calendar year, with dates written day-month-year
    dateFrom = f"01-01-{year}"
    dateTo = f"31-12-{year}"
    params = {"dateFrom": dateFrom, "dateTo": dateTo}

    try:
        # The timeout keeps the cell from hanging forever on an unresponsive server
        response = requests.get(url_usage, params=params, timeout=60)
    except requests.RequestException as error:
        # Treat a network failure like a bad status code: report it and move on
        print(f"Failed to get usage for {year}. Error: {error}")
        continue

    if response.status_code == 200:
        # Store the raw response body so later cells can work offline
        with open(f"data/usage/statistics_{year}.json", "w") as file:
            file.write(response.text)
        print(f"Got usage for {year}")
    else:
        print(f"Failed to get usage for {year}. Status code: {response.status_code}")

In [None]:
# For each year, group the downloaded records by the weekday of the trip start
# date and save one chart per year with two lines: the mean of "userQtyUnique"
# and the mean of "avgTripSeconds" (shown in minutes) for each day of the week.

for year in range(start_year, current_year + 1):
    # A year whose download failed has no file on disk; skip that year instead
    # of letting one missing file abort the charts for all remaining years.
    try:
        with open(f"data/trips/statistics_{year}.json", "r") as file:
            statistics = json.load(file)
        with open(f"data/usage/statistics_{year}.json", "r") as file:
            usage = json.load(file)
    except FileNotFoundError as error:
        print(f"Missing data file for {year} ({error.filename}). Skipping this year.")
        continue

    # weekday number (0 = Monday ... 6 = Sunday) -> records that fall on that weekday
    data = {}

    # NOTE(review): zip pairs the two lists by position, so this assumes both
    # endpoints return their records in the same order and for the same dates
    # — TODO confirm against the API; zip also silently drops any unpaired tail.
    for stat, use in zip(statistics, usage):
        start_date = datetime.strptime(stat["tripStartDate"], "%d-%m-%Y")
        bucket = data.setdefault(start_date.weekday(), {"statistics": [], "usage": []})
        bucket["statistics"].append(stat)
        bucket["usage"].append(use)

    if not data:
        # Nothing to average; an empty chart would only be misleading
        print(f"No records for {year}. Skipping this year.")
        continue

    days = sorted(data.keys())
    day_names = [calendar.day_name[day] for day in days]

    # Per-weekday averages: unique users, and trip duration converted to minutes
    users = [np.mean([use["userQtyUnique"] for use in data[day]["usage"]]) for day in days]
    duration = [np.mean([stat["avgTripSeconds"] for stat in data[day]["statistics"]]) / 60 for day in days]

    fig, ax1 = plt.subplots(figsize=(10, 5))

    # Left y-axis: average number of unique users
    ax1.plot(day_names, users, label="Users", color='tab:blue')
    ax1.set_ylabel('Number of Users', color='tab:blue')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    # Right y-axis, sharing the same x-axis: average trip duration
    ax2 = ax1.twinx()
    ax2.plot(day_names, duration, label="Duration", color='tab:red')
    ax2.set_ylabel('Duration (minutes)', color='tab:red')
    ax2.tick_params(axis='y', labelcolor='tab:red')

    fig.suptitle(f"Year {year}")

    # Save and close this specific figure so figures do not pile up in memory
    fig.savefig(f"outputs/year_{year}.png")
    plt.close(fig)
