In [4]:
import logging

# We define a basic config for the logger that will print the log messages to in the console.
logging.basicConfig(level=logging.INFO)

In [31]:
import requests
my_dict = requests.get("https://rickandmortyapi.com/api/character?page=20").json()

In [32]:
data = my_dict["results"]

In [33]:
data[1]["name"]

'Worldender'

In [51]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_characters_from_api(page_number):
    """
    Fetch data from the Rick and Morty API for a specific page number.

    :param page_number: (int) The number of the page to fetch.
    :return: (dict) The JSON response from the API as a dictionary.
    """
    response = requests.get(f"https://rickandmortyapi.com/api/character?page={page_number}")
    return response.json()

def search_characters_until_name(target_name, num_workers=1, num_pages=10):
    """
    Collect characters from the Rick and Morty API until a character with a 
    specific name is found. Returns all characters obtained before the target 
    and the URL from which the data was fetched.

    :param target_name: (str) The name of the character to stop the search at.
    :param num_workers: (int) The number of parallel workers to use for fetching pages.
    :param num_pages: (int) The number of pages to fetch per iteration.
    :return: (list, str) A list of character dictionaries and the URL where the search was concluded.
    """
    base_url = "https://rickandmortyapi.com/api/character?page="
    current_page = 1
    collected_characters = []
    target_name_found = False
    url_found = ""

    while not target_name_found:
        # Initiate fetching multiple pages in parallel
        with ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = {executor.submit(fetch_characters_from_api, i): i for i in range(current_page, current_page+num_pages)}

            for future in as_completed(futures):
                try:
                    page_content = future.result()
                    current_page_consulted = futures[future]

                    # Print the pages consulted at this iteration
                    print(f"Consulted pages: {list(range(current_page_consulted, current_page_consulted + num_pages))}")

                    url_found = base_url + str(current_page_consulted)

                    # Check if the "results" key is present in the API response
                    if "results" in page_content:
                        # Iterate over each character in the page data
                        for character in page_content["results"]:
                            if character["name"] == target_name:
                                target_name_found = True
                                break
                            collected_characters.append(character)

                    if target_name_found:
                        break

                except Exception as e:
                    # Handle any potential exceptions that might occur during API requests
                    print(f"Error occurred: {e}")

        current_page += num_pages

    return collected_characters, url_found

search_characters_until_name('Worldender', num_workers=20, num_pages=10)

# Now `list_of_characters` contains all characters up to (but not including) "Kyle's Teenyverse"
# and `url_where_data_found` is the URL from which the data was fetched.


Consulted pages: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
Consulted pages: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Consulted pages: [6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
Consulted pages: [9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
Consulted pages: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
Consulted pages: [8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
Consulted pages: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Consulted pages: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
Consulted pages: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Consulted pages: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Consulted pages: [16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
Consulted pages: [15, 16, 17, 18, 19, 20, 21, 22, 23, 24]
Consulted pages: [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
Consulted pages: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]


([{'id': 81,
   'name': 'Crocubot',
   'status': 'Dead',
   'species': 'Animal',
   'type': 'Robot-Crocodile hybrid',
   'gender': 'Male',
   'origin': {'name': 'unknown', 'url': ''},
   'location': {'name': "Worldender's lair",
    'url': 'https://rickandmortyapi.com/api/location/4'},
   'image': 'https://rickandmortyapi.com/api/character/avatar/81.jpeg',
   'episode': ['https://rickandmortyapi.com/api/episode/25'],
   'url': 'https://rickandmortyapi.com/api/character/81',
   'created': '2017-11-30T14:23:41.053Z'},
  {'id': 82,
   'name': 'Cronenberg Rick',
   'status': 'unknown',
   'species': 'Cronenberg',
   'type': '',
   'gender': 'Male',
   'origin': {'name': 'Cronenberg Earth',
    'url': 'https://rickandmortyapi.com/api/location/12'},
   'location': {'name': 'Earth (C-137)',
    'url': 'https://rickandmortyapi.com/api/location/1'},
   'image': 'https://rickandmortyapi.com/api/character/avatar/82.jpeg',
   'episode': ['https://rickandmortyapi.com/api/episode/6',
    'https://ri

In [52]:
import requests

def fetch_characters_from_api(page_number):
    """
    Fetch data from the Rick and Morty API for a specific page number.

    :param page_number: (int) The number of the page to fetch.
    :return: (dict) The JSON response from the API as a dictionary.
    """
    response = requests.get(f"https://rickandmortyapi.com/api/character?page={page_number}")
    return response.json()

def search_characters_until_name(target_name, num_pages=10):
    """
    Collect characters from the Rick and Morty API until a character with a 
    specific name is found. Returns all characters obtained before the target 
    and the URL from which the data was fetched.

    :param target_name: (str) The name of the character to stop the search at.
    :param num_pages: (int) The number of pages to fetch per iteration.
    :return: (list, str) A list of character dictionaries and the URL where the search was concluded.
    """
    base_url = "https://rickandmortyapi.com/api/character?page="
    current_page = 1
    collected_characters = []
    target_name_found = False
    url_found = ""

    while not target_name_found and current_page <= num_pages:
        try:
            page_content = fetch_characters_from_api(current_page)

            print(f"Consulted page: {current_page}")

            url_found = base_url + str(current_page)

            # Check if the "results" key is present in the API response
            if "results" in page_content:
                # Iterate over each character in the page data
                for character in page_content["results"]:
                    if character["name"] == target_name:
                        target_name_found = True
                        break
                    collected_characters.append(character)

        except Exception as e:
            # Handle any potential exceptions that might occur during API requests
            print(f"Error occurred: {e}")

        current_page += 1

    return collected_characters, url_found

search_characters_until_name('Worldender')

Consulted page: 1
Consulted page: 2
Consulted page: 3
Consulted page: 4
Consulted page: 5
Consulted page: 6
Consulted page: 7
Consulted page: 8
Consulted page: 9
Consulted page: 10


([{'id': 1,
   'name': 'Rick Sanchez',
   'status': 'Alive',
   'species': 'Human',
   'type': '',
   'gender': 'Male',
   'origin': {'name': 'Earth (C-137)',
    'url': 'https://rickandmortyapi.com/api/location/1'},
   'location': {'name': 'Citadel of Ricks',
    'url': 'https://rickandmortyapi.com/api/location/3'},
   'image': 'https://rickandmortyapi.com/api/character/avatar/1.jpeg',
   'episode': ['https://rickandmortyapi.com/api/episode/1',
    'https://rickandmortyapi.com/api/episode/2',
    'https://rickandmortyapi.com/api/episode/3',
    'https://rickandmortyapi.com/api/episode/4',
    'https://rickandmortyapi.com/api/episode/5',
    'https://rickandmortyapi.com/api/episode/6',
    'https://rickandmortyapi.com/api/episode/7',
    'https://rickandmortyapi.com/api/episode/8',
    'https://rickandmortyapi.com/api/episode/9',
    'https://rickandmortyapi.com/api/episode/10',
    'https://rickandmortyapi.com/api/episode/11',
    'https://rickandmortyapi.com/api/episode/12',
    'htt

In [61]:
import re

# An example datetime string
datetime_string1 = "2020-01-01T12:00:00"
datetime_string2 = "2020-02-01T12:00:0"
# Use the re module to find matches
matches1 = re.findall(r"(\d{4})-(\d{2})-(\d{2})", datetime_string1)
matches2 = re.findall(r"(\d{4})-(\d{2})-(\d{2})", datetime_string2)
matches1 < matches2, matches1, matches2

(True, [('2020', '01', '01')], [('2020', '02', '01')])

In [54]:
import requests
import re
from datetime import datetime, timedelta

def fetch_data_from_api(endpoint):
    response = requests.get(endpoint)
    return response.json()

def search_last_30_days(endpoint):
    """
    Fetches and processes data from a given API endpoint, filtering for data created within the last 30 days.
    
    :param endpoint: str, The API endpoint from which to fetch data.
    
    :return: List of dictionaries containing the data items created within the last 30 days. If no such data is found,
    a string stating "No data from the last 30 days" is returned.

    The function navigates through paginated data from an API endpoint. The data is expected to contain a "created_at" key
    with a date in the "YYYY-MM-DD" format. If the date falls within the last 30 days, the data item is added to a list.
    The function keeps on collecting data until a data item is found that is older than 30 days. At that point,
    the function returns the collected data. 

    Exceptions are caught and the exception message is printed out. The function then continues to the next page.
    """
    current_page = 1
    collected_data = []

    # Apply the regex on the current date
    date_30_days_ago = datetime.now() - timedelta(days=30)
    date_30_days_ago_string = re.findall(r"\d{4}-\d{2}-\d{2}", str(date_30_days_ago))[0]

    while True:
        try:
            page_endpoint = f"{endpoint}?page={current_page}"
            page_content = fetch_data_from_api(page_endpoint)
            response = page_content.json()

            print(f"Consulted page: {current_page}")

            for item in response:
                # Extract year, month, day using a regex
                date_string = re.findall(r"\d{4}-\d{2}-\d{2}", item["created_at"])[0]

                if date_string < date_30_days_ago_string:
                    # Check if the collected_data is empty after the loop, return a special message or handle the case accordingly
                    return collected_data if collected_data else "No data from the last 30 days."
                else:
                    collected_data.extend(item)  # Append the dictionary item to collected_data

        except Exception as e:
            print(f"Error occurred: {e}")

        current_page += 1


In [55]:
import requests
import re
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_data_from_api(endpoint):
    response = requests.get(endpoint)
    return response.json()

def search_last_30_days_parallel(endpoint, num_workers=5):
    current_page = 1
    collected_data = []

    # Apply the regex on the current date
    date_30_days_ago = datetime.now() - timedelta(days=30)
    date_30_days_ago_string = re.findall(r"\d{4}-\d{2}-\d{2}", str(date_30_days_ago))[0]

    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        while True:
            futures = {executor.submit(fetch_data_from_api, f"{endpoint}?page={i}"): i for i in range(current_page, current_page + num_workers)}

            for future in as_completed(futures):
                try:
                    response = future.result()

                    print(f"Consulted page: {futures[future]}")

                    for item in response:
                        # Extract year, month, day using a regex
                        date_string = re.findall(r"\d{4}-\d{2}-\d{2}", item["created_at"])[0]

                        if date_string < date_30_days_ago_string:
                            # Check if the collected_data is empty after the loop, return a special message or handle the case accordingly
                            return collected_data if collected_data else "No data from the last 30 days."
                        else:
                            collected_data.append(item)  # Append the dictionary item to collected_data

                except Exception as e:
                    print(f"Error occurred: {e}")

            current_page += num_workers
