# ID searching

* search for the earliest and latest shader ID of every year to put them into lists
* years 2014 till 2021 are part of shaders20k so maybe we can ignore them
* year 2024 is still ongoing, so we don't know for sure.

In [1]:
from functools import cache
from download import get_all_shaders
from download import get_shader as get_shader_int
import datetime

all_ids = get_all_shaders()
num_all_shaders = len(all_ids)

#maybe speed this up?
@cache
def get_shader(id):
    return get_shader_int(id)


oldest_shader = get_shader(all_ids[-1])
newst_shader = get_shader(all_ids[0])

def get_shader_date(shader_data):
    return datetime.datetime.fromtimestamp(float(shader_data["Shader"]["info"]["date"]))

earliest_date = get_shader_date(oldest_shader)
latest_date = get_shader_date(newst_shader)
print(f"Earliest shader: {earliest_date}")
print(f"Latest shader: {latest_date}")
print(f"Total number of shaders: {num_all_shaders}")

Earliest shader: 2013-01-02 11:17:05
Latest shader: 2024-07-30 13:56:02
Total number of shaders: 32659


In [10]:
from typing import Tuple

def find_shader_range_by_year(year: int, ids_to_check=all_ids) -> Tuple[str, int, str, int]:
    first_idx, first_id = find_first_shader_of_the_year(year, ids_to_check)
    # can we optimize the range for the end check?
    last_idx_of_younger, _ = find_first_shader_of_the_year(year+1, ids_to_check)
    last_idx = last_idx_of_younger + 1 if last_idx_of_next_year > 0 else 0
    last_id = get_shader(ids_to_check[last_idx])["Shader"]["info"]["id"]
    return first_idx, first_id, last_idx, last_id


def find_first_shader_of_the_year(year: int, ids_to_check=all_ids) -> Tuple[int, int]:
    """
    all_ids is newest first!
    returns the first and last shader of the given year.
    outputs the first_idx,
    """
    oldest_idx = len(ids_to_check) -1
    newest_idx = 0
    if get_shader_date(get_shader(ids_to_check[oldest_idx])).year >= year:
        print("year is too old, returning oldest item")
        return oldest_idx, get_shader(ids_to_check[oldest_idx])["Shader"]["info"]["id"]
    if get_shader_date(get_shader(ids_to_check[newest_idx])).year < year:
        print("year is too new, returning youngest item")
        return newest_idx, get_shader(ids_to_check[newest_idx])["Shader"]["info"]["id"]

    mid_idx = (oldest_idx-newest_idx) // 2
    mid_shader = get_shader(ids_to_check[mid_idx])
    mid_date = get_shader_date(mid_shader)
    prev_date = get_shader_date(get_shader(ids_to_check[mid_idx + 1]))
    # step one first beginning of year
    while not ((mid_date.year == year) and (prev_date.year < year)):
        print(mid_date, prev_date)
        if mid_date.year < year:
            oldest_idx = mid_idx
            mid_idx = (mid_idx + newest_idx) // 2
        else:
            newest_idx = mid_idx
            mid_idx = (mid_idx + oldest_idx) // 2
        mid_shader = get_shader(ids_to_check[mid_idx])
        mid_date = get_shader_date(mid_shader)
        prev_date = get_shader_date(get_shader(ids_to_check[mid_idx + 1]))
    first_id_of_year = mid_idx
    print("found transient:", mid_date, prev_date, "for beginning of year", year)
    return first_id_of_year, get_shader(ids_to_check[first_id_of_year])["Shader"]["info"]["id"]

def check_neighbors(idx, id_list):
    try:
        print(get_shader_date(get_shader(id_list[idx-1])), "<-- younger") # this can lap around, careful.
        print(get_shader_date(get_shader(id_list[idx])), "<-- to check")
        print(get_shader_date(get_shader(id_list[idx+1])), "<-- older")
    except IndexError:
        print("some neighbors out of range")

year = 2024

start_idx, start_id, end_idx, end_id = find_shader_range_by_year(year)
print(f"{year}: {start_idx} - {end_idx}")
print(f"{year}: {start_id} - {end_id}")

check_neighbors(start_idx, all_ids)
check_neighbors(end_idx, all_ids)

2020-10-05 17:33:59 2020-10-05 17:04:14
2022-08-21 23:19:27 2022-08-21 23:16:47
2023-08-06 14:21:25 2023-08-06 13:27:35
2024-02-07 19:28:36 2024-02-07 19:13:34
2023-11-02 08:16:32 2023-11-02 05:05:57
2023-12-14 16:57:26 2023-12-14 10:48:51
2024-01-09 19:03:46 2024-01-09 16:34:57
2023-12-26 17:01:55 2023-12-26 13:40:04
2024-01-02 20:00:17 2024-01-02 19:52:44
2023-12-30 15:00:50 2023-12-30 12:00:07
2024-01-01 22:13:55 2024-01-01 21:00:15
2023-12-31 08:35:03 2023-12-31 06:33:11
found: 2379 2023-12-31 21:49:34 for beginning of year
year is too new, returning youngest item
2024: 2379 - 0
2024: Xcj3Dz - lfScDD
2024-01-01 08:48:30 <-- younger
2024-01-01 07:44:01 <-- to check
2023-12-31 21:49:34 <-- older
2013-01-02 11:17:05 <-- younger
2024-07-30 13:56:02 <-- to check
2024-07-30 12:38:28 <-- older


In [17]:
ids_dir = "./data/ids"
for year in range(2013, 2025): # end is exclusive...
    start_idx, start_id, end_idx, end_id = find_shader_range_by_year(year)
    yearly_ids = all_ids[end_idx:start_idx+1]
    with open(f"{ids_dir}/api_{year}.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(yearly_ids))

year is too old, returning oldest item
2020-10-05 17:33:59 2020-10-05 17:04:14
2018-06-11 18:51:33 2018-06-11 18:40:34
2016-10-30 12:21:10 2016-10-30 12:03:27
2015-12-01 03:42:52 2015-12-01 03:15:38
2015-03-15 21:22:04 2015-03-15 03:26:26
2014-05-18 14:59:27 2014-05-18 08:53:51
2013-08-29 03:55:43 2013-08-29 03:51:36
2014-02-05 00:33:52 2014-02-04 23:36:18
2013-11-01 12:09:37 2013-10-31 23:00:40
2013-12-11 04:24:27 2013-12-09 05:47:13
2014-01-09 03:40:47 2014-01-09 00:17:56
2013-12-18 17:20:49 2013-12-14 21:24:34
2013-12-22 02:18:36 2013-12-21 20:27:59
found: 32292 2013-12-24 16:19:42 for beginning of year
2020-10-05 17:33:59 2020-10-05 17:04:14
2018-06-11 18:51:33 2018-06-11 18:40:34
2016-10-30 12:21:10 2016-10-30 12:03:27
2015-12-01 03:42:52 2015-12-01 03:15:38
2015-03-15 21:22:04 2015-03-15 03:26:26
2014-05-18 14:59:27 2014-05-18 08:53:51
2013-08-29 03:55:43 2013-08-29 03:51:36
2014-02-05 00:33:52 2014-02-04 23:36:18
2013-11-01 12:09:37 2013-10-31 23:00:40
2013-12-11 04:24:27 2013-1

In [18]:
# check the number of IDs we wrote
sum = 0
saved_ids = []
for year in range(2013, 2025):
    with open(f"{ids_dir}/api_{year}.txt", "r", encoding="utf-8") as f:
        yearly_ids = f.readlines()
        print(year, len(yearly_ids))
        sum += len(yearly_ids)
        saved_ids += [yid.rstrip() for yid in yearly_ids]
        start_idx = all_ids.index(yearly_ids[0].strip())
        end_idx = all_ids.index(yearly_ids[-1].strip())
        print("start neighbors:")
        check_neighbors(start_idx, all_ids)
        print("end neighbors:")
        check_neighbors(end_idx, all_ids)
print(sum)
print(len(saved_ids))
print(num_all_shaders)
# missing shaders
print(set(all_ids) - set(saved_ids))

2013 366
start neighbors:
2014-01-06 12:40:33 <-- younger
2013-12-24 16:19:42 <-- to check
2013-12-22 02:18:36 <-- older
end neighbors:
2013-01-02 15:27:09 <-- younger
2013-01-02 11:17:05 <-- to check
some neighbors out of range
2014 365
start neighbors:
2015-01-02 18:56:56 <-- younger
2014-12-31 16:45:11 <-- to check
2014-12-30 16:35:11 <-- older
end neighbors:
2014-01-09 00:17:56 <-- younger
2014-01-06 12:40:33 <-- to check
2013-12-24 16:19:42 <-- older
2015 1427
start neighbors:
2016-01-02 02:53:45 <-- younger
2015-12-31 23:35:40 <-- to check
2015-12-30 09:35:47 <-- older
end neighbors:
2015-01-03 02:16:05 <-- younger
2015-01-02 18:56:56 <-- to check
2014-12-31 16:45:11 <-- older
2016 2293
start neighbors:
2017-01-01 07:54:52 <-- younger
2016-12-31 20:06:12 <-- to check
2016-12-31 18:38:50 <-- older
end neighbors:
2016-01-02 03:46:38 <-- younger
2016-01-02 02:53:45 <-- to check
2015-12-31 23:35:40 <-- older
2017 2611
start neighbors:
2018-01-01 01:02:13 <-- younger
2017-12-31 20:27:

In [105]:
get_shader_date(get_shader("Ms2SWW"))

datetime.datetime(2013, 1, 2, 11, 17, 5)