# Скачиваем информацию о слабах ННР

In [2]:
import requests
import string
import itertools
import concurrent.futures
import time
import os
from tqdm import tqdm
import pandas as pd

### Проверяем все имена слабов формата LLDDDD.

Всего 26 × 26 × 10 000 = 6 760 000 вариантов, полный перебор занимает ~60 часов. Существующие номера сохраняем в файл base.txt.

In [2]:
# Create the directory used for storing downloaded pages if it does not exist yet
os.makedirs('D:/Python/coin_pages', exist_ok=True)

def slab_number_generator(start_number):
    """Yield slab numbers of the form LLDDDD in ascending order.

    A slab number is two uppercase ASCII letters followed by four digits.
    Generation starts at ``start_number`` (case-insensitive) and runs up to
    and including ``ZZ9999``.

    Args:
        start_number: First slab number to yield, e.g. ``"vf0000"``. Its first
            two characters must be ASCII letters.

    Yields:
        str: Each slab number that compares ``>=`` the upper-cased
        ``start_number``.

    Raises:
        IndexError: If ``start_number`` has fewer than two characters.
        ValueError: If one of the first two characters is not an ASCII letter.
    """
    letters = string.ascii_uppercase

    start_letter1 = start_number[0].upper()
    start_letter2 = start_number[1].upper()
    # Hoisted: the lower bound is the same for every candidate.
    lower_bound = start_number.upper()

    for letter1 in letters[letters.index(start_letter1):]:
        # Only the first outer iteration may skip second letters; every later
        # first letter starts again from 'A'.
        if letter1 == start_letter1:
            second_letters = letters[letters.index(start_letter2):]
        else:
            second_letters = letters
        for letter2 in second_letters:
            prefix = letter1 + letter2
            for number in range(10000):
                slab_number = f"{prefix}{number:04d}"
                # Filters out the numbers below the start inside the first prefix.
                if slab_number >= lower_bound:
                    yield slab_number


def check_file_exists(slab_number, timeout=10):
    """Check whether the ``r`` image of a slab number exists on nreestr.ru.

    Sends a HEAD request for ``{slab_number}r.jpg``. When the server answers
    200, ``"<slab_number>,"`` is appended to ``D:/Python/coin_pages/base.txt``
    through ``save_progress``.

    Args:
        slab_number: Slab identifier to probe.
        timeout: Seconds passed to ``requests`` as the request timeout.
            Without it a stalled connection can block its worker thread for
            as long as the operating system allows.

    Returns:
        bool: True if the server answered 200; False for any other status or
        when the request failed (the error is printed).
    """
    url = f"https://nreestr.ru/img/all/bpict/{slab_number}r.jpg"
    try:
        response = requests.head(url, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return False

    found = response.status_code == 200
    if found:
        save_progress('D:/Python/coin_pages/base.txt', f'{slab_number},')
    return found

def save_progress(filename, name):
    """Append the text ``name`` to the end of the file ``filename``.

    The file is opened in append mode, so it is created if missing and
    existing content is kept.
    """
    with open(filename, mode='a') as out:
        out.write(name)


def main(start_number, max_workers=10, batch_size=6000):
    """Probe every slab number from ``start_number`` onwards using a thread pool.

    Slab numbers are taken from ``slab_number_generator`` and each one is
    passed to ``check_file_exists``. Work is submitted in batches so that only
    ``batch_size`` futures exist at a time; submitting the whole generator at
    once would create a future for every remaining slab number before the
    first result is consumed.

    Args:
        start_number: Slab number to start from.
        max_workers: Number of worker threads.
        batch_size: Maximum number of slab numbers submitted at once.
    """
    slab_numbers = slab_number_generator(start_number)
    count = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        while True:
            batch = list(itertools.islice(slab_numbers, batch_size))
            if not batch:
                break

            future_to_slab = {
                executor.submit(check_file_exists, slab_number): slab_number
                for slab_number in batch
            }
            for future in concurrent.futures.as_completed(future_to_slab):
                slab_number = future_to_slab[future]
                try:
                    # Called only to surface exceptions raised in the worker.
                    future.result()
                except Exception as exc:
                    print(f"Slab number {slab_number} generated an exception: {exc}")
                    continue
                count += 1
                if count % 6000 == 0:
                    print(f"Processed {count} slab numbers")

if __name__ == "__main__":
    # Strip surrounding whitespace: a stray leading space would otherwise be
    # treated as the first "letter" and fail the alphabet lookup in the generator.
    start_number = input("Enter the starting slab number (e.g., vf0000): ").strip().upper()
    main(start_number)

Enter the starting slab number (e.g., vf0000):  ZN6379


An error occurred: HTTPSConnectionPool(host='nreestr.ru', port=443): Max retries exceeded with url: /img/all/bpict/ZO0547r.jpg (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001EBF207B7D0>, 'Connection to nreestr.ru timed out. (connect timeout=None)'))
Processed 6000 slab numbers
Processed 12000 slab numbers
Processed 18000 slab numbers
Processed 24000 slab numbers
Processed 30000 slab numbers
Processed 36000 slab numbers
Processed 42000 slab numbers
Processed 48000 slab numbers
Processed 54000 slab numbers
Processed 60000 slab numbers
An error occurred: HTTPSConnectionPool(host='nreestr.ru', port=443): Max retries exceeded with url: /img/all/bpict/ZT8876r.jpg (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x000001EBFB367BD0>, 'Connection to nreestr.ru timed out. (connect timeout=None)'))
Processed 66000 slab numbers
Processed 72000 slab numbers
Processed 78000 slab numbers
Processed 84000 slab numbers
Processed 90000 sla

### Скачиваем страницы, относящиеся к монетам.

Создаем датафрейм из base.txt. Удаляем дубликаты.

In [3]:
import csv

# Read the comma-separated slab numbers into a one-column frame.
# NOTE(review): the scan step appends to 'D:/Python/coin_pages/base.txt', while this
# cell reads 'D:/Python/base.txt' — confirm the file was moved or copied.
# Presumably the file ends with a trailing comma, in which case the last element of
# the split is an empty string — TODO confirm.
with open('D:/Python/base.txt', 'r') as f:
    df = pd.DataFrame({'number': f.read().split(',')})
print(df.head())

   number
0  AA0203
1  AA0509
2  AA0737
3  AA1075
4  AA1076


In [4]:
# Print the frame summary: index range, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121111 entries, 0 to 121110
Data columns (total 1 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   number  121111 non-null  object
dtypes: object(1)
memory usage: 946.3+ KB


In [5]:
# Count slab numbers per two-letter prefix, then summarize the distribution of those per-prefix counts.
df.pivot_table(index=df['number'].str[:2],values='number',aggfunc='count').describe()

Unnamed: 0,number
count,463.0
mean,261.578834
std,406.430292
min,1.0
25%,25.5
50%,85.0
75%,242.0
max,1895.0


Сохраняем датафрейм в виде csv.

In [13]:
# Remove repeated slab numbers.
df = df.drop_duplicates()
# Each hit is appended to base.txt as "<number>,", so splitting on ',' leaves an
# empty trailing entry. Filter it by value rather than by a hard-coded row label,
# so the cell keeps working when the row count changes or when it is re-run.
df = df[df['number'].str.strip() != '']
# Fail loudly if deduplication did not take effect.
assert not df['number'].duplicated().any()
df.to_csv(r'D:\Python\df_base.csv')

In [14]:
# Show the last five rows of the frame.
df.tail()

Unnamed: 0,number
121105,ZZ9621
121106,ZZ9638
121107,ZZ9677
121108,ZZ9908
121109,ZZ9909


Скачиваем с сайта страницы для всех монет в базе. ~3ч.

In [5]:
def _save_image(url, path, timeout):
    """Download ``url`` into ``path``; return True only if the server answered 200."""
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Do not store an error body under a .jpg name.
        return False
    with open(path, 'wb') as file:
        file.write(response.content)
    return True


def download_page(slab_number, timeout=10):
    """Download the search page and both images for one slab number.

    The HTML page is saved as ``D:/Python/coin_pages/{slab_number}.html``.
    The ``r`` and ``a`` images are saved next to it as ``{slab_number}r.jpg``
    and ``{slab_number}a.jpg``; an image is written only when the server
    answers 200 for it.

    Args:
        slab_number: Slab identifier.
        timeout: Seconds passed to ``requests`` as the timeout of every request.

    Returns:
        str: ``"Saved: ..."`` when the page was stored, ``"Failed: ..."`` with
        the status code when the page request did not return 200, or
        ``"Error: ..."`` when any exception was raised. Exceptions are never
        propagated to the caller.
    """
    url = f'https://nreestr.ru/coins/search/{slab_number}'
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return f"Failed: {slab_number} (Status code: {response.status_code})"

        # Save the page to a file
        filename = f"D:/Python/coin_pages/{slab_number}.html"
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(response.text)

        # The two images differ only by the one-letter suffix.
        for side in ('r', 'a'):
            _save_image(
                f"https://nreestr.ru/img/all/bpict/{slab_number}{side}.jpg",
                f"D:/Python/coin_pages/{slab_number}{side}.jpg",
                timeout,
            )

        # Short pause after each slab number.
        time.sleep(0.2)
        return f"Saved: {slab_number}"
    except Exception as e:
        return f"Error: {slab_number} ({e})"

In [None]:
# Download the page and images for every slab number in the frame, 10 at a time.
count = 0
# download_page reports problems through its return string instead of raising,
# so keep every non-"Saved" result; otherwise failed downloads go unnoticed.
failed = []
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    future_to_slab = {executor.submit(download_page, slab_number): slab_number for slab_number in df['number']}

    for future in concurrent.futures.as_completed(future_to_slab):
        slab_number = future_to_slab[future]
        try:
            result = future.result()
        except Exception as exc:
            print(f"Slab number {slab_number} generated an exception: {exc}")
            continue
        count += 1
        if not result.startswith("Saved"):
            failed.append(result)
        if count % 200 == 0:
            print(f"Processed {count} slab numbers")

print(f"Done: {count} processed, {len(failed)} not saved")

In [6]:
# Run the downloader once for a single slab number.
download_page('YU2656')

'Saved: YU2656'