In [1]:
print("hello")

hello


In [66]:
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

In [185]:
import asyncio

class CianAPI(object):
    def __init__(self, session, url=None):
        self.session = session
        self.url = url
        
    async def request_all_pages(self, min_price, max_price):  
        pages = []
        with ThreadPoolExecutor(16) as tp:
            loop = asyncio.get_event_loop()
            for page_from, page_to in [(1, 10), (11, 20), (21, 30), (31, 40), (41, 50), (51, 60), (61, 70)]: 
                chunk, stop = await self._next_chunk(tp, loop, min_price, max_price, page_from, page_to)
                pages += chunk
                if stop:
                    return pages
        
    async def _next_chunk(self, tp, loop, min_price, max_price, page_from, page_to):
        tasks = [
            loop.run_in_executor(tp, self._request_single_page, *(page, min_price, max_price))
            for page in range(page_from, page_to)
        ]

        results = []
        stop = False
        for code, html in await asyncio.gather(*tasks):
            if code == 301:
                stop = True
            elif code == 200:
                results.append(html)
            else:
                print('Error when download page')

        return results, stop
        
    def _request_single_page(self, page, min_price, max_price):
        r = self.session.get(self._make_url(page, min_price, max_price), allow_redirects=False)
        # print(f'Request page {page} [{min_price}; {max_price}]')
        return r.status_code, r.text
        
    def _make_url(self, page, min_price, max_price):
        if self.url is not None:
            return self.url.format(page=page)
        
        return f'https://www.cian.ru/cat.php?currency=2&deal_type=rent&engine_version=2&maxprice={max_price}&minprice={min_price}&offer_type=flat&p={page}&region=1&room1=1&room2=1&type=4'
 

async def do_fetch():
    with requests.Session() as s:
        api = CianAPI(s)
        cheap, expensive = [], []
        #cheap = await api.request_all_pages(10_000, 30_000)
        #expensive = await api.request_all_pages(100_000, 200_000)
        
    return cheap, expensive
        
cheap, expensive = await do_fetch()
print(f'Downloaded {len(cheap)} cheap pages, {len(expensive)} expensive pages')

Downloaded 0 cheap pages, 0 expensive pages


In [183]:
import shutil
import os

def download_image(url, directory, filename, session):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201'}
    fname = f'{directory}/{filename}'
    print(f'Downloading {fname}')
    r = session.get(url, headers=headers, stream=True)
    if r.status_code != 200:
        print(f'Could not download {url}')
        return
    r.raw.decode_content = True
    with open(fname,  'wb') as f:
        shutil.copyfileobj(r.raw, f)
    print(f'Image saved to {fname}')
    
print(f'Here we have {len(cheap)} cheap pages and {len(expensive)} pages')
    
async def download_all():
    for name, pages in [
        ('cheap', cheap),
        ('expensive', expensive)
    ]:
        shutil.rmtree(name, ignore_errors=True)
        os.mkdir(name)
        
        for page_index, page in enumerate(pages):
            soup = BeautifulSoup(page, 'lxml-xml')
            ads = soup.find_all('div', **{'data-name': lambda d: d in ['OfferCard', 'TopOfferCard']})
            
            images = []
            for ad in ads:
                imgs = ad.find_all('img', **{'data-name': 'GalleryImage'})
                images += [img['src'] for img in imgs]

            with requests.Session() as s:
                with ThreadPoolExecutor(32) as tp:
                    tasks = [
                        loop.run_in_executor(tp, download_image, *(image, f'{images}/name', f'{page_index}_{image_index}.jpg', s))
                        for image_index, image in enumerate(images)
                    ]
                    
                    await asyncio.gather(*tasks)
                    
await download_all()

Here we have 49 cheap pages and 49 pages
Downloading ['https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127887-4.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127890-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127891-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127897-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127899-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810103-4.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810102-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810105-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810107-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zeleny

OSError: [Errno 36] File name too long: "['https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127887-4.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127890-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127891-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127897-2.jpg', 'https://cdn-p.cian.site/images/8/721/559/kvartira-moskva-ulica-akademika-skryabina-955127899-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810103-4.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810102-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810105-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810107-2.jpg', 'https://cdn-p.cian.site/images/1/018/059/kvartira-moskva-zelenyy-prospekt-950810109-2.jpg', 'https://cdn-p.cian.site/images/4/251/559/kvartira-moskva-perervinskiy-bulvar-955152403-4.jpg', 'https://cdn-p.cian.site/images/4/251/559/kvartira-moskva-perervinskiy-bulvar-955152409-2.jpg', 'https://cdn-p.cian.site/images/4/251/559/kvartira-moskva-perervinskiy-bulvar-955152413-2.jpg', 'https://cdn-p.cian.site/images/4/251/559/kvartira-moskva-perervinskiy-bulvar-955152411-2.jpg', 'https://cdn-p.cian.site/images/4/251/559/kvartira-moskva-perervinskiy-bulvar-955152412-2.jpg', 'https://cdn-p.cian.site/images/2/296/208/kvartira-moskva-altayskaya-ulica-802692276-4.jpg', 'https://cdn-p.cian.site/images/2/296/208/kvartira-moskva-altayskaya-ulica-802692269-2.jpg', 'https://cdn-p.cian.site/images/2/296/208/kvartira-moskva-altayskaya-ulica-802692268-2.jpg', 'https://cdn-p.cian.site/images/6/651/459/kvartira-moskva-altayskaya-ulica-954156637-2.jpg', 'https://cdn-p.cian.site/images/6/651/459/kvartira-moskva-altayskaya-ulica-954156639-2.jpg', 'https://cdn-p.cian.site/images/3/971/159/kvartira-moskva-klyazminskaya-ulica-951179346-4.jpg', 'https://cdn-p.cian.site/images/3/971/159/kvartira-moskva-klyazminskaya-ulica-951179343-2.jpg', 'https://cdn-p.cian.site/images/3/971/159/kvartira-moskva-klyazminskaya-ulica-951179350-2.jpg', 'https://cdn-p.cian.site/images/3/971/159/kvartira-moskva-klyazminskaya-ulica-951179346-2.jpg', 'https://cdn-p.cian.site/images/3/971/159/kvartira-moskva-klyazminskaya-ulica-951179345-2.jpg', 'https://cdn-p.cian.site/images/0/881/559/kvartira-moskva-ulica-dmitrievskogo-955188054-4.jpg', 'https://cdn-p.cian.site/images/0/881/559/kvartira-moskva-ulica-dmitrievskogo-955188055-2.jpg', 'https://cdn-p.cian.site/images/0/881/559/kvartira-moskva-ulica-dmitrievskogo-955188053-2.jpg', 'https://cdn-p.cian.site/images/0/881/559/kvartira-moskva-ulica-dmitrievskogo-955188050-2.jpg', 'https://cdn-p.cian.site/images/0/881/559/kvartira-moskva-ulica-dmitrievskogo-955188056-2.jpg', 'https://cdn-p.cian.site/images/2/998/459/kvartira-moskva-ulica-pererva-954899235-4.jpg', 'https://cdn-p.cian.site/images/5/998/459/kvartira-moskva-ulica-pererva-954899577-2.jpg', 'https://cdn-p.cian.site/images/5/998/459/kvartira-moskva-ulica-pererva-954899578-2.jpg', 'https://cdn-p.cian.site/images/5/998/459/kvartira-moskva-ulica-pererva-954899598-2.jpg', 'https://cdn-p.cian.site/images/5/998/459/kvartira-moskva-ulica-pererva-954899597-2.jpg', 'https://cdn-p.cian.site/images/0/074/359/kvartira-moskva-luhmanovskaya-ulica-953470068-4.jpg', 'https://cdn-p.cian.site/images/0/074/359/kvartira-moskva-luhmanovskaya-ulica-953470063-2.jpg', 'https://cdn-p.cian.site/images/0/074/359/kvartira-moskva-luhmanovskaya-ulica-953470065-2.jpg', 'https://cdn-p.cian.site/images/0/074/359/kvartira-moskva-luhmanovskaya-ulica-953470060-2.jpg', 'https://cdn-p.cian.site/images/0/074/359/kvartira-moskva-luhmanovskaya-ulica-953470059-2.jpg', 'https://cdn-p.cian.site/images/5/744/459/kvartira-moskva-knyazhekozlovskiy-pereulok-954447508-4.jpg', 'https://cdn-p.cian.site/images/5/556/329/kvartira-moskva-knyazhekozlovskiy-pereulok-923655593-2.jpg', 'https://cdn-p.cian.site/images/5/556/329/kvartira-moskva-knyazhekozlovskiy-pereulok-923655591-2.jpg', 'https://cdn-p.cian.site/images/6/556/329/kvartira-moskva-knyazhekozlovskiy-pereulok-923655603-2.jpg', 'https://cdn-p.cian.site/images/6/556/329/kvartira-moskva-knyazhekozlovskiy-pereulok-923655608-2.jpg', 'https://cdn-p.cian.site/images/2/299/359/kvartira-moskva-proezd-dezhneva-953992260-4.jpg', 'https://cdn-p.cian.site/images/2/299/359/kvartira-moskva-proezd-dezhneva-953992265-2.jpg', 'https://cdn-p.cian.site/images/2/299/359/kvartira-moskva-proezd-dezhneva-953992260-2.jpg', 'https://cdn-p.cian.site/images/2/299/359/kvartira-moskva-proezd-dezhneva-953992263-2.jpg', 'https://cdn-p.cian.site/images/2/299/359/kvartira-moskva-proezd-dezhneva-953992257-2.jpg', 'https://cdn-p.cian.site/images/0/034/459/kvartira-moskva-beskudnikovskiy-bulvar-954430022-4.jpg', 'https://cdn-p.cian.site/images/0/034/459/kvartira-moskva-beskudnikovskiy-bulvar-954430023-2.jpg', 'https://cdn-p.cian.site/images/0/034/459/kvartira-moskva-beskudnikovskiy-bulvar-954430024-2.jpg', 'https://cdn-p.cian.site/images/0/034/459/kvartira-moskva-beskudnikovskiy-bulvar-954430025-2.jpg', 'https://cdn-p.cian.site/images/7/044/459/kvartira-moskva-mihnevskaya-ulica-954440791-4.jpg', 'https://cdn-p.cian.site/images/7/044/459/kvartira-moskva-mihnevskaya-ulica-954440784-2.jpg', 'https://cdn-p.cian.site/images/7/044/459/kvartira-moskva-mihnevskaya-ulica-954440786-2.jpg', 'https://cdn-p.cian.site/images/7/044/459/kvartira-moskva-mihnevskaya-ulica-954440785-2.jpg', 'https://cdn-p.cian.site/images/7/044/459/kvartira-moskva-mihnevskaya-ulica-954440788-2.jpg', 'https://cdn-p.cian.site/images/0/604/459/kvartira-moskva-3ya-parkovaya-ulica-954406042-4.jpg', 'https://cdn-p.cian.site/images/0/604/459/kvartira-moskva-3ya-parkovaya-ulica-954406042-2.jpg', 'https://cdn-p.cian.site/images/0/604/459/kvartira-moskva-3ya-parkovaya-ulica-954406028-2.jpg', 'https://cdn-p.cian.site/images/0/604/459/kvartira-moskva-3ya-parkovaya-ulica-954406045-2.jpg', 'https://cdn-p.cian.site/images/0/604/459/kvartira-moskva-3ya-parkovaya-ulica-954406043-2.jpg', 'https://cdn-p.cian.site/images/2/331/359/kvartira-moskva-ulica-kahovka-953133211-4.jpg', 'https://cdn-p.cian.site/images/2/331/359/kvartira-moskva-ulica-kahovka-953133206-2.jpg', 'https://cdn-p.cian.site/images/2/331/359/kvartira-moskva-ulica-kahovka-953133204-2.jpg', 'https://cdn-p.cian.site/images/2/331/359/kvartira-moskva-ulica-kahovka-953133209-2.jpg', 'https://cdn-p.cian.site/images/2/331/359/kvartira-moskva-ulica-kahovka-953133211-2.jpg', 'https://cdn-p.cian.site/images/5/172/159/951271529-4.jpg', 'https://cdn-p.cian.site/images/5/172/159/951271580-2.jpg', 'https://cdn-p.cian.site/images/6/172/159/951271637-2.jpg', 'https://cdn-p.cian.site/images/6/172/159/951271664-2.jpg', 'https://cdn-p.cian.site/images/7/172/159/951271703-2.jpg', 'https://cdn-p.cian.site/images/1/528/459/954825138-4.jpg', 'https://cdn-p.cian.site/images/1/528/459/954825140-2.jpg', 'https://cdn-p.cian.site/images/1/528/459/954825143-2.jpg', 'https://cdn-p.cian.site/images/1/528/459/954825149-2.jpg', 'https://cdn-p.cian.site/images/1/528/459/954825156-2.jpg', 'https://cdn-p.cian.site/images/3/223/459/954322350-4.jpg', 'https://cdn-p.cian.site/images/3/223/459/954322353-2.jpg', 'https://cdn-p.cian.site/images/3/223/459/954322355-2.jpg', 'https://cdn-p.cian.site/images/3/223/459/954322356-2.jpg', 'https://cdn-p.cian.site/images/3/223/459/954322358-2.jpg', 'https://cdn-p.cian.site/images/1/799/039/kvartira-moskva-elevatornaya-ulica-930997166-4.jpg', 'https://cdn-p.cian.site/images/2/799/039/kvartira-moskva-elevatornaya-ulica-930997239-2.jpg', 'https://cdn-p.cian.site/images/2/799/039/kvartira-moskva-elevatornaya-ulica-930997264-2.jpg', 'https://cdn-p.cian.site/images/2/799/039/kvartira-moskva-elevatornaya-ulica-930997281-2.jpg', 'https://cdn-p.cian.site/images/2/799/039/kvartira-moskva-elevatornaya-ulica-930997293-2.jpg', 'https://cdn-p.cian.site/images/7/553/259/kvartira-moskva-ulica-egora-abakumova-952355753-4.jpg', 'https://cdn-p.cian.site/images/7/553/259/kvartira-moskva-ulica-egora-abakumova-952355763-2.jpg', 'https://cdn-p.cian.site/images/7/553/259/kvartira-moskva-ulica-egora-abakumova-952355780-2.jpg', 'https://cdn-p.cian.site/images/8/553/259/kvartira-moskva-ulica-egora-abakumova-952355820-2.jpg', 'https://cdn-p.cian.site/images/1/753/259/kvartira-moskva-ulica-egora-abakumova-952357127-2.jpg', 'https://cdn-p.cian.site/images/7/384/359/kvartira-moskva-holmogorskaya-ulica-953483786-4.jpg', 'https://cdn-p.cian.site/images/7/384/359/kvartira-moskva-holmogorskaya-ulica-953483786-2.jpg', 'https://cdn-p.cian.site/images/7/384/359/kvartira-moskva-holmogorskaya-ulica-953483775-2.jpg', 'https://cdn-p.cian.site/images/7/384/359/kvartira-moskva-holmogorskaya-ulica-953483758-2.jpg', 'https://cdn-p.cian.site/images/7/384/359/kvartira-moskva-holmogorskaya-ulica-953483777-2.jpg', 'https://cdn-p.cian.site/images/6/830/529/kvartira-moskva-masterovaya-ulica-925038664-4.jpg', 'https://cdn-p.cian.site/images/6/830/529/kvartira-moskva-masterovaya-ulica-925038665-2.jpg', 'https://cdn-p.cian.site/images/6/830/529/kvartira-moskva-masterovaya-ulica-925038666-2.jpg', 'https://cdn-p.cian.site/images/6/830/529/kvartira-moskva-masterovaya-ulica-925038668-2.jpg', 'https://cdn-p.cian.site/images/6/830/529/kvartira-moskva-masterovaya-ulica-925038669-2.jpg', 'https://cdn-p.cian.site/images/1/520/529/kvartira-moskva-voronezhskaya-ulica-925025140-4.jpg', 'https://cdn-p.cian.site/images/1/520/529/kvartira-moskva-voronezhskaya-ulica-925025146-2.jpg', 'https://cdn-p.cian.site/images/1/520/529/kvartira-moskva-voronezhskaya-ulica-925025156-2.jpg', 'https://cdn-p.cian.site/images/1/520/529/kvartira-moskva-voronezhskaya-ulica-925025181-2.jpg', 'https://cdn-p.cian.site/images/2/520/529/kvartira-moskva-voronezhskaya-ulica-925025239-2.jpg', 'https://cdn-p.cian.site/images/7/930/529/kvartira-moskva-ulica-isakovskogo-925039746-4.jpg', 'https://cdn-p.cian.site/images/7/930/529/kvartira-moskva-ulica-isakovskogo-925039747-2.jpg', 'https://cdn-p.cian.site/images/7/930/529/kvartira-moskva-ulica-isakovskogo-925039749-2.jpg', 'https://cdn-p.cian.site/images/7/930/529/kvartira-moskva-ulica-isakovskogo-925039763-2.jpg', 'https://cdn-p.cian.site/images/7/930/529/kvartira-moskva-ulica-isakovskogo-925039772-2.jpg', 'https://cdn-p.cian.site/images/7/563/119/kvartira-moskva-mkad71-y-km-911365798-4.jpg', 'https://cdn-p.cian.site/images/6/563/119/kvartira-moskva-mkad71-y-km-911365692-2.jpg', 'https://cdn-p.cian.site/images/6/563/119/kvartira-moskva-mkad71-y-km-911365693-2.jpg', 'https://cdn-p.cian.site/images/6/563/119/kvartira-moskva-mkad71-y-km-911365691-2.jpg', 'https://cdn-p.cian.site/images/7/563/119/kvartira-moskva-mkad71-y-km-911365779-2.jpg', 'https://cdn-p.cian.site/images/5/444/459/kvartira-moskva-shipilovskaya-ulica-954444544-4.jpg', 'https://cdn-p.cian.site/images/5/444/459/kvartira-moskva-shipilovskaya-ulica-954444551-2.jpg', 'https://cdn-p.cian.site/images/5/444/459/kvartira-moskva-shipilovskaya-ulica-954444541-2.jpg', 'https://cdn-p.cian.site/images/5/444/459/kvartira-moskva-shipilovskaya-ulica-954444549-2.jpg', 'https://cdn-p.cian.site/images/5/444/459/kvartira-moskva-shipilovskaya-ulica-954444542-2.jpg', 'https://cdn-p.cian.site/images/5/253/459/kvartira-rasskazovka-3-ya-borovskaya-ulica-954352545-4.jpg', 'https://cdn-p.cian.site/images/5/253/459/kvartira-rasskazovka-3-ya-borovskaya-ulica-954352547-2.jpg', 'https://cdn-p.cian.site/images/5/253/459/kvartira-rasskazovka-3-ya-borovskaya-ulica-954352542-2.jpg', 'https://cdn-p.cian.site/images/5/253/459/kvartira-rasskazovka-3-ya-borovskaya-ulica-954352543-2.jpg', 'https://cdn-p.cian.site/images/5/253/459/kvartira-rasskazovka-3-ya-borovskaya-ulica-954352544-2.jpg', 'https://cdn-p.cian.site/images/9/659/159/kvartira-moskva-kubanskaya-ulica-951956934-4.jpg', 'https://cdn-p.cian.site/images/9/759/159/kvartira-moskva-kubanskaya-ulica-951957981-2.jpg', 'https://cdn-p.cian.site/images/9/759/159/kvartira-moskva-kubanskaya-ulica-951957967-2.jpg', 'https://cdn-p.cian.site/images/9/759/159/kvartira-moskva-kubanskaya-ulica-951957969-2.jpg', 'https://cdn-p.cian.site/images/9/759/159/kvartira-moskva-kubanskaya-ulica-951957996-2.jpg', 'https://cdn-p.cian.site/images/3/299/359/kvartira-moskva-erevanskaya-ulica-953992330-4.jpg', 'https://cdn-p.cian.site/images/6/839/259/kvartira-moskva-erevanskaya-ulica-952938655-2.jpg', 'https://cdn-p.cian.site/images/4/299/359/kvartira-moskva-erevanskaya-ulica-953992410-2.jpg', 'https://cdn-p.cian.site/images/3/299/359/kvartira-moskva-erevanskaya-ulica-953992342-2.jpg', 'https://cdn-p.cian.site/images/3/299/359/kvartira-moskva-erevanskaya-ulica-953992321-2.jpg']/name/0_14.jpg"

In [None]:
import tensorflow as tf

IMG_SIZE = 224

data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    shear_range=0.2,
    horizontal_flip=True,
    validation_split=0.15
)

train_generator = data_generator.flow_from_directory(
    directory='images',
    batch_size=32,
    target_size=(IMG_SIZE, IMG_SIZE),
    subset='training'
)

validation_generator = data_generator.flow_from_directory(
    directory='images',
    batch_size=32,
    target_size=(IMG_SIZE, IMG_SIZE),
    subset='validation'
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers.experimental import preprocessing

img_augmentation = Sequential(
    [
        preprocessing.RandomRotation(factor=0.15),
        preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
        preprocessing.RandomFlip(),
        preprocessing.RandomContrast(factor=0.1),
    ],
    name="img_augmentation",
)

In [None]:
import tensorflow as tf

try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()


In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers
import matplotlib.pyplot as plt

import ssl

ssl._create_default_https_context = ssl._create_unverified_context

NUM_CLASSES = 2

def plot_hist(hist):
    plt.plot(hist.history["accuracy"])
    plt.plot(hist.history["val_accuracy"])
    plt.title("model accuracy")
    plt.ylabel("accuracy")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.show()

def build_model(num_classes):
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = img_augmentation(inputs)
    model = EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return model


with strategy.scope():
    model = build_model(num_classes=NUM_CLASSES)

epochs = 25  # @param {type: "slider", min:8, max:80}
hist = model.fit(train_generator, epochs=epochs, validation_data=validation_generator, verbose=2)
plot_hist(hist)


In [None]:
def unfreeze_model(model):
    # We unfreeze the top 20 layers while leaving BatchNorm layers frozen
    for layer in model.layers[-20:]:
        if not isinstance(layer, layers.BatchNormalization):
            layer.trainable = True

    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )


unfreeze_model(model)

epochs = 10  # @param {type: "slider", min:8, max:50}
hist = model.fit(train_generator, epochs=epochs, validation_data=validation_generator, verbose=2)
plot_hist(hist)


In [None]:
epochs = 3
hist = model.fit(train_generator, epochs=epochs, validation_data=validation_generator, verbose=2)
plot_hist(hist)


In [None]:
model.save('cian_model')

In [192]:
from tensorflow import keras
model = keras.models.load_model('cian_model')



In [193]:
from PIL import Image
import io
import numpy as np

def classify_image(url, session):
    def download_image(url):
        headers = {'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201'}
        r = session.get(url, headers=headers, stream=True)
        if r.status_code != 200:
            print(f'Could not download {url}')
            return
        r.raw.decode_content = True
        return r.content

    test_image = download_image(url)

    img = Image.open(io.BytesIO(test_image))
    img = img.convert('RGB')
    img = img.resize((IMG_SIZE, IMG_SIZE), Image.NEAREST)
    img = tf.keras.preprocessing.image.img_to_array(img)
    img = np.array([img])

    return model.predict(img)

with requests.Session() as s:
    print(classify_image('https://cdn-p.cian.site/images/5/143/749/947341508-1.jpg', s))

[[0.04558888 0.95441115]]


In [194]:
url = 'https://www.cian.ru/cat.php?currency=2&deal_type=rent&engine_version=2&foot_min=45&is_first_floor=0&maxprice=70000&metro%5B0%5D=4&metro%5B10%5D=46&metro%5B11%5D=47&metro%5B12%5D=50&metro%5B13%5D=54&metro%5B14%5D=56&metro%5B15%5D=58&metro%5B16%5D=61&metro%5B17%5D=64&metro%5B18%5D=66&metro%5B19%5D=68&metro%5B1%5D=8&metro%5B20%5D=71&metro%5B21%5D=77&metro%5B22%5D=78&metro%5B23%5D=80&metro%5B24%5D=84&metro%5B25%5D=85&metro%5B26%5D=86&metro%5B27%5D=96&metro%5B28%5D=98&metro%5B29%5D=103&metro%5B2%5D=9&metro%5B30%5D=105&metro%5B31%5D=114&metro%5B32%5D=115&metro%5B33%5D=116&metro%5B34%5D=118&metro%5B35%5D=119&metro%5B36%5D=121&metro%5B37%5D=123&metro%5B38%5D=124&metro%5B39%5D=125&metro%5B3%5D=12&metro%5B40%5D=129&metro%5B41%5D=130&metro%5B42%5D=132&metro%5B43%5D=143&metro%5B44%5D=145&metro%5B45%5D=148&metro%5B46%5D=149&metro%5B47%5D=150&metro%5B48%5D=159&metro%5B49%5D=310&metro%5B4%5D=15&metro%5B50%5D=350&metro%5B5%5D=18&metro%5B6%5D=20&metro%5B7%5D=33&metro%5B8%5D=36&metro%5B9%5D=38&minarea=45&minprice=40000&offer_type=flat&only_foot=2&p={page}&region=1&room1=1&room2=1&type=4'

with requests.Session() as s:
    api = CianAPI(s, url=url)
    interesting = await api.request_all_pages(40_000, 70_000)

In [195]:
THRESHOLD = 0.7
        
async def download_all():
    for page_index, page in enumerate(interesting):
        soup = BeautifulSoup(page, 'lxml-xml')
        ads = soup.find_all('div', **{'data-name': lambda d: d in ['OfferCard', 'TopOfferCard']})

        for ad in ads:
            images = []
            imgs = ad.find_all('img', **{'data-name': 'GalleryImage'})
            images += [img['src'] for img in imgs]

            #link = soup.find_all('a', target='_blank', href=lambda h: h.startswith('https://www.cian.ru/rent/flat'))
            link = ad.find_all('a', target='_blank', href=lambda h: h.startswith('https://www.cian.ru/rent/flat'))[0]['href']
            with requests.Session() as s:
                with ThreadPoolExecutor(32) as tp:
                    tasks = [
                        loop.run_in_executor(tp, classify_image, *(image, s))
                        for image in images
                    ]

                    scores = []
                    for score in await asyncio.gather(*tasks):
                        scores.append(score)

                    print(scores)
                    if len(scores) == 0:
                        continue
                        
                    median = np.mean(scores, axis=0)
                    if median[0][1] > THRESHOLD:
                        print(f'{link} :: {median} :: NICE ONE')
                    else:
                        print(f'{link} :: {median} :: TRASH')  
                        
                    
await download_all()

[array([[0.9989436 , 0.00105642]], dtype=float32), array([[0.9918315 , 0.00816852]], dtype=float32), array([[0.46625185, 0.5337481 ]], dtype=float32), array([[0.8657655 , 0.13423452]], dtype=float32), array([[0.8422217, 0.1577783]], dtype=float32)]
https://www.cian.ru/rent/flat/240212873/ :: [[0.83300287 0.16699716]] :: TRASH
[array([[0.15856497, 0.841435  ]], dtype=float32), array([[0.01437456, 0.98562545]], dtype=float32), array([[0.96221626, 0.0377837 ]], dtype=float32), array([[0.98084027, 0.0191597 ]], dtype=float32), array([[0.9803991 , 0.01960095]], dtype=float32)]
https://www.cian.ru/rent/flat/240752263/ :: [[0.619279   0.38072097]] :: TRASH
[array([[0.02813248, 0.9718675 ]], dtype=float32), array([[0.34404385, 0.6559562 ]], dtype=float32), array([[0.9660311 , 0.03396893]], dtype=float32), array([[0.35316548, 0.6468345 ]], dtype=float32), array([[0.06447387, 0.9355261 ]], dtype=float32)]
https://www.cian.ru/rent/flat/241142586/ :: [[0.35116935 0.64883065]] :: TRASH
[array([[0.9