In [17]:
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from geopy.geocoders import GoogleV3, Nominatim
import folium

In [2]:
load_dotenv()
GOOGLE_GEO_API_KEY = os.environ.get('GOOGLE_GEO_API_KEY')

In [3]:
def find_flat_in_minsk() -> list:
    _flats = []

    for page_number in range(1, 26):
        html_page_name = 'flat/%s.html' % page_number
        with open(html_page_name) as html_file:
            soup = BeautifulSoup(html_file, 'html.parser')

        prices = soup.find_all('span', attrs={'data-bind': "text: SearchApartments.formatPrice(apartment.price, 'USD')"})

        flats = soup.find_all('span', attrs={'class': 'classified__caption-item classified__caption-item_type', 
                                             'data-bind': 'text: SearchApartments.formatRentType(apartment.rent_type)'})

        addresses = soup.find_all('span', attrs={'class': 'classified__caption-item classified__caption-item_adress', 
                                                 'data-bind': 'text: apartment.location.user_address'})

        # print(page_number, '| prices:', len(prices), ', flats:', len(flats), ', addresses', len(addresses), '\n')

        for price, flat_size, address in zip(prices, flats, addresses):
            pr = int(price.text)
            fs = flat_size.text
            if fs == 'Комната':
                fs = 0.5
            else:
                fs = int(fs[0])

            ads = address.text

            _flats.append([pr, fs, ads])
    
    return _flats

In [4]:
# minsk_flats = find_flat_in_minsk()
print('Now avalible', len(minsk_flats), 'flats in Minsk')
minsk_flats[:5]

Now avalible 900 flats in Minsk


[[295, 1, 'Чернышевского, 7'],
 [900, 4, 'Калинина, 7А'],
 [470, 2, 'Мельникайте, 16'],
 [380, 1, 'Академика Фёдорова, 3'],
 [300, 2, 'Волоха, 7 к1']]

In [5]:
def find_coords(flat_list: list):
    for flat in flat_list:
        try:
            address = flat[2] + ', Minsk'
            google_geo = GoogleV3(api_key=GOOGLE_GEO_API_KEY)
            coords = google_geo.geocode(address)

            lat = round(float(coords.latitude), 7)
            lng = round(float(coords.longitude), 7)

            flat.append(lat)
            flat.append(lng)
            # print(lat, lng)
        except Exception as ex:
            print('Exception in google_location: %s' % ex)

In [6]:
# find_coords(minsk_flats)

In [7]:
minsk_flats[:5]

[[295, 1, 'Чернышевского, 7', 53.9269839, 27.6004055],
 [900, 4, 'Калинина, 7А', 53.9264201, 27.6057458],
 [470, 2, 'Мельникайте, 16', 53.9096188, 27.5449777],
 [380, 1, 'Академика Фёдорова, 3', 53.8729582, 27.6332679],
 [300, 2, 'Волоха, 7 к1', 53.8969071, 27.5218413]]

In [8]:
columns = ['price', 'flat-size', 'address', 'lat', 'lng']
df_flats = pd.DataFrame(data=minsk_flats, columns=columns)
df_flats.head()

Unnamed: 0,price,flat-size,address,lat,lng
0,295,1.0,"Чернышевского, 7",53.926984,27.600406
1,900,4.0,"Калинина, 7А",53.92642,27.605746
2,470,2.0,"Мельникайте, 16",53.909619,27.544978
3,380,1.0,"Академика Фёдорова, 3",53.872958,27.633268
4,300,2.0,"Волоха, 7 к1",53.896907,27.521841


In [9]:
# df_flats.to_csv('minsk_flats.csv')

In [12]:
df_flats = pd.read_csv('minsk_flats.csv')
df_flats.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
print(df_flats.shape)
df_flats.head()

(900, 5)


Unnamed: 0,price,flat-size,address,lat,lng
0,295,1.0,"Чернышевского, 7",53.926984,27.600406
1,900,4.0,"Калинина, 7А",53.92642,27.605746
2,470,2.0,"Мельникайте, 16",53.909619,27.544978
3,380,1.0,"Академика Фёдорова, 3",53.872958,27.633268
4,300,2.0,"Волоха, 7 к1",53.896907,27.521841


In [18]:
address = 'Minsk, BY'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinate of Minsk are {}, {}.'.format(latitude, longitude))

The geographical coordinate of Minsk are 53.902334, 27.5618791.


In [19]:
map_minsk_flat = folium.Map(location=[latitude, longitude], zoom_start=12)

# add markers to map
for lat, lng, price, flat_size, address in zip(df_flats['lat'], df_flats['lng'], df_flats['price'], df_flats['flat-size'], df_flats['address']):
    label = '{}$, {}k, {}'.format(price, flat_size, address)
    label_f = folium.Popup(label, parse_html=True)
    tooltip = folium.Tooltip(label)
    folium.CircleMarker(
        (lat, lng),
        radius=5,
        popup=label_f,
        tooltip=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_minsk_flat)  
    
map_minsk_flat