# Flats adverts on OLX mapping project

## Gathering adverts

In [259]:
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim
from datetime import date
import pyrosm
import pandas as pd
import requests
import colorsys
import folium

In [267]:
def main(url):
    soup = connect(url)
    num_pages = int(page_count(soup))
    # num_pages = 1 #for test purpose, only one page gathered

    for page in range(1,num_pages+1):
        parse(connect(url.replace("page=1",f"page={page}")))

def connect(url):
    page = requests.get(url)
    soup = BeautifulSoup(page.content, "html.parser")

    return soup

def page_count(soup):
    # return soup.find_all("li", {"data-testid":"pagination-list-item"})[-1].text #finding amount of pages by checking last page link number
    return 3 # for test, only using 3 first pages

def parse(soup):
    cards = soup.find_all("div", {"data-cy": "l-card"})

    card_data(cards)

def card_data(cards):
    for card in cards:
        global df
        name = card.find("h6", {"class":"css-16v5mdi er34gjf0"}).text
        price = card.find("p", {"data-testid":"ad-price"}).text.replace(' ','').replace('zł','')
        negotiable = False
        if "donegocjacji" in price:
            price = float(price.strip('donegocjacji'))
            negotiable = True
        else:
            price = float(price)
        
        size = float(card.find("span", {"class":"css-643j0o"}).text.split(" - ")[0].replace(' ','').replace('m²','').replace(',','.'))
        mprice = float(card.find("span", {"class":"css-643j0o"}).text.split(" - ")[1].replace(' ','').replace('zł/m²',''))
        location = card.find("p", {"data-testid":"location-date"}).text.split(" - ")[0]
        if "Wybud" in location:
            location = location.removesuffix(' Wybudowanie')
            location = location.removesuffix(' Wybud.')

        district = None
        try:
            coords = geolocator.geocode(location + ", Pomorskie, Polska")
            lat = coords.latitude
            lon = coords.longitude
        except:
            lat = None
            lon = None

        if ", " in location:
            location = location.split(", ")[0]
            district = card.find("p", {"data-testid":"location-date"}).text.split(" - ")[0].split(", ")[1]

        dateposted = card.find("p", {"data-testid":"location-date"}).text.split(" - ")[1]
        if "Odświeżono dnia " in dateposted:
            dateposted = dateposted.strip('Odświeżono dnia ')
        elif "Dzisiaj" in dateposted:
            dateposted = date.today()

        df.loc[len(df)] = [name,price,size,mprice,location,district,lat,lon,dateposted,negotiable]

def hls_to_hex(h, l, s):
    h = h/360
    l = l/100
    s = s/100

    r,g,b = colorsys.hls_to_rgb(h, l, s)
    r_hex = str(hex(round(r*255)))[2:4]
    if len(r_hex) == 1:
        r_hex = "0" + r_hex
    g_hex = str(hex(round(g*255)))[2:4]
    if len(g_hex) == 1:
        g_hex = "0" + g_hex
    b_hex = str(hex(round(b*255)))[2:4]
    if len(b_hex) == 1:
        b_hex = "0" + b_hex
    return f"#{r_hex}{g_hex}{b_hex}"

df = pd.DataFrame(columns=['Name','Price','Size','Mprice','City','District','Lat','Lon','DatePosted','Negotiable'])

geolocator = Nominatim(user_agent="my_request")

main("https://www.olx.pl/nieruchomosci/mieszkania/sprzedaz/gdansk/?search%5Bdist%5D=30")
# main("https://www.olx.pl/nieruchomosci/dzialki/sprzedaz/gdansk/?search%5Bdist%5D=30&search%5Bprivate_business%5D=private&search%5Bfilter_enum_type%5D%5B0%5D=dzialki-budowlane")

Add separate list for City and coordinates, to avoid repeating search and make all work faster (1st try - 5m 37s, 2nd try - 6m 9s)

In [261]:
map = folium.Map(location=[54.36, 18.63],zoom_start=9,tiles="Stamen Toner")

fg = folium.FeatureGroup(name='Działki')

df['Mprice'].min()
df['Mprice'].max()

min_max_scaler = preprocessing.MinMaxScaler()
X_train_minmax = min_max_scaler.fit_transform(df['Mprice'].values.reshape(-1, 1))
df['Mprice_n'] = X_train_minmax

for index, coordinates in df.iterrows():
    try:
        fg.add_child(folium.Circle(location=[coordinates["Lat"],coordinates['Lon']],popup=f"{coordinates['City']}, {coordinates['Mprice']}",radius=float(coordinates['Mprice_n']*500+500),color=hls_to_hex(100-coordinates['Mprice_n']*100,50,100)))
    except:
        print(f"location not found for {coordinates['City']}")

coords = geolocator.geocode("Wiele, województwo Pomorskie, Polska")
fg.add_child(folium.Circle(location=[coords.latitude,coords.longitude],popup=f"{coords}",radius=1000,color="#0000ff"))

map.add_child(fg)
map.save('analysis.html')

NameError: name 'preprocessing' is not defined

In [None]:
df[df['Name'].str.contains('Wiele')]

remove "Wybud." and "Wybudowanie" from name of location

## Displaying flats on map

In [268]:
from shapely.geometry import Point
import geopandas as gpd
import pandas as pd
import folium

gdf = gpd.read_file("trojmiasto.geojson")

In [275]:
df

Unnamed: 0,Name,Price,Size,Mprice,City,District,Lat,Lon,DatePosted,Negotiable
0,Luksusowy Apartament na Fort Forest w Gdyni,927000.0,62.0,14951.61,Gdynia,Chwarzno-Wiczlino,54.518092,18.440973,17 sierpnia 2023,False
1,Mieszkanie 2-pokojowe na Gdańskiej Żabiance na...,529000.0,40.0,13225.00,Gdańsk,Żabianka,54.421347,18.584149,Wejhera,True
2,Mieszkanie 2 pokojowe we wsi Kościeleczki,133000.0,54.9,2422.59,Kościeleczki,,54.065455,19.015425,2023-08-29,True
3,"2 POK. Piękne, PREMIUM, Starówka, blisko SKM, ...",683500.0,38.0,17986.84,Gdańsk,Śródmieście,54.352939,18.659908,2023-08-29,False
4,Mieszkanie ul. 10 Lutego Wejherowo 2-pokojowe,250000.0,37.0,6756.76,Wejherowo,,54.606068,18.231786,2023-08-29,False
...,...,...,...,...,...,...,...,...,...,...
148,"2-poziomowe mieszkanie, 4 pokoje, 89m2, balkon",820000.0,89.0,9213.48,Gdańsk,,54.348291,18.654023,2023-08-29,False
149,"2–pokojowe mieszkanie 46,20M2 do wprowadzenia!",699000.0,46.2,15129.87,Gdańsk,Jasień,54.326575,18.562338,2023-08-29,False
150,Promocja - mieszkanie 3-pokoje 55m2 - Bezpiecz...,199000.0,55.0,3618.18,Straszyn,,54.272249,18.582310,2023-08-29,False
151,Mieszkanie 55m² 3 pokoje - Bezpieczny Kredyt 2%,199000.0,55.0,3618.18,Pruszcz Gdański,,54.257648,18.650104,2023-08-29,False


In [272]:
points_gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Lon, df.Lat), crs="EPSG:4326")
points_gdf['point_name'] = ''

for point_id, point in points_gdf.iterrows():
    result = gdf['geometry'].contains(points_gdf.geometry[point_id])

    #checks if point in any of sub-polygons
    if result.any() == True:
        for idx, res in enumerate(result):
            if res == True:
                distr_name = gdf.loc[idx,'name']
                city_name = gdf.loc[idx,'is_in:city']
    else:
        distr_name = 'N/A'
        city_name = 'N/A'

    points_gdf.at[point_id,'point_name'] = city_name+", "+distr_name

In [276]:
mapPomerania = folium.Map(location=[54.43, 18.63],zoom_start=11)
g = folium.GeoJson(gdf, name="geojson").add_to(mapPomerania)
folium.GeoJsonTooltip(fields=["is_in:city","name"]).add_to(g)

for idx, coordinates in points_gdf.iterrows():
    # Place the markers with the popup labels and data
    try:
        mapPomerania.add_child(
            folium.Marker(
                location = [float(points_gdf.loc[idx].geometry.y), float(points_gdf.loc[idx].geometry.x)],
                popup = points_gdf.loc[idx]['Name'],
                icon = folium.Icon(color="red") if 'N/A' not in points_gdf.loc[idx]['point_name'] else folium.Icon(color="gray"),
            )
        )
    except:
        print(points_gdf.loc[idx]['Name'] + " location not found")

Przestronne słoneczne mieszkanie blisko centrum location not found
Przestronne słoneczne mieszkanie blisko centrum location not found


In [277]:
mapPomerania.save('analysis.html')
mapPomerania