In [15]:
import requests
from bs4 import BeautifulSoup
from time import sleep
from tqdm import tqdm
import pandas as pd
import numpy as np
import re
import json
from gensim.utils import deaccent
import random
import os
import math

#geocoding
from bokeh.io import output_notebook, show, output_file
from bokeh.plotting import figure
from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar, NumeralTickFormatter
from bokeh.palettes import brewer

from bokeh.io.doc import curdoc
from bokeh.models import Slider, HoverTool, Select
from bokeh.layouts import widgetbox, row, column
import geopy
import geopandas as gpd
from geopy.extra.rate_limiter import RateLimiter

#visualisation
import folium
import folium.plugins as plugins

## Establishing functions

In [25]:
 def get_soups(links, name):
        '''
        This function iterates over all search pages, converts them into a BeautifulSoup object and stores them in a txt file as 
        strings outside of this script. BREAKHERE is used to distinguish between objects. 
        '''
        soups = []
        for link in links:
            sleep(random.uniform(0.5, 2))
            request = requests.get(link)
            request.encoding='UTF-8'
            soups.append(BeautifulSoup(request.text,'lxml'))
        with open(name, 'w') as f:
            for s in soups:
                f.write(str(deaccent(s).encode("utf-8")) + 'BREAKHERE')
            f.close

## Getting the data from websites

In [26]:
fileDir = os.path.dirname(os.path.realpath('__file__'))

In [18]:
counter = 0

In [19]:
class DownloaderBezRealitky(): #error prone, need to correct
    def __init__(self):
        '''
        For the bezrealitky search, you need to iterate over search pages. Self.page_bezrealitky stores the maximum amount
        of pages and then via self.link a list of all pages from search is created in self.hrefs_bezrealitky.
        '''
        self.link = 'https://www.bezrealitky.cz/vypis/nabidka-pronajem/byt/praha?_token=pr1lf-vKwDFfmFbICiz2PfC-Zdwq-2JolXi4MeMHsrw&page=1'
        self.request = requests.get(self.link)
        self.request.encoding='UTF-8'
        self.soup = BeautifulSoup(self.request.text,'lxml')
        self.page_bezrealitky = int(self.soup.findAll('a',{'class':'page-link pagination__page'})[-2].text)
        self.hrefs_bezrealitky = ['https://www.bezrealitky.cz/vypis/nabidka-pronajem/byt/praha?_token=pr1lf-vKwDFfmFbICiz2PfC-Zdwq-2JolXi4MeMHsrw&page=' 
                                  + str(i) for i in range(1,self.page_bezrealitky)]
        self.soups = []
        self.counter = counter

    def get_data(self):
        '''
        Main method to obtain and transform the data. HTMLs are read from the txt file and stored in a list (soup_list) 
        within this script. Next, the method iterates over the list, converts the strings in txt file into a BeautifulSoup
        object and parses the html for relevant data. At the end, a nested dictionary (dicts) is created and stored
        as a json file outside of this script.
        '''
        with open(fileDir + '\\Data\\bezrealitky_links.txt', 'r') as f:
            content = f.read()
        soup_list = content.split('BREAKHERE')
        dicts = {}
        counter = 0
        for soup in soup_list:
            descrips = [] #empty list for apartment values
            values = [] #empty list for apartment prices
            vals = BeautifulSoup(soup,'lxml').findAll('strong', {'class':'product__value'}) #parsing for apartment values
            ##vals = soup.findAll('strong', {'class':'product__value'})
            for vl in vals:
                values.append(vl.text.strip())
            #img = soup.findAll('img')
            img = BeautifulSoup(soup,'lxml').findAll('img') #parsing for apartment info (street, city, size..)
            for i in img:
                if 'Pronajem' and 'obr. c. 1' in i['alt']: #info present at all pictures, let's take info from the first one
                        info = i['alt'].split(',')[0:4] #info separated by comma, split into a list
                        if 'Praha' == info[-1].strip(): #if street non present, insert a NaN instead
                            info.insert(2, 'NaN')
                            del info[-1]
                            m = info[1].split(' ')
                            info[1] = m[1]
                            descrips.append(info)
                        else:
                            m = info[1].split(' ')
                            info[1] = m[1]
                            descrips.append(info)
            count = 0
            for pp in values: #append apartment prices to info about apartments in list descrips
                try:
                    descrips[count].append(pp)
                    descrips[count][0] = descrips[count][0][-4:].strip()
                    count += 1
                except IndexError:
                    count += 1
                    continue
            for item in descrips:
                try:
                    if '+' in item[4]: #prices often written as '19000 Kč + 4000Kč' so we need to split it
                        prices = item.pop(4).split('+')
                        item.append(re.sub("[^0-9]", "", prices[0])) #keep only numeric characters, i.e. price
                        item.append(re.sub("[^0-9]", "", prices[1]))
                    else:
                        prices = [item.pop(4), '0'] #if only '19000 Kč', insert 0 as price for utilities not specified
                        item.append(re.sub("[^0-9]", "", prices[0]))
                        item.append(re.sub("[^0-9]", "", prices[1]))
                except IndexError:
                    continue
            for item in descrips: #store apartment info, price into a dictionary and index by counter
                try:
                    dict = {}
                    dict['Size'] = item[0]
                    dict['m2'] = re.sub("[^0-9]", "", item[1]) #keep only size, i.e. numeric characters
                    dict['Street'] = deaccent(item[2]) #deaccent to provent potential errors
                    dict['District'] = deaccent(item[3])
                    dict['Base Price'] = int(item[4])
                    dict['Utilities Price'] = int(item[5])
                    dict['Total Price'] = int(item[4]) + int(item[5])
                    dict['Source'] = 'bezrealitky.cz'
                    dicts[self.counter] = dict
                    self.counter += 1
                except IndexError:
                    #counter +=1
                    continue
            print('Done loop number ' + str(self.counter) + '. Printing descrips.')
        with open(fileDir + '\\Data\\bezrealitky.json', 'w') as write_file: #store data into a json file
            json.dump(dicts, write_file, indent = 4)

        
            
        

In [20]:
a = DownloaderBezRealitky()

In [21]:
get_soups(a.hrefs_bezrealitky[0:2], fileDir + '\\Data\\bezrealitky_links.txt')

In [22]:
a.get_data()

Done loop number 10. Printing descrips.
Done loop number 20. Printing descrips.
Done loop number 20. Printing descrips.


In [23]:
counter = a.counter

In [24]:
class DownloaderReality():
    def __init__(self):
        '''
        For the reality search, you need to iterate over search pages. Self.page_reality stores the maximum amount
        of pages and then via self.link a list of all pages from search is created in self.hrefs_reality.
        '''
        self.link = 'https://reality.idnes.cz/s/pronajem/byty/praha/?page=1'
        self.request = requests.get(self.link)
        self.request.encoding='UTF-8'
        self.soup = BeautifulSoup(self.request.text,'lxml')
        self.page_reality = int(self.soup.findAll('a',{'class':'btn btn--border paging__item'})[-1].text) - 1
        self.hrefs_reality = ['https://reality.idnes.cz/s/pronajem/byty/praha/?page=' 
                                  + str(i) for i in range(1,self.page_reality)]
        self.soups = []
        self.counter = counter
        
    def get_data(self):
        '''
        Main method to obtain and transform the data. HTMLs are read from the txt file and stored in a list (soup_list) 
        within this script. Next, the method iterates over the list, converts the strings in txt file into a BeautifulSoup
        object and parses the html for relevant data. At the end, a nested dictionary (dicts) is created and stored
        as a json file outside of this script.
        '''
        with open(fileDir + '\\Data\\reality_idnes_links.txt', 'r') as f:
            content = f.read()
        soup_list = content.split('BREAKHERE')
        dicts = {}
        counter = 0
        for soup in soup_list:
            descrips = [] #empty list for apartment values
            values = [] #empty list for apartment prices
            info_size = []
            apartments = []
            vals = BeautifulSoup(soup,'lxml').findAll('p', {'class':'c-list-products__price'}) #parsing for apartment values
            for vl in vals: #adding values
                values.append(re.sub("[^0-9]", "",vl.find('strong').text))
                
            locs = BeautifulSoup(soup,'lxml').findAll('p', {'class':'c-list-products__info'})
            for i in locs: #adding location
                if 'Komercni sdeleni' in i.text:
                    continue
                else:
                    temp_info = str(i.text)
                    temp_info = re.sub(r'^(?:\\n)+','', temp_info).strip()[:-2]
                    temp_info = temp_info.strip().split(',')
                    temp_info = [i.strip() for i in temp_info]
                    if len(temp_info) == 1:
                        temp_info.append(temp_info[0])
                        temp_info[0] = 'NaN'
                    if len(temp_info) == 3:
                        del temp_info[2]
                    descrips.append(temp_info)
                    
            sizes = BeautifulSoup(soup,'lxml').findAll('h2', {'class':'c-list-products__title'})
            for s in sizes: #adding size and m2
                try:
                    item = s.text.split('bytu')[1].strip()[:-2]
                    temp = item.split(',')
                    temp[1] = temp[1][:-10].strip()
                    info_size.append(temp)
                except IndexError:
                    continue
            
            for apart in range(0,len(info_size)):
                apartments.append(info_size[apart] + descrips[apart] + [values[apart]])
                
            for item in apartments: #store apartment info, price into a dictionary and index by counter
                try:
                    dict = {}
                    dict['Size'] = item[0]
                    dict['m2'] = item[1]
                    dict['Street'] = deaccent(item[2]) #deaccent to provent potential errors
                    dict['District'] = deaccent(item[3])
                    dict['Base Price'] = int(item[4])
                    dict['Utilities Price'] = 0
                    dict['Total Price'] = int(item[4])
                    dict['Source'] = 'reality.idnes.cz'
                    dicts[self.counter] = dict
                    self.counter +=1
                except ValueError:
                    #counter += 1
                    continue
            print('Done loop number ' + str(self.counter) + '. Printing apartments.')
        with open(fileDir + '\\Data\\idnes_reality.json', 'w') as write_file: #store data into a json file
            json.dump(dicts, write_file, indent = 4)

In [25]:
b = DownloaderReality()

In [26]:
get_soups(b.hrefs_reality[0:2], fileDir + '\\Data\\reality_idnes_links.txt')

In [27]:
b.get_data()

Done loop number 40. Printing apartments.
Done loop number 60. Printing apartments.
Done loop number 60. Printing apartments.


In [28]:
class DownloaderCeskeReality():
    def __init__(self):
        '''
        For the reality search, you need to iterate over search pages. Self.page_reality stores the maximum amount
        of pages and then via self.link a list of all pages from search is created in self.hrefs_reality.
        '''
        self.link = 'https://www.ceskereality.cz/pronajem/byty/praha/?strana=2'

        self.request = requests.get(self.link)
        self.request.encoding='UTF-8'
        self.soup = BeautifulSoup(self.request.text, 'html.parser')
        
        self.page_ceskereality = int([page.text for page in self.soup.findAll('ul',{'class':'pages'})[0]][-2]) - 1
        self.hrefs_reality = ['https://www.ceskereality.cz/pronajem/byty/praha/?strana=' 
                        + str(i) for i in range(1,self.page_ceskereality)]
        self.soups = []
        
    def get_soups(self):
        '''
        This method iterates over all search pages, converts them into a BeautifulSoup object and stores them in a txt file as 
        strings outside of this script. BREAKHERE is used to distinguish between objects. 
        '''
        for link in self.hrefs_reality[0:3]:
            sleep(random.uniform(0.5, 2))
            self.link = link
            self.request = requests.get(self.link)
            self.request.encoding='utf-8'
            self.soups.append(BeautifulSoup(self.request.text,'html.parser'))
            print('Page saved.')
            print(self.soups)
        with open('ceske_reality_links.txt', 'w') as f:
            for s in self.soups:
                f.write(str(deaccent(s).encode("utf-8")) + 'BREAKHERE')
            f.close
    
    def get_data(self):
        '''
        Main method to obtain and transform the data. HTMLs are read from the txt file and stored in a list (soup_list) 
        within this script. Next, the method iterates over the list, converts the strings in txt file into a BeautifulSoup
        object and parses the html for relevant data. At the end, a nested dictionary (dicts) is created and stored
        as a json file outside of this script.
        '''
        with open('ceske_reality_links.txt', 'r') as f:
            content = f.read()
        soup_list = content.split('BREAKHERE')
        dicts = {}
        #counter = 0
        for soup in soup_list[0:1]:
            descrips = [] #empty list for apartment values
            values = [] #empty list for apartment prices
            info_size = []
            apartments = []
            vals = BeautifulSoup(soup,'lxml').findAll('div', {'class':'cena'}) #parsing for apartment values
            for value in vals:
                values.append(re.sub("[^0-9]", "",value.text.split(',')[0]))
            locs = BeautifulSoup(soup,'lxml').findAll('div', {'class':'div_nemovitost suda'})
            for item in locs:
                print(item.text)
            #print(locs)

            '''
            for item in apartments: #store apartment info, price into a dictionary and index by counter
                try:
                    dict = {}
                    dict['Size'] = item[0]
                    dict['m2'] = item[1]
                    dict['Street'] = deaccent(item[2]) #deaccent to provent potential errors
                    dict['District'] = deaccent(item[3])
                    dict['Base Price'] = int(item[4])
                    dict['Utilities Price'] = 0
                    dict['Total Price'] = int(item[4]) 
                    dicts[counter] = dict
                    counter +=1
                except ValueError:
                    counter += 1
                    continue
            print('Done loop number ' + str(counter) + '. Printing apartments.')
        with open('idnes_reality.json', 'w') as write_file: #store data into a json file
            json.dump(dicts, write_file, indent = 4)      
            '''          

In [None]:
c = DownloaderCeskeReality()
c.get_soups()

In [None]:
c.get_data()

In [6]:
get_soups(c.hrefs_reality[0:2], 'blah.txt')

## Combining the fetched data into one file

In [27]:
big_dict = []
data = {}
def data_combine(*args):
    #input example - 'idnes_reality.json', 'bezrealitky.json'
    for arg in args:
        with open(fileDir + '\\Data\\' + arg) as json_file:
            file_ = json.load(json_file)
            big_dict.append(file_)
    for dt in big_dict:
        data.update(dt)

data_combine('bezrealitky.json', 'idnes_reality.json')

In [28]:
dataframe = pd.DataFrame(data).T
dataframe = dataframe.replace('NaN', '', regex=True)
dataframe['Address'] = dataframe['Street'] + ',' + dataframe['District'] + ',' + 'Praha'
dataframe

Unnamed: 0,Size,m2,Street,District,Base Price,Utilities Price,Total Price,Source,Address
0,3+1,95,Korunni,Praha - Vinohrady,32500,5000,37500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
1,3+1,95,Korunni,Praha - Vinohrady,12000,750,12750,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
2,3+1,95,Korunni,Praha - Vinohrady,18000,3500,21500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
3,3+1,95,Korunni,Praha - Vinohrady,16000,3500,19500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
4,3+1,95,Korunni,Praha - Vinohrady,12000,3500,15500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
5,3+1,95,Korunni,Praha - Vinohrady,29000,3500,32500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
6,3+1,95,Korunni,Praha - Vinohrady,14000,2000,16000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
7,3+1,95,Korunni,Praha - Vinohrady,11000,2000,13000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
8,3+1,95,Korunni,Praha - Vinohrady,17000,5500,22500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"
9,3+1,95,Korunni,Praha - Vinohrady,14500,3500,18000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha"


## Data Visualisation - Apartment locations

In [36]:
locator = geopy.Nominatim(user_agent='myGeocoder')

In [51]:
geocode = RateLimiter(locator.geocode) #, min_delay_seconds=1)

dataframe['location'] = dataframe['Address'].apply(geocode)

dataframe['point'] = dataframe['location'].apply(lambda loc: tuple(loc.point) if loc else None)

dataframe[['latitude', 'longitude', 'altitude']] = pd.DataFrame(dataframe['point'].tolist(), index=dataframe.index)

In [52]:
dataframe

Unnamed: 0,Size,m2,Street,District,Base Price,Utilities Price,Total Price,Source,Address,location,point,latitude,longitude,altitude
0,3+1,95,Korunni,Praha - Vinohrady,32500,5000,37500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
1,3+1,95,Korunni,Praha - Vinohrady,12000,750,12750,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
2,3+1,95,Korunni,Praha - Vinohrady,18000,3500,21500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
3,3+1,95,Korunni,Praha - Vinohrady,16000,3500,19500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
4,3+1,95,Korunni,Praha - Vinohrady,12000,3500,15500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
5,3+1,95,Korunni,Praha - Vinohrady,29000,3500,32500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
6,3+1,95,Korunni,Praha - Vinohrady,14000,2000,16000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
7,3+1,95,Korunni,Praha - Vinohrady,11000,2000,13000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
8,3+1,95,Korunni,Praha - Vinohrady,17000,5500,22500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0
9,3+1,95,Korunni,Praha - Vinohrady,14500,3500,18000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829, 0.0)",50.075295,14.440283,0.0


In [57]:
dataframe['point'] = [tuple(list(x)[0:2]) for x in dataframe['point']]
dataframe

Unnamed: 0,Size,m2,Street,District,Base Price,Utilities Price,Total Price,Source,Address,location,point,latitude,longitude,altitude
0,3+1,95,Korunni,Praha - Vinohrady,32500,5000,37500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
1,3+1,95,Korunni,Praha - Vinohrady,12000,750,12750,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
2,3+1,95,Korunni,Praha - Vinohrady,18000,3500,21500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
3,3+1,95,Korunni,Praha - Vinohrady,16000,3500,19500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
4,3+1,95,Korunni,Praha - Vinohrady,12000,3500,15500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
5,3+1,95,Korunni,Praha - Vinohrady,29000,3500,32500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
6,3+1,95,Korunni,Praha - Vinohrady,14000,2000,16000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
7,3+1,95,Korunni,Praha - Vinohrady,11000,2000,13000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
8,3+1,95,Korunni,Praha - Vinohrady,17000,5500,22500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0
9,3+1,95,Korunni,Praha - Vinohrady,14500,3500,18000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...","(50.0752954, 14.4402829)",50.075295,14.440283,0.0


In [86]:
folium_map = folium.Map(location=[50.08804, 14.42076],
                        zoom_start=12,
                        tiles='cartodbpositron')

plugins.FastMarkerCluster(data=list(zip(dataframe['latitude'].values, dataframe['longitude'].values))).add_to(folium_map)

popup = folium.Popup(dataframe['Base Price'])
popup.add_to(folium_map)                          

folium_map

## Data Visualisations - Neighborhoods

In [20]:
with open('Praha.json') as data: #could be automated to get the data?
    hoods = json.loads(data.read())
    
gdf = gpd.GeoDataFrame.from_features(hoods["features"])
print(gdf.head())


                                            geometry  OBJECTID  \
0  POLYGON ((14.53373 50.16223, 14.53424 50.16156...         1   
1  POLYGON ((14.55493 50.08171, 14.55492 50.08173...         2   
2  POLYGON ((14.45063 50.01453, 14.45044 50.01456...         3   
3  POLYGON ((14.29321 50.07751, 14.29319 50.07755...         4   
4  POLYGON ((14.44975 50.13535, 14.44968 50.13546...         5   

        DAT_VZNIK       DAT_ZMENA       PLOCHA  ID  KOD_MC  \
0  20181106141412  20191105233720  10183715.83  25  547310   
1  20171110095818  20190423111436  13529637.40   9  547361   
2  20181009145125  20200203101156  23317909.05   4  547107   
3  20181106141412  20181106164427   3253142.41  52  547174   
4  20160422155519  20200102142501   4988872.32  46  547301   

             NAZEV_MC  KOD_MO KOD_SO  TID_TMMESTSKECASTI_P   POSKYT  \
0      Praha-Čakovice      94    221                    25  HMP-IPR   
1            Praha 14      94    141                     9  HMP-IPR   
2            Prah

Index(['geometry', 'OBJECTID', 'DAT_VZNIK', 'DAT_ZMENA', 'PLOCHA', 'ID',
       'KOD_MC', 'NAZEV_MC', 'KOD_MO', 'KOD_SO', 'TID_TMMESTSKECASTI_P',
       'POSKYT', 'ID_POSKYT', 'STAV_ZMENA', 'NAZEV_1', 'Shape_Length',
       'Shape_Area'],
      dtype='object')

In [None]:
gdf_indv = gpd.GeoDataFrame(dataframe, geometry = gpd.points_from_xy(dataframe.longitude, dataframe.latitude))
print(gdf_indv.head())

In [66]:
df_final = gpd.sjoin(gdf_indv, gdf, op='within') 
df_final

Unnamed: 0,Size,m2,Street,District,Base Price,Utilities Price,Total Price,Source,Address,location,...,NAZEV_MC,KOD_MO,KOD_SO,TID_TMMESTSKECASTI_P,POSKYT,ID_POSKYT,STAV_ZMENA,NAZEV_1,Shape_Length,Shape_Area
0,3+1,95,Korunni,Praha - Vinohrady,32500,5000,37500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
1,3+1,95,Korunni,Praha - Vinohrady,12000,750,12750,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
2,3+1,95,Korunni,Praha - Vinohrady,18000,3500,21500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
3,3+1,95,Korunni,Praha - Vinohrady,16000,3500,19500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
4,3+1,95,Korunni,Praha - Vinohrady,12000,3500,15500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
5,3+1,95,Korunni,Praha - Vinohrady,29000,3500,32500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
6,3+1,95,Korunni,Praha - Vinohrady,14000,2000,16000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
7,3+1,95,Korunni,Praha - Vinohrady,11000,2000,13000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
8,3+1,95,Korunni,Praha - Vinohrady,17000,5500,22500,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
9,3+1,95,Korunni,Praha - Vinohrady,14500,3500,18000,bezrealitky.cz,"Korunni, Praha - Vinohrady,Praha","(Korunní, Vinohrady, Praha, okres Hlavní město...",...,Praha 2,27,27,30,HMP-IPR,43,U,Praha 2,0.134652,4184938.0
