# Housing data preprocessing

In [1]:
import pandas as pd
import numpy as np

## Iterating over listing URLs

In [15]:
# Load the Opole listings previously scraped from otodom and preview them.
# (No reset_index() here: it returns a new frame, so calling it without
# assigning the result has no effect on pre_df.)
pre_df = pd.read_csv('../csv_data/temp/otodom_opole.csv')
pre_df.head()

Unnamed: 0,title,price,area,rooms,url,city,address,district
0,"Przestronne, słoneczne mieszkanie 64 m2",572000,64.0,3,https://www.otodom.pl/pl/oferta/przestronne-sl...,Opole,ul. Emila Fieldorfa,Armii Krajowej
1,Stylowe 48m2 po generalnym remoncie - wprowadź...,539000,48.0,2,https://www.otodom.pl/pl/oferta/stylowe-48m2-p...,Opole,ul. Alojzego Dambonia,Zaodrze
2,Apartament z pięknym ogrodem,869000,85.0,3,https://www.otodom.pl/pl/oferta/apartament-z-p...,Opole,Cynamonowa,Chmielowice
3,"Stylowe, nowoczesne mieszkanie z ogródkiem",629000,49.5,2,https://www.otodom.pl/pl/oferta/stylowe-nowocz...,Opole,ul. Stawowa,Szczepanowice - Wójtowa Wieś
4,Mieszkanie 3pok. po remoncie na Zaodrze,559000,56.9,3,https://www.otodom.pl/pl/oferta/mieszkanie-3po...,Opole,ul. Szymona Koszyka,Zaodrze


In [16]:
# Collect every listing URL, in row order, for the detail-page scrape.
# Selecting the column directly avoids a slow row-by-row iterrows() loop.
urls_list = pre_df['url'].tolist()

In [17]:
from bs4 import BeautifulSoup
import requests
import json


# HTTP headers sent with every request made by get_json_data_from_url.
# NOTE(review): presumably a browser-like User-Agent is needed so the site
# does not reject the default python-requests agent - confirm.
HEADERS = {"User-Agent": "Mozilla/5.0"}


def get_json_data_from_url(url: str, timeout: float = 30.0) -> dict:
    """Fetch a page and return the JSON embedded in its ``__NEXT_DATA__`` script tag.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled connection.

    Returns:
        The decoded JSON payload of the
        ``<script id="__NEXT_DATA__" type="application/json">`` tag.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the page has no ``__NEXT_DATA__`` script tag, the tag
            is empty, or its content is not valid JSON.
    """
    response = requests.get(url, headers=HEADERS, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id="__NEXT_DATA__", type="application/json")
    if script_tag is None or not script_tag.string:
        # Give a clear message rather than an AttributeError on None.
        raise ValueError(f'No __NEXT_DATA__ JSON payload found at {url}')

    return json.loads(script_tag.string)

In [42]:
def get_target_info(json_data: dict, prop_name: str) -> str:
    """Return one property of the ad's ``target`` section as a string.

    Args:
        json_data: Mapping that holds the properties under its ``'target'`` key.
        prop_name: Key to read from ``json_data['target']``.

    Returns:
        The value converted to ``str``; a one-element list is unwrapped to its
        only item first. The string ``'none'`` is returned when the property
        (or the ``'target'`` section itself) is missing, or when ``json_data``
        cannot be subscripted by key.
    """
    try:
        value = json_data['target'][prop_name]
    except (KeyError, TypeError):
        # Only "data not there" is treated as missing; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being hidden.
        return 'none'

    if isinstance(value, list) and len(value) == 1:
        return str(value[0])
    return str(value)


# One result list per scraped property; entries are appended in urls_list order.
floor_no_info_list, heating_info_list = [], []
building_type_info_list, rent_info_list = [], []

# Property name in the ad's 'target' section -> list that collects its values.
target_lists = {
    'Floor_no': floor_no_info_list,
    'Heating': heating_info_list,
    'Building_type': building_type_info_list,
    'Rent': rent_info_list,
}

for url in urls_list:
    # Download each listing page once and read all four properties from it.
    json_data = get_json_data_from_url(url)['props']['pageProps']['ad']
    for prop_name, info_list in target_lists.items():
        info_list.append(get_target_info(json_data, prop_name))

In [47]:
# Assemble the scraped per-listing properties into one frame, one column each.
temp_columns = ['floor_no', 'heating_info', 'building_type', 'rent']
temp_values = [
    floor_no_info_list,
    heating_info_list,
    building_type_info_list,
    rent_info_list,
]
temp_df = pd.DataFrame(dict(zip(temp_columns, temp_values)))

In [48]:
# Preview the first five rows of the scraped properties.
temp_df.head(n=5)

Unnamed: 0,floor_no,heating_info,building_type,rent
0,floor_3,urban,block,none
1,floor_2,urban,block,527
2,ground_floor,gas,ribbon,300
3,ground_floor,urban,block,450
4,floor_4,urban,block,850


In [55]:
# Attach the scraped columns to the listings. concat(axis=1) aligns on the
# index and silently pads mismatched rows with NaN, so first verify that both
# frames describe the same rows (e.g. temp_df is not stale from an earlier run).
assert pre_df.index.equals(temp_df.index), (
    f'row mismatch: pre_df has {len(pre_df)} rows, temp_df has {len(temp_df)}'
)
df = pd.concat([pre_df, temp_df], axis=1)
df.head(15)

Unnamed: 0,title,price,area,rooms,url,city,address,district,floor_no,heating_info,building_type,rent
0,"Przestronne, słoneczne mieszkanie 64 m2",572000,64.0,3,https://www.otodom.pl/pl/oferta/przestronne-sl...,Opole,ul. Emila Fieldorfa,Armii Krajowej,floor_3,urban,block,none
1,Stylowe 48m2 po generalnym remoncie - wprowadź...,539000,48.0,2,https://www.otodom.pl/pl/oferta/stylowe-48m2-p...,Opole,ul. Alojzego Dambonia,Zaodrze,floor_2,urban,block,527
2,Apartament z pięknym ogrodem,869000,85.0,3,https://www.otodom.pl/pl/oferta/apartament-z-p...,Opole,Cynamonowa,Chmielowice,ground_floor,gas,ribbon,300
3,"Stylowe, nowoczesne mieszkanie z ogródkiem",629000,49.5,2,https://www.otodom.pl/pl/oferta/stylowe-nowocz...,Opole,ul. Stawowa,Szczepanowice - Wójtowa Wieś,ground_floor,urban,block,450
4,Mieszkanie 3pok. po remoncie na Zaodrze,559000,56.9,3,https://www.otodom.pl/pl/oferta/mieszkanie-3po...,Opole,ul. Szymona Koszyka,Zaodrze,floor_4,urban,block,850
5,"Nowe M w Opolu – Atlantis Prestige, 46 m²",639000,48.6,2,https://www.otodom.pl/pl/oferta/nowe-m-w-opolu...,Opole,no address,Kolonia Gosławicka,floor_1,urban,block,600
6,Mieszkanie Bytnara Rudego Zwm + Balkon Armii K...,610000,61.9,3,https://www.otodom.pl/pl/oferta/mieszkanie-byt...,Opole,no address,Armii Krajowej,floor_6,urban,block,none
7,Apartament gotowy do wprowadzenia,869000,84.54,3,https://www.otodom.pl/pl/oferta/apartament-got...,Opole,Cynamonowa,Chmielowice,ground_floor,gas,apartment,350
8,2-pokojowe mieszkanie w Śródmieściu Opola,400000,42.7,2,https://www.otodom.pl/pl/oferta/2-pokojowe-mie...,Opole,no address,Śródmieście,floor_1,gas,block,245
9,"Zamieszkaj wygodnie! 3 pokoje z balkonem, po r...",589000,61.2,3,https://www.otodom.pl/pl/oferta/zamieszkaj-wyg...,Opole,ul. Szarych Szeregów,Armii Krajowej,floor_4,urban,block,750


In [57]:
# Persist the merged listings as the raw Opole dataset.
# NOTE(review): the index is not disabled here, so it is presumably written as
# an extra leading column - confirm downstream readers expect that.
raw_output_path = '../csv_data/raw/opole.csv'
df.to_csv(raw_output_path)