## Save listing contents as text
Save the contents of each listing (house or apartment) into a text file, to be used later by BeautifulSoup.

In [1]:
# Import necessary libraries
import os
import pandas as pd

In [2]:
# Resolve the current working directory; the CSV paths below are built from it
main_path = os.path.abspath(os.curdir)

In [3]:
# File name and full path of the raw listings CSV
csv_raw_file_name = "real_estate_offers_raw.csv"
csv_raw_file_path = os.path.join(main_path, csv_raw_file_name)

In [4]:
# Read the raw CSV into a DataFrame for further processing
df_raw = pd.read_csv(filepath_or_buffer=csv_raw_file_path)

In [5]:
# Clean the data

In [6]:
# File name and full path of the cleaned output CSV.
# The path is built from the processed file name (not the raw one) so that
# saving the cleaned data does not overwrite the raw input file.
csv_processed_file_name = "real_estate_offers.csv"
csv_processed_file_path = os.path.join(main_path, csv_processed_file_name)

In [7]:
# Give the columns their final, more descriptive names (modifies df_raw in place)
df_raw.rename(
    columns={
        "House Id": "Listing Id",
        "House Type": "Property Type",
        "Subtype": "Property Subtype",
        "Kitchen Type": "Fully Equipped Kitchen",
        "Room Number": "Number of Rooms",
    },
    inplace=True,
)

In [8]:
# Print the plain-text view of the frame to check the renamed columns
print(df_raw)

      Listing Id  Locality Property Type      Property Subtype    Price  \
0        4045224      8660         house                 house  1450000   
1        5750433      1180         house                 villa  2750000   
2        6532849      1150         house                 villa  3800000   
3        7029637      2890         house                 house   210000   
4        7137615      4500         house  exceptional property   600000   
...          ...       ...           ...                   ...      ...   
9235     9565021      4540         house                 house   369000   
9236     9565110      8790     apartment             apartment   369500   
9237     9565111      8790     apartment             apartment   369500   
9238     9565129      9700         house               mansion   385000   
9239     9565133      9700         house    mixed use building   385000   

     Fully Equipped Kitchen  Number of Rooms    Area  Garden Terrace  \
0             usa installed

In [9]:
# Flag every row whose Listing Id repeats an earlier row (first occurrence stays False)
df_raw["Listing Id"].duplicated(keep="first")

0       False
1       False
2       False
3       False
4       False
        ...  
9235    False
9236    False
9237    False
9238    False
9239    False
Name: Listing Id, Length: 9240, dtype: bool

In [10]:
# List the distinct property types present in the data
pd.unique(df_raw["Property Type"])

array(['house', 'apartment'], dtype=object)

In [11]:
# List the distinct property subtypes present in the data
df_raw.loc[:, "Property Subtype"].unique()

array(['house', 'villa', 'exceptional property', 'apartment',
       'town house', 'mansion', 'mixed use building', 'triplex',
       'penthouse', 'ground floor', 'duplex', 'service flat', 'farmhouse',
       'loft', 'apartment block', 'flat studio', 'country cottage',
       'bungalow', 'other property', 'kot', 'castle', 'chalet',
       'manor house', 'pavilion'], dtype=object)

In [12]:
# List the distinct prices present in the data
df_raw.loc[:, "Price"].unique()

array([1450000, 2750000, 3800000, ...,  483500,  797000,  369500],
      dtype=int64)

In [13]:
# List the distinct kitchen categories (including missing values) before recoding
pd.unique(df_raw["Fully Equipped Kitchen"])

array(['usa installed', 'hyper equipped', 'semi equipped', nan,
       'installed', 'usa hyper equipped', 'not installed',
       'usa uninstalled', 'usa semi equipped'], dtype=object)

In [14]:
# Boolean mask of rows where the kitchen category is missing
df_raw["Fully Equipped Kitchen"].isna()

0       False
1       False
2       False
3       False
4        True
        ...  
9235    False
9236     True
9237     True
9238    False
9239    False
Name: Fully Equipped Kitchen, Length: 9240, dtype: bool

In [15]:
# Recode the kitchen categories that do not count as fully equipped to 0.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df_raw[...]: an in-place call on a selected
# column is chained assignment, which pandas warns about and which does not
# update df_raw when Copy-on-Write is enabled.
df_raw["Fully Equipped Kitchen"] = df_raw["Fully Equipped Kitchen"].replace(
    ["semi equipped", "not installed", "usa uninstalled", "usa semi equipped"], 0
)

In [16]:
# Recode the kitchen categories that count as fully equipped to 1.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on df_raw[...]: an in-place call on a selected
# column is chained assignment, which pandas warns about and which does not
# update df_raw when Copy-on-Write is enabled.
df_raw["Fully Equipped Kitchen"] = df_raw["Fully Equipped Kitchen"].replace(
    ["usa installed", "hyper equipped", "installed", "usa hyper equipped"], 1
)

In [17]:
# Treat a missing kitchen category as "not fully equipped" (0).
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on df_raw[...]: an in-place call on a selected
# column is chained assignment, which pandas warns about and which does not
# update df_raw when Copy-on-Write is enabled.
df_raw["Fully Equipped Kitchen"] = df_raw["Fully Equipped Kitchen"].fillna(0)

In [18]:
# Check which values remain in the kitchen column after recoding
df_raw.loc[:, "Fully Equipped Kitchen"].unique()

array([1., 0.])

In [19]:
#df_raw["New Building"].replace(['True', 1], inplace = True)
#df_raw["New Building"].replace(['False', 0], inplace = True)

In [20]:
# List the distinct values of the "New Building" column
pd.unique(df_raw["New Building"])

array([ True, False])

In [21]:
# Write the frame to csv_processed_file_path, leaving out the index column
df_raw.to_csv(path_or_buf=csv_processed_file_path, index=False)