## Finding the most profitable towns in a CCAA given a max price

In [1]:
import pandas as pd
import math
from bs4 import BeautifulSoup
import requests
import nums_from_string as nfs
import numpy as np
from re import search
from random import randint
from time import sleep
import openpyxl
import xlrd
import lxml
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

#### Getting data about properties for sale to query accordingly properties for rent 

Importing properties for sale csv

In [2]:
data_sale_properties = pd.read_csv('/Users/ignaciolorenzoqueralt/Documents/Ironhack/Final Project/properties/sale/2021.11.04_cataluña_90000_500.csv', engine = 'python')
data_sale_properties = data_sale_properties.loc[:, ~data_sale_properties.columns.str.contains('^Unnamed')]

In [3]:
def checking_nulls(df):
    # This function shows which columns have null values and returns a df with only nulls
    for c in df.columns:
        null_count = df[c].isnull().sum()
        if null_count > 0:
            print ("The column ", c, " has ", null_count, " null values")
    nulls = df[df.isna().any(axis=1)]
    return nulls.head(3)

In [4]:
checking_nulls(data_sale_properties)

The column  name  has  3  null values
The column  area  has  3  null values
The column  m2  has  3  null values
The column  n_rooms  has  3  null values
The column  n_bath  has  16  null values
The column  price_m2  has  16  null values
The column  price  has  16  null values
The column  price_reduction  has  16  null values
The column  opportunity  has  16  null values
The column  last_update  has  16  null values
The column  description  has  16  null values
The column  url  has  29  null values
The column  geo_town  has  29  null values
The column  province  has  29  null values
The column  ccaa  has  29  null values


Unnamed: 0,name,area,m2,n_rooms,n_bath,price_m2,price,price_reduction,opportunity,last_update,description,url,geo_town,province,ccaa
656,Piso en Congost. Piso en venta en can bassa-...,Granollers,72,2,1.0,1.0,89550.0,0.0,no,15.0,Piso a reformar en Granollers!!,,,,
657,https://www.habitaclia.com/comprar-piso-en_ven...,granollers,barcelona,cataluña,,,,,,,,,,,
687,Piso en Sota el cami ral. Piso en venta en c...,Granollers,90,4,1.0,1.0,107000.0,0.0,no,11.0,R24 vende en exclusiva piso en Granollers.,,,,


In [5]:
data_sale_properties = data_sale_properties.dropna()

In [6]:
data_sale_properties.head(3)

Unnamed: 0,name,area,m2,n_rooms,n_bath,price_m2,price,price_reduction,opportunity,last_update,description,url,geo_town,province,ccaa
0,Piso Carrer mina. Fabuloso piso luminoso,Badalona,55,3,1.0,1.0,97000.0,18000.0,yes,2.0,"Piso de 55m², 1º de altura real, se compone de...",https://www.habitaclia.com/comprar-piso-fabulo...,badalona,barcelona,cataluña
1,Piso Carrer mina. Oportunidad por precio y zona,Badalona,55,2,1.0,1.0,97000.0,0.0,no,0.0,"Piso de 55m², 2º de altura real, se compone de...",https://www.habitaclia.com/comprar-piso-oportu...,badalona,barcelona,cataluña
2,Piso Avinguda marquès de montroig. Piso con 3...,Badalona,71,3,1.0,1.0,97260.0,0.0,yes,0.0,¡¡¡OPORTUNIDAD POR ESTADO Y PRECIO!!! Vivienda...,https://www.habitaclia.com/comprar-piso-con_3_...,badalona,barcelona,cataluña


In [7]:
def convert_to_num(df, column):
    for i,value in enumerate(df[column]):
        df[column] = df[column].astype(int)

In [8]:
convert_to_num(data_sale_properties, column = 'm2')
convert_to_num(data_sale_properties, column = 'n_rooms')
convert_to_num(data_sale_properties, column = 'n_bath')

In [9]:
features = data_sale_properties.filter(['m2','n_bath', 'n_rooms'], axis=1)

In [10]:
data_sale_properties.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
m2,6228.0,70.627649,29.413505,2.0,54.0,67.0,80.0,900.0
n_rooms,6228.0,2.618818,12.659536,1.0,2.0,3.0,3.0,999.0
n_bath,6228.0,1.306358,12.64978,1.0,1.0,1.0,1.0,999.0
price_m2,6228.0,246.736191,365.390565,1.0,1.0,1.0,611.25,999.0
price,6228.0,78490.940751,18951.891458,800.0,68000.0,83732.5,93525.0,108000.0
price_reduction,6228.0,2809.33815,26036.658592,0.0,0.0,0.0,3000.0,954000.0
last_update,6228.0,6.467245,6.024951,0.0,0.0,6.0,11.0,58.0


#### Importing geo data from csv file

In [11]:
geo_data = pd.read_csv('/Users/ignaciolorenzoqueralt/Documents/Ironhack/Final Project/data/province-town-n_props/geo-data_2021.10.23.csv')

In [12]:
geo_data.head(3)

Unnamed: 0.1,Unnamed: 0,town,n_properties,province,ccaa
0,0,alegria___dulantzi,26,alava,pais_vasco
1,1,amurrio,6,alava,pais_vasco
2,2,anana,2,alava,pais_vasco


In [13]:
max_price = data_sale_properties.price.quantile(0.5)*0.12/12
max_price

837.3249999999999

#### Get input from user

In [14]:
ccaa_lst = geo_data['ccaa'].unique().tolist()
ccaa = input("ccaa: ")
while ccaa not in ccaa_lst:
    ccaa = input("There was no match between your input and our ccaa, try again: ")

ccaa: cataluña


In [15]:
num_properties_per_town = ""
while num_properties_per_town == "":
    try:
        num_properties_per_town = int(input("Minimum number of properties per town: "))
    except: 
        num_properties_per_town = ""

Minimum number of properties per town: 100


In [16]:
filtered_df = geo_data[(geo_data['ccaa'] == ccaa) & (geo_data['n_properties'] > num_properties_per_town)]

In [17]:
filtered_df['ccaa'].unique()

array(['cataluña'], dtype=object)

In [18]:
filtered_df.shape

(156, 5)

In [19]:
'''
max_price = ""
while max_price == "":
    try:
        max_price = int(input("maximum price: "))
    except: 
        max_price = ""
'''

'\nmax_price = ""\nwhile max_price == "":\n    try:\n        max_price = int(input("maximum price: "))\n    except: \n        max_price = ""\n'

#### List of towns that we need to extract data from

In [20]:
towns = filtered_df[filtered_df['ccaa'] == ccaa].town.to_list()

In [21]:
towns

['abrera',
 'alella',
 'ametlla_del_valles',
 'arenys_de_mar',
 'arenys_de_munt',
 'argentona',
 'badalona',
 'badia_del_valles',
 'barbera_del_valles',
 'barcelona',
 'begues',
 'berga',
 'bigues_i_riells',
 'cabrera_de_mar',
 'cabrils',
 'caldes_de_montbui',
 'calella',
 'canet_de_mar',
 'canovelles',
 'canyelles',
 'capellades',
 'cardedeu',
 'castellar_del_valles',
 'castelldefels',
 'cerdanyola_del_valles',
 'cervello',
 'corbera_de_llobregat',
 'cornella_de_llobregat',
 'cubelles',
 'esparreguera',
 'esplugues_de_llobregat',
 'franqueses_del_valles',
 'garriga',
 'gava',
 'gelida',
 'granollers',
 'hospitalet_de_llobregat',
 'igualada',
 'llisa_d_amunt',
 'llisa_de_vall',
 'llinars_del_valles',
 'malgrat_de_mar',
 'manlleu',
 'manresa',
 'martorell',
 'masnou',
 'masquefa',
 'matadepera',
 'mataro',
 'moia',
 'molins_de_rei',
 'mollet_del_valles',
 'montcada_i_reixac',
 'montgat',
 'montornes_del_valles',
 'olesa_de_montserrat',
 'olivella',
 'palafolls',
 'palau_solita_i_plegama

Hay que hacer un for loop que para cada town saque el df y le haga un append a el global.

#### Extracting data from each town

In [22]:
property = "alquiler"
municipio = "hospitalet_de_llobregat"
ascensor = "-ascensor"

habitaciones = "hab="+"1"
baños = "&"+"ban="+"1"
maximum_price = "pmax="+str(max_price)
metros = "m2="+"50"

In [23]:
url = "https://www.habitaclia.com/"+property+ascensor+"-"+municipio+".htm?"+habitaciones+baños+maximum_price

In [24]:
#towns = ['barcelona']
'''
'alegria___dulantzi'
 'amurrio',
 'anana',
 'aramaio',
 'arminon',
 'arraia___maeztu',
 'arrazua___ubarrundia']
 '''

"\n'alegria___dulantzi'\n 'amurrio',\n 'anana',\n 'aramaio',\n 'arminon',\n 'arraia___maeztu',\n 'arrazua___ubarrundia']\n "

In [25]:
# Creation of the df to which we will append the properties of the selected towns.
name = []
town = []
area = []
geo_town = []
features = []
m2 = []
n_rooms = []
n_bath = []
price_m2 = []
description = []
price = []
opportunity = []
price_reduction = []
opportunity = []
last_update = []
url = []

x = min(len(name), len(town), len(area), len(description), len(price), len(last_update), len(url))
dct = {'name': name[:x], 'town': town[:x], 'area': area[:x], 'm2': m2[:x], 'n_rooms': n_rooms[:x], 'n_bath': n_bath[:x], 'price_m2': price_m2[:x] ,'price': price[:x], 'price_reduction': price_reduction[:x], 'opportunity':opportunity[:x], 'last_update': last_update[:x],  'description': description[:x], 'url':url[:x] }
df = pd.DataFrame.from_dict(dct)

# Pulling properties from each town in the previously defined list:

for t in towns:
    
    # Getting the number of properties for that town to see how many pages do we need to scrape.
    url = "https://www.habitaclia.com/"+property+"-"+t+".htm?"+maximum_price
    r = requests.get(url)
    r.status_code
    soup = BeautifulSoup(r.content, 'html.parser')
    
    try:
        total_results = int(soup.find('h2', attrs={'class': 'f-right'}).find('span').get_text().replace(".",""))
        pages = range(int(math.floor(total_results/16))+1)
        properties = []

        # Adding all the properties listed in each page to the list.
        for p in pages:
            sleep(randint(2,5))
            url = "https://www.habitaclia.com/"+property+"-"+t+"-"+str(p)+".htm?"+maximum_price
            print(url)
            try:
                r = requests.get(url)
                soup = BeautifulSoup(r.content, 'html.parser')
                properties += soup.find_all('div', attrs={'class': 'list-item-info'})
                del properties[-1] # Last item is an ad
            except:
                print('Error on page', p)
            print('town: ', t, " page: ", p)

        # Creating a list for each piece of information I want to extract from each property.
        name = []
        town = []
        area = []
        geo_town = []
        features = []
        m2 = []
        n_rooms = []
        n_bath = []
        price_m2 = []
        description = []
        price = []
        opportunity = []
        price_reduction = []
        opportunity = []
        last_update = []
        url = []

        # Getting the information from each property.
        for i,properties in enumerate(properties):

            # Each feature is set as empty prior to being defined. This way we avoid errors when a feature is not available for a certain property.
            name_temp = ""
            town_temp = ""
            area_temp = ""
            geo_town_temp = ""
            m2_temp = ""
            n_rooms_temp = ""
            n_bath_temp = ""
            price_m2_temp = ""
            price_temp = ""
            opportunity_temp = ""
            price_reduction_temp = ""
            description_temp = ""
            last_update_temp = ""
            url_temp = ""

            # other_location enables us to differ between listed properties vs suggested properties, which appear when there are very few properties for one town. We want to avoid them as they are nearby properties not belonging to our target town.
            other_location = properties.find('span', attrs={'class': 'ady-relationship'})
            if other_location is None:
                other_locations_properties = ""
            else: 
                #print(i)
                other_locations_properties = other_location.get_text(strip=True).find('Se encuentra en')

            # Now I am skipping all the properties that are suggested so as to not append them to the df.
            if other_locations_properties == 0:
                pass
            else:
                try:
                    # Extracting the features of a property and saving them in a temporary variable.
                    name_temp = properties.find('h3', attrs={'class': 'list-item-title'}).get_text(strip=True)
                    town_temp = properties.find('p', attrs={'class': 'list-item-location'}).get_text(strip=True).split("-",1)[0].strip().replace("Ver mapa","")
                    area_temp = properties.find('p', attrs={'class': 'list-item-location'}).get_text(strip=True).replace('/','-').strip().replace("Ver mapa","").split("-",1)[0]
                    geo_town_temp = t
                    m2_temp = nfs.get_nums(properties.find('p', attrs={'class': 'list-item-feature'}).get_text(strip=True).split("-")[0])[0] 
                    n_rooms_temp = nfs.get_nums(properties.find('p', attrs={'class': 'list-item-feature'}).get_text(strip=True).split("-")[1])[0]
                    n_bath_temp = nfs.get_nums(properties.find('p', attrs={'class': 'list-item-feature'}).get_text(strip=True).split("-")[2])[0] 
                    price_m2_temp = nfs.get_nums(properties.find('p', attrs={'class': 'list-item-feature'}).get_text(strip=True).split("-")[3])[0] 
                    raw_price_temp = properties.find('article', attrs={'class': 'list-item-price'}).get_text()
                    if search("Oportunidad", raw_price_temp):
                        if search("ha bajado", raw_price_temp): 
                            price_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[0]
                            price_reduction_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[1]
                            opportunity_temp = "yes"
                        else:
                            price_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[0]
                            price_reduction_temp = "0"
                            opportunity_temp = "yes"
                    elif search("ha bajado", raw_price_temp):
                        price_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[0]
                        price_reduction_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[1]      
                        opportunity_temp = "no"
                    else: 
                        price_temp = nfs.get_nums((properties.find('article', attrs={'class': 'list-item-price'}).get_text()).replace(".",""))[0]
                        price_reduction_temp = "0"
                        opportunity_temp = "no"
                    description_temp = properties.find('p', attrs={'class': 'list-item-description'}).get_text(strip=True)
                    last_update_temp = nfs.get_nums(properties.find('span', attrs={'class': 'list-item-date'}).get_text(strip=True))
                    url_temp = properties.find('h3', attrs={'class': 'list-item-title'}).find('a').get('href')

                    # Appending temporary variables features to their corresponding list.
                    name.append(name_temp)
                    town.append(town_temp)
                    area.append(area_temp)
                    geo_town.append(geo_town_temp)
                    m2.append(m2_temp)
                    n_rooms.append(n_rooms_temp)
                    n_bath.append(n_bath_temp)
                    price_m2.append(price_m2_temp)
                    price.append(price_temp)
                    opportunity.append(opportunity_temp)
                    price_reduction.append(price_reduction_temp)
                    description.append(description_temp)
                    last_update.append(last_update_temp)
                    url.append(url_temp)

                except:
                    # In case we may encounter an error, we print the features of each property to find the bug.
                    '''
                    print('------------------------------------')
                    print('nombre: ', name_temp)
                    print('town_temp: ', town_temp)
                    print('area_temp: ', area_temp)
                    print('m2_temp: ', m2_temp)
                    print('n_rooms_temp: ', n_rooms_temp)
                    print('n_bath_temp: ', n_bath_temp)
                    print('price_m2_temp: ', price_m2_temp)
                    print('price_temp: ', price_temp)
                    print('opportunity_temp: ', opportunity_temp)
                    print('price_reduction_temp: ', price_reduction_temp)
                    print('description_temp: ', description_temp)
                    print('last_update_temp: ', last_update_temp)
                    print('url_temp: ', url_temp)
                    print('------------------------------------')
                    '''

        x_town = min(len(name), len(town), len(area), len(geo_town), len(description), len(price), len(last_update), len(url))
        dct_town = {'name': name[:x_town], 'town': town[:x_town], 'area': area[:x_town], 'geo_town':geo_town[:x_town],'m2': m2[:x_town], 'n_rooms': n_rooms[:x_town], 'n_bath': n_bath[:x_town], 'price_m2': price_m2[:x_town] ,'price': price[:x_town], 'price_reduction': price_reduction[:x_town], 'opportunity':opportunity[:x_town], 'last_update': last_update[:x_town],  'description': description[:x_town], 'url':url[:x_town] }
        df_town = pd.DataFrame.from_dict(dct_town)
        df = df.append(df_town, ignore_index = True)
        
        #Cleaning the final df
        #df= df[df['price']<int(max_price)]
        #df = df[~df['description'].str.contains('nuda|sin cedula|sin cédula')]

    except:
        print("no properties found at: ", url)

https://www.habitaclia.com/alquiler-abrera-0.htm?pmax=837.3249999999999
town:  abrera  page:  0
https://www.habitaclia.com/alquiler-abrera-1.htm?pmax=837.3249999999999
town:  abrera  page:  1
https://www.habitaclia.com/alquiler-alella-0.htm?pmax=837.3249999999999
town:  alella  page:  0
https://www.habitaclia.com/alquiler-alella-1.htm?pmax=837.3249999999999
town:  alella  page:  1
https://www.habitaclia.com/alquiler-ametlla_del_valles-0.htm?pmax=837.3249999999999
town:  ametlla_del_valles  page:  0
https://www.habitaclia.com/alquiler-arenys_de_mar-0.htm?pmax=837.3249999999999
town:  arenys_de_mar  page:  0
https://www.habitaclia.com/alquiler-arenys_de_mar-1.htm?pmax=837.3249999999999
town:  arenys_de_mar  page:  1
https://www.habitaclia.com/alquiler-arenys_de_munt-0.htm?pmax=837.3249999999999
town:  arenys_de_munt  page:  0
https://www.habitaclia.com/alquiler-argentona-0.htm?pmax=837.3249999999999
town:  argentona  page:  0
https://www.habitaclia.com/alquiler-badalona-0.htm?pmax=837.32

https://www.habitaclia.com/alquiler-barcelona-58.htm?pmax=837.3249999999999
town:  barcelona  page:  58
https://www.habitaclia.com/alquiler-barcelona-59.htm?pmax=837.3249999999999
town:  barcelona  page:  59
https://www.habitaclia.com/alquiler-barcelona-60.htm?pmax=837.3249999999999
town:  barcelona  page:  60
https://www.habitaclia.com/alquiler-barcelona-61.htm?pmax=837.3249999999999
town:  barcelona  page:  61
https://www.habitaclia.com/alquiler-barcelona-62.htm?pmax=837.3249999999999
town:  barcelona  page:  62
https://www.habitaclia.com/alquiler-barcelona-63.htm?pmax=837.3249999999999
town:  barcelona  page:  63
https://www.habitaclia.com/alquiler-barcelona-64.htm?pmax=837.3249999999999
town:  barcelona  page:  64
https://www.habitaclia.com/alquiler-barcelona-65.htm?pmax=837.3249999999999
town:  barcelona  page:  65
https://www.habitaclia.com/alquiler-barcelona-66.htm?pmax=837.3249999999999
town:  barcelona  page:  66
https://www.habitaclia.com/alquiler-barcelona-67.htm?pmax=837.32

town:  barcelona  page:  136
https://www.habitaclia.com/alquiler-barcelona-137.htm?pmax=837.3249999999999
town:  barcelona  page:  137
https://www.habitaclia.com/alquiler-barcelona-138.htm?pmax=837.3249999999999
town:  barcelona  page:  138
https://www.habitaclia.com/alquiler-barcelona-139.htm?pmax=837.3249999999999
town:  barcelona  page:  139
https://www.habitaclia.com/alquiler-barcelona-140.htm?pmax=837.3249999999999
town:  barcelona  page:  140
https://www.habitaclia.com/alquiler-barcelona-141.htm?pmax=837.3249999999999
town:  barcelona  page:  141
https://www.habitaclia.com/alquiler-barcelona-142.htm?pmax=837.3249999999999
town:  barcelona  page:  142
https://www.habitaclia.com/alquiler-barcelona-143.htm?pmax=837.3249999999999
town:  barcelona  page:  143
https://www.habitaclia.com/alquiler-barcelona-144.htm?pmax=837.3249999999999
town:  barcelona  page:  144
https://www.habitaclia.com/alquiler-barcelona-145.htm?pmax=837.3249999999999
town:  barcelona  page:  145
https://www.habit

town:  barcelona  page:  214
https://www.habitaclia.com/alquiler-barcelona-215.htm?pmax=837.3249999999999
town:  barcelona  page:  215
https://www.habitaclia.com/alquiler-barcelona-216.htm?pmax=837.3249999999999
town:  barcelona  page:  216
https://www.habitaclia.com/alquiler-barcelona-217.htm?pmax=837.3249999999999
town:  barcelona  page:  217
https://www.habitaclia.com/alquiler-barcelona-218.htm?pmax=837.3249999999999
town:  barcelona  page:  218
https://www.habitaclia.com/alquiler-barcelona-219.htm?pmax=837.3249999999999
town:  barcelona  page:  219
https://www.habitaclia.com/alquiler-barcelona-220.htm?pmax=837.3249999999999
town:  barcelona  page:  220
https://www.habitaclia.com/alquiler-barcelona-221.htm?pmax=837.3249999999999
town:  barcelona  page:  221
https://www.habitaclia.com/alquiler-barcelona-222.htm?pmax=837.3249999999999
town:  barcelona  page:  222
https://www.habitaclia.com/alquiler-barcelona-223.htm?pmax=837.3249999999999
town:  barcelona  page:  223
https://www.habit

town:  barcelona  page:  292
https://www.habitaclia.com/alquiler-barcelona-293.htm?pmax=837.3249999999999
town:  barcelona  page:  293
https://www.habitaclia.com/alquiler-barcelona-294.htm?pmax=837.3249999999999
town:  barcelona  page:  294
https://www.habitaclia.com/alquiler-barcelona-295.htm?pmax=837.3249999999999
town:  barcelona  page:  295
https://www.habitaclia.com/alquiler-barcelona-296.htm?pmax=837.3249999999999
town:  barcelona  page:  296
https://www.habitaclia.com/alquiler-barcelona-297.htm?pmax=837.3249999999999
town:  barcelona  page:  297
https://www.habitaclia.com/alquiler-barcelona-298.htm?pmax=837.3249999999999
town:  barcelona  page:  298
https://www.habitaclia.com/alquiler-barcelona-299.htm?pmax=837.3249999999999
town:  barcelona  page:  299
https://www.habitaclia.com/alquiler-barcelona-300.htm?pmax=837.3249999999999
town:  barcelona  page:  300
https://www.habitaclia.com/alquiler-barcelona-301.htm?pmax=837.3249999999999
town:  barcelona  page:  301
https://www.habit

town:  cerdanyola_del_valles  page:  2
https://www.habitaclia.com/alquiler-cervello-0.htm?pmax=837.3249999999999
town:  cervello  page:  0
https://www.habitaclia.com/alquiler-corbera_de_llobregat-0.htm?pmax=837.3249999999999
town:  corbera_de_llobregat  page:  0
https://www.habitaclia.com/alquiler-cornella_de_llobregat-0.htm?pmax=837.3249999999999
town:  cornella_de_llobregat  page:  0
https://www.habitaclia.com/alquiler-cornella_de_llobregat-1.htm?pmax=837.3249999999999
town:  cornella_de_llobregat  page:  1
https://www.habitaclia.com/alquiler-cornella_de_llobregat-2.htm?pmax=837.3249999999999
town:  cornella_de_llobregat  page:  2
https://www.habitaclia.com/alquiler-cornella_de_llobregat-3.htm?pmax=837.3249999999999
town:  cornella_de_llobregat  page:  3
https://www.habitaclia.com/alquiler-cubelles-0.htm?pmax=837.3249999999999
town:  cubelles  page:  0
https://www.habitaclia.com/alquiler-cubelles-1.htm?pmax=837.3249999999999
town:  cubelles  page:  1
https://www.habitaclia.com/alquil

town:  olivella  page:  0
https://www.habitaclia.com/alquiler-palafolls-0.htm?pmax=837.3249999999999
town:  palafolls  page:  0
https://www.habitaclia.com/alquiler-palau_solita_i_plegamans-0.htm?pmax=837.3249999999999
town:  palau_solita_i_plegamans  page:  0
https://www.habitaclia.com/alquiler-parets_del_valles-0.htm?pmax=837.3249999999999
town:  parets_del_valles  page:  0
https://www.habitaclia.com/alquiler-piera-0.htm?pmax=837.3249999999999
town:  piera  page:  0
https://www.habitaclia.com/alquiler-pineda_de_mar-0.htm?pmax=837.3249999999999
town:  pineda_de_mar  page:  0
https://www.habitaclia.com/alquiler-prat_de_llobregat-0.htm?pmax=837.3249999999999
town:  prat_de_llobregat  page:  0
https://www.habitaclia.com/alquiler-premia_de_dalt-0.htm?pmax=837.3249999999999
town:  premia_de_dalt  page:  0
https://www.habitaclia.com/alquiler-premia_de_mar-0.htm?pmax=837.3249999999999
town:  premia_de_mar  page:  0
https://www.habitaclia.com/alquiler-premia_de_mar-1.htm?pmax=837.3249999999999

town:  sant_vicens_de_montalt  page:  0
https://www.habitaclia.com/alquiler-sant_vicens_de_montalt-1.htm?pmax=837.3249999999999
town:  sant_vicens_de_montalt  page:  1
https://www.habitaclia.com/alquiler-sant_vicens_dels_horts-0.htm?pmax=837.3249999999999
town:  sant_vicens_dels_horts  page:  0
https://www.habitaclia.com/alquiler-santa_coloma_de_gramenet-0.htm?pmax=837.3249999999999
town:  santa_coloma_de_gramenet  page:  0
https://www.habitaclia.com/alquiler-santa_coloma_de_gramenet-1.htm?pmax=837.3249999999999
town:  santa_coloma_de_gramenet  page:  1
https://www.habitaclia.com/alquiler-santa_margarida_de_montbui-0.htm?pmax=837.3249999999999
town:  santa_margarida_de_montbui  page:  0
https://www.habitaclia.com/alquiler-santa_maria_de_palautordera-0.htm?pmax=837.3249999999999
town:  santa_maria_de_palautordera  page:  0
https://www.habitaclia.com/alquiler-santa_perpetua_de_mogoda-0.htm?pmax=837.3249999999999
town:  santa_perpetua_de_mogoda  page:  0
https://www.habitaclia.com/alquile

https://www.habitaclia.com/alquiler-mont_roig_del_camp-0.htm?pmax=837.3249999999999
town:  mont_roig_del_camp  page:  0
https://www.habitaclia.com/alquiler-mora_d_ebre-0.htm?pmax=837.3249999999999
town:  mora_d_ebre  page:  0
no properties found at:  https://www.habitaclia.com/alquiler-perello.htm?pmax=837.3249999999999
https://www.habitaclia.com/alquiler-pobla_de_mafumet-0.htm?pmax=837.3249999999999
town:  pobla_de_mafumet  page:  0
no properties found at:  https://www.habitaclia.com/alquiler-pobla_de_montornes.htm?pmax=837.3249999999999
https://www.habitaclia.com/alquiler-reus-0.htm?pmax=837.3249999999999
town:  reus  page:  0
https://www.habitaclia.com/alquiler-reus-1.htm?pmax=837.3249999999999
town:  reus  page:  1
https://www.habitaclia.com/alquiler-reus-2.htm?pmax=837.3249999999999
town:  reus  page:  2
https://www.habitaclia.com/alquiler-reus-3.htm?pmax=837.3249999999999
town:  reus  page:  3
https://www.habitaclia.com/alquiler-reus-4.htm?pmax=837.3249999999999
town:  reus  page

In [26]:
df.shape

(7901, 14)

In [27]:
df1 = df.copy()

In [28]:
df1 = df1.merge(geo_data, left_on='geo_town', right_on='town', how='left')
df1 = df1.drop(["town_x", "town_y", "n_properties"], axis=1)
df1 = df1.loc[:, ~df1.columns.str.contains('^Unnamed')]

In [29]:
def clean_last_update():
    for i,n in enumerate(df1['last_update']):
        try:
            df1['last_update'][i] = df1['last_update'][i][0]
        except:
            df1['last_update'][i] = 'null'
clean_last_update()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['last_update'][i] = 'null'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['last_update'][i] = df1['last_update'][i][0]


In [30]:
numericals = ['m2', 'n_rooms', 'n_bath', 'price_m2', 'price', 'price_reduction', 'last_update']
def anytype_to_numerical(df, columns = []):
    for c in columns:
        if df[c].dtypes == 'float64':
            df[c] = df[c].astype(int)
anytype_to_numerical(df1, columns = numericals)

In [31]:
df1

Unnamed: 0,name,area,m2,n_rooms,n_bath,price_m2,price,price_reduction,opportunity,last_update,description,url,geo_town,province,ccaa
0,"Alquiler Piso en Carrer ponent, 11. Obra nueva",Abrera,45,1,1,14,630,0,yes,,Piso en alquiler de obra nueva en el centro de...,https://www.habitaclia.com/alquiler-piso-obra_...,abrera,barcelona,cataluña
1,"Alquiler Piso en Carrer ponent, 11. Obra nueva",Abrera,55,2,1,13,750,0,no,,Pisos en alquiler de obra nueva en el centro d...,https://www.habitaclia.com/alquiler-piso-obra_...,abrera,barcelona,cataluña
2,"Alquiler Piso en Carrer ponent, 11. Obra nueva",Abrera,78,2,1,10,820,0,no,,Pisos en alquiler de obra nueva en el centro d...,https://www.habitaclia.com/alquiler-piso-obra_...,abrera,barcelona,cataluña
3,"Alquiler Piso en Carrer ponent, 11. Obra nueva",Abrera,58,2,1,13,755,0,no,,Pisos en alquiler de obra nueva en el centro d...,https://www.habitaclia.com/alquiler-piso-obra_...,abrera,barcelona,cataluña
4,"Alquiler Piso en Carrer ponent, 11. Obra nueva",Abrera,58,2,1,12,730,0,no,,Pisos en alquiler de obra nueva en el centro d...,https://www.habitaclia.com/alquiler-piso-obra_...,abrera,barcelona,cataluña
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7896,Alquiler Apartamento en La Pineda. Disponibl...,Vila,65,2,1,8,550,0,no,,ALQUILER POR MESES PARA ESTANCIAS TEMPORALES P...,https://www.habitaclia.com/alquiler-apartament...,vila_seca,tarragona,cataluña
7897,Alquiler Piso en La Pineda. Piso en alquiler...,Vila,92,2,2,8,750,0,no,21,"Piso en alquiler en La Pineda, PARA TODO EL AÑ...",https://www.habitaclia.com/alquiler-piso-en_en...,vila_seca,tarragona,cataluña
7898,"Alquiler Piso en Carrer robert d, 16. Precio...",Vila,100,3,1,7,700,0,no,4,Particular anunca Precioso piso seminuevo en V...,https://www.habitaclia.com/alquiler-piso-preci...,vila_seca,tarragona,cataluña
7899,Alquiler Piso en Carrer enric morera (d. Agr...,Vila,56,1,1,8,470,0,no,,"Situado en una zona tranquila, centrica y muy ...",https://www.habitaclia.com/alquiler-piso-agrad...,vila_seca,tarragona,cataluña


In [32]:
today = datetime.now().strftime('%Y.%m.%d')
df1.to_csv(path_or_buf = '/Users/ignaciolorenzoqueralt/Documents/Ironhack/Final Project/properties/rent/'+today+'_'+ccaa+'_'+str(max_price)+'_'+str(num_properties_per_town)+'.csv')