This is the code to process the description into the formatted string
link to sources and examples used: 
https://codereview.stackexchange.com/questions/173971/converting-number-to-words
https://stackoverflow.com/questions/8982163/how-do-i-tell-python-to-convert-integers-into-words

In [1]:
import pandas as pd 
df = pd.read_csv("df-empty.csv")

In [59]:
import math
def numberToWords(n):
    if not isinstance(n, int):
        raise ValueError("Input must be an integer")

    # Handle negative numbers
    if n < 0:
        return "minus " + numberToWords(-n)

    limit, t = 1000000000000, 0

    # If zero, return 'zero'
    if (n == 0):
        return "zero"

    # Arrays for various number parts
    multiplier = ["", "Trillion", "Billion", "Million", "Thousand"]
    first_twenty = ["", "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve", "Thirteen", "Fourteen", "Fifteen", "Sixteen", "Seventeen", "Eighteen", "Nineteen"]
    tens = ["", "Twenty", "Thirty", "Forty", "Fifty", "Sixty", "Seventy", "Eighty", "Ninety"]

    # If number is less than 20, return the corresponding word
    if (n < 20):
        return first_twenty[n]

    answer = ""
    i = n
    while(i > 0):
        curr_hun = i // limit

        # Adjust for current multiplier
        while (int(curr_hun) == 0):
            i %= limit
            limit /= 1000
            curr_hun = i // limit
            t += 1

        # Add hundreds' place if applicable
        if (int(curr_hun) > 99):
                # Explicitly converting the index to an integer
            hundreds_index = int(curr_hun) // 100
            answer += (first_twenty[hundreds_index] + " Hundred ")
        # Adjust for tens and ones
        curr_hun = int(curr_hun) % 100
        if (int(curr_hun) > 0 and int(curr_hun) < 20):
            answer += (first_twenty[int(curr_hun)] + " ")
        elif (int(curr_hun) % 10 == 0 and int(curr_hun) != 0):
                index = int(int(curr_hun) // 10) - 1
                answer += (tens[index] + " ")
        elif (int(curr_hun) > 19 and int(curr_hun) < 100):
                tens_index = int(int(curr_hun) // 10) - 1
                ones_index = int(int(curr_hun) % 10)
                answer += (tens[tens_index] + " " + first_twenty[ones_index] + " ")

        # Add the current multiplier
        if (t < 4):
            answer += (multiplier[t] + " ")

        i = i % limit
        limit = limit // 1000

    return answer



In [58]:
numberToWords(int(10970))

'Ten Million Nine Hundred Seventy Million '

In [20]:
# Check the data types to confirm the conversion
df.dtypes

Unnamed: 0                     int64
url                           object
price                         object
address                       object
descrip                       object
listed_since                  object
zip_code                      object
size                          object
year                          object
living_area                   object
kind_of_house                 object
building_type                 object
num_of_rooms                  object
num_of_bathrooms              object
layout                        object
energy_label                  object
insulation                    object
heating                       object
ownership                     object
exteriors                     object
parking                       object
date_list                     object
last_ask_price                object
last_ask_price_m2             object
city                          object
log_id                        object
num of tokens per descrip      int64
d

In [63]:
import pandas as pd



# Function to convert number to words, handling NaN values
def number_to_words(num):
    if pd.notna(num):
        return numberToWords(int(num))
    return "unknown"


df['num_rooms'] = df['num_rooms'].fillna(df['num_rooms'].median())
df['size_float'] = df['size_float'].fillna(df['size_float'].median())
df['num_rooms'] = df['num_rooms'].astype(int)
df['size_int'] = df['size_float'].astype(int)


# Function to create the description string
def create_description(row):
    city = row["city"]
    num_rooms = number_to_words(row["num_rooms"])
    space_float = number_to_words(row["size_int"])
    descrip = row["descrip_en"]
    energy = row["energy_label_standard"]
    return f"The house, located in {city}, offers a spacious {space_float} square meters area and features {num_rooms} rooms. It has an energy efficiency rating of '{energy}'. Full description: {descrip}"


# Apply the function to each row
df["f_string_descrip"] = df.apply(create_description, axis=1)




In [27]:
df.head()

Unnamed: 0.1,Unnamed: 0,url,price,address,descrip,listed_since,zip_code,size,year,living_area,...,size_float,zip_code_4_digits,postcode,latitude,longitude,num_bedrooms,num_rooms,energy_label_standard,f_string_descrip,size_int
0,0,https://www.funda.nl/koop/aalsmeer/appartement...,€ 403.000 v.o.n.,Appartementen type F (Bouwnr. 77),De Tuinen van Hornmeer - appartementen IN V...,2024,1431 WE Aalsmeer Hornmeer,81 m²,2024,81 m²,...,81.0,1431,1431.0,52.250582,4.735323,2.0,3,na,The house is in: aalsmeer with this amount of ...,81
1,1,https://www.funda.nl/koop/aalsmeer/huis-421002...,€ 589.000 k.k.,Boomgaard 37,Wat een fijne en energiezuinige eengezinswon...,2006,1432 LC Aalsmeer Oosteinde,108 m²,2006,108 m²,...,108.0,1432,1432.0,52.280207,4.793252,5.0,6,A,The house is in: aalsmeer with this amount of ...,108
2,2,https://www.funda.nl/koop/aalsmeer/huis-429389...,€ 774.500 v.o.n.,Verandawoning (Bouwnr. 7),De Tuinen van Hornmeer - IN VERKOOP De omg...,2023,1431 WE Aalsmeer Hornmeer,202 m²,2023,202 m²,...,202.0,1431,1431.0,52.250582,4.735323,3.0,5,na,The house is in: aalsmeer with this amount of ...,202
3,3,https://www.funda.nl/koop/aalsmeer/appartement...,€ 403.000 v.o.n.,Appartementen Type E (Bouwnr. 64),De Tuinen van Hornmeer - appartementen IN V...,2024,1431 WE Aalsmeer Hornmeer,84 m²,2024,84 m²,...,84.0,1431,1431.0,52.250582,4.735323,2.0,3,na,The house is in: aalsmeer with this amount of ...,84
4,4,https://www.funda.nl/koop/aalsmeer/huis-429389...,€ 774.500 v.o.n.,Herenhuis (Bouwnr. 22),De Tuinen van Hornmeer - IN VERKOOP De omg...,2022,1431 WE Aalsmeer Hornmeer,190 m²,2022,190 m²,...,190.0,1431,1431.0,52.250582,4.735323,5.0,7,na,The house is in: aalsmeer with this amount of ...,190


In [28]:
df.head()

Unnamed: 0.1,Unnamed: 0,url,price,address,descrip,listed_since,zip_code,size,year,living_area,...,size_float,zip_code_4_digits,postcode,latitude,longitude,num_bedrooms,num_rooms,energy_label_standard,f_string_descrip,size_int
0,0,https://www.funda.nl/koop/aalsmeer/appartement...,€ 403.000 v.o.n.,Appartementen type F (Bouwnr. 77),De Tuinen van Hornmeer - appartementen IN V...,2024,1431 WE Aalsmeer Hornmeer,81 m²,2024,81 m²,...,81.0,1431,1431.0,52.250582,4.735323,2.0,3,na,The house is in: aalsmeer with this amount of ...,81
1,1,https://www.funda.nl/koop/aalsmeer/huis-421002...,€ 589.000 k.k.,Boomgaard 37,Wat een fijne en energiezuinige eengezinswon...,2006,1432 LC Aalsmeer Oosteinde,108 m²,2006,108 m²,...,108.0,1432,1432.0,52.280207,4.793252,5.0,6,A,The house is in: aalsmeer with this amount of ...,108
2,2,https://www.funda.nl/koop/aalsmeer/huis-429389...,€ 774.500 v.o.n.,Verandawoning (Bouwnr. 7),De Tuinen van Hornmeer - IN VERKOOP De omg...,2023,1431 WE Aalsmeer Hornmeer,202 m²,2023,202 m²,...,202.0,1431,1431.0,52.250582,4.735323,3.0,5,na,The house is in: aalsmeer with this amount of ...,202
3,3,https://www.funda.nl/koop/aalsmeer/appartement...,€ 403.000 v.o.n.,Appartementen Type E (Bouwnr. 64),De Tuinen van Hornmeer - appartementen IN V...,2024,1431 WE Aalsmeer Hornmeer,84 m²,2024,84 m²,...,84.0,1431,1431.0,52.250582,4.735323,2.0,3,na,The house is in: aalsmeer with this amount of ...,84
4,4,https://www.funda.nl/koop/aalsmeer/huis-429389...,€ 774.500 v.o.n.,Herenhuis (Bouwnr. 22),De Tuinen van Hornmeer - IN VERKOOP De omg...,2022,1431 WE Aalsmeer Hornmeer,190 m²,2022,190 m²,...,190.0,1431,1431.0,52.250582,4.735323,5.0,7,na,The house is in: aalsmeer with this amount of ...,190


In [64]:
# Display the first 10 entries of the 'f_string_descrip' column
with pd.option_context('display.max_colwidth', None):
    print(df['f_string_descrip'].head(10))


0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

In [66]:
df.to_csv("setup_f_description.csv")