## 1. Import libraries

In [1]:
import requests
from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import sys
sys.path.append('../')
import numpy as np

## 2. Import Data

In [2]:
df = pd.read_csv("../Data/ultimo_web_scrapping.csv")

## 3. We explore the dataset

In [3]:
df.head(3)

Unnamed: 0.1,Unnamed: 0,Airport_hub,Intercity_train_connectivity,Gun_related_deaths,Guns_per_residents,Pisa_ranking,Best_university,Monthly_Fitness_Club_Membership,Beer,Monthly_Public_Transport,...,Concert_venues,Museums,Sport_venues,Currency_for_urban_area,GDP_per_capita_uno,Growth,GDP_per_capita_otro,Unemployment,url,Average_Annual_percent_sunshine
0,0,8,0.59,1,12,13,Aarhus University,$37.00,$1.70,$62.00,...,95,18,21,DKK,$44342.66,5%,"$44,343",4%,https://teleport.org/cities/aarhus/,No hay dato
1,1,26,0.17,1,24,16,University of Adelaide,$50.00,$4.30,$80.00,...,89,37,91,AUD,$46433.30,7%,"$46,433",6%,https://teleport.org/cities/adelaide/,12.6
2,2,23,0.11,16,328,24,University of New Mexico,$52.00,$2.90,$37.00,...,99,35,10,USD,$54596.65,7%,"$54,597",5%,https://teleport.org/cities/albuquerque/,168


In [4]:
df.shape

(262, 33)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 33 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Unnamed: 0                       262 non-null    int64 
 1   Airport_hub                      262 non-null    object
 2   Intercity_train_connectivity     262 non-null    object
 3   Gun_related_deaths               262 non-null    object
 4   Guns_per_residents               262 non-null    object
 5   Pisa_ranking                     262 non-null    object
 6   Best_university                  262 non-null    object
 7   Monthly_Fitness_Club_Membership  261 non-null    object
 8   Beer                             261 non-null    object
 9   Monthly_Public_Transport         261 non-null    object
 10  Lunch                            261 non-null    object
 11  Large_apartment                  262 non-null    object
 12  Medium_apartment                 262

#### Podemos observar que la mayoria de datos son tipo object, por lo que tendremos que convertir las columnas a sus respectivos dtypes.

In [6]:
pd.isna(df).sum()


Unnamed: 0                         0
Airport_hub                        0
Intercity_train_connectivity       0
Gun_related_deaths                 0
Guns_per_residents                 0
Pisa_ranking                       0
Best_university                    0
Monthly_Fitness_Club_Membership    1
Beer                               1
Monthly_Public_Transport           1
Lunch                              1
Large_apartment                    0
Medium_apartment                   0
Small_apartment                    0
cities                             0
Description                        0
Poblacion                          0
Homosexuality_acceptance           0
LGBT_adoption_rights               8
LGBT_homosexuality_rights          0
LGBT_marriage_rights               0
VAT_Sales_Tax                      0
Art_galleries                      0
Concert_venues                     0
Museums                            0
Sport_venues                       0
Currency_for_urban_area            0
G

#### Algunos de los valores presentes son nan, por lo que deberemos reemplazar la información en esos lugares.

In [7]:
#df.LGBT_adoption_rights.unique()

In [8]:
#df.Lunch.unique()

In [9]:
#df.Monthly_Fitness_Club_Membership.unique()

In [10]:
#df.Monthly_Public_Transport.unique()

In [11]:
#df.Beer.unique()

In [12]:
df['Beer'] = df['Beer'].replace(np.nan, 0)

In [13]:
df['Monthly_Fitness_Club_Membership'] = df['Monthly_Fitness_Club_Membership'].replace(np.nan, 0)

In [14]:
df["Lunch"] = df["Lunch"].replace(np.nan, 0)

In [15]:
df["LGBT_adoption_rights"] = df["LGBT_adoption_rights"].replace(np.nan, "No hay dato")

In [16]:
df["Monthly_Public_Transport"] = df["Monthly_Public_Transport"].replace(np.nan, 0)

In [17]:
df["Monthly_Public_Transport"] = df["Monthly_Public_Transport"].replace('No hay dato', 0)

In [18]:
pd.isna(df).sum()


Unnamed: 0                         0
Airport_hub                        0
Intercity_train_connectivity       0
Gun_related_deaths                 0
Guns_per_residents                 0
Pisa_ranking                       0
Best_university                    0
Monthly_Fitness_Club_Membership    0
Beer                               0
Monthly_Public_Transport           0
Lunch                              0
Large_apartment                    0
Medium_apartment                   0
Small_apartment                    0
cities                             0
Description                        0
Poblacion                          0
Homosexuality_acceptance           0
LGBT_adoption_rights               0
LGBT_homosexuality_rights          0
LGBT_marriage_rights               0
VAT_Sales_Tax                      0
Art_galleries                      0
Concert_venues                     0
Museums                            0
Sport_venues                       0
Currency_for_urban_area            0
G

In [19]:
copia = df.copy()

### Para poder trabajar con los datos debemos quitar símbolos y hacer una limpieza de las columnas

In [20]:
def delete_price(i):
    i = str(i).replace('$', "")
    return i

In [21]:
copia['GDP_per_capita_otro'] = copia['GDP_per_capita_otro'].apply(delete_price)

In [22]:
copia['Monthly_Fitness_Club_Membership'] = copia['Monthly_Fitness_Club_Membership'].apply(delete_price)

In [23]:
copia['Beer'] = copia['Beer'].apply(delete_price)

In [24]:
copia['Monthly_Public_Transport'] = copia['Monthly_Public_Transport'].apply(delete_price)

In [25]:
copia['GDP_per_capita_uno'] = copia['GDP_per_capita_uno'].apply(delete_price)

In [26]:
copia['Lunch'] = copia['Lunch'].apply(delete_price)

In [27]:
copia['Large_apartment'] = copia['Large_apartment'].apply(delete_price)

In [28]:
copia['Medium_apartment'] = copia['Medium_apartment'].apply(delete_price)

In [29]:
copia['Small_apartment'] = copia['Small_apartment'].apply(delete_price)

In [31]:
def convertir(i):
    i = i.replace(",", ".")
    return i

In [32]:
copia["GDP_per_capita_otro"] = copia["GDP_per_capita_otro"].apply(convertir)

In [33]:
def quitar(i):
    i = i.replace(",", "")
    return i

In [34]:
copia["Poblacion"] = copia["Poblacion"].apply(quitar)

In [139]:
def clean_symbol(i):
    i = i.replace("✖", "")
    return i

In [142]:
copia["LGBT_adoption_rights"] = copia["LGBT_adoption_rights"].apply(clean_symbol)

In [145]:
copia["LGBT_homosexuality_rights"] = copia["LGBT_homosexuality_rights"].apply(clean_symbol)

In [148]:
copia["LGBT_marriage_rights"] = copia["LGBT_marriage_rights"].apply(clean_symbol)

In [140]:
def clean_symbol2(i):
    i = i.replace("✔", "")
    return i

In [None]:
copia["LGBT_adoption_rights"] = copia["LGBT_adoption_rights"].apply(clean_symbol2)

In [146]:
copia["LGBT_homosexuality_rights"] = copia["LGBT_homosexuality_rights"].apply(clean_symbol2)

In [149]:
copia["LGBT_marriage_rights"] = copia["LGBT_marriage_rights"].apply(clean_symbol2)

In [36]:
copia.head()

Unnamed: 0.1,Unnamed: 0,Airport_hub,Intercity_train_connectivity,Gun_related_deaths,Guns_per_residents,Pisa_ranking,Best_university,Monthly_Fitness_Club_Membership,Beer,Monthly_Public_Transport,...,Concert_venues,Museums,Sport_venues,Currency_for_urban_area,GDP_per_capita_uno,Growth,GDP_per_capita_otro,Unemployment,url,Average_Annual_percent_sunshine
0,0,8,0.59,1,12,13,Aarhus University,37.0,1.7,62.0,...,95,18,21,DKK,44342.66,5%,44.343,4%,https://teleport.org/cities/aarhus/,No hay dato
1,1,26,0.17,1,24,16,University of Adelaide,50.0,4.3,80.0,...,89,37,91,AUD,46433.3,7%,46.433,6%,https://teleport.org/cities/adelaide/,12.6
2,2,23,0.11,16,328,24,University of New Mexico,52.0,2.9,37.0,...,99,35,10,USD,54596.65,7%,54.597,5%,https://teleport.org/cities/albuquerque/,168
3,3,45,0.66,1,1,39,427.97,83.0,0.68,19.0,...,68,24,53,KZT,24019.95,9%,24.02,6%,https://teleport.org/cities/almaty/,64
4,4,237,0.68,1,4,10,University of Amsterdam,49.0,2.1,100.0,...,339,187,154,EUR,47354.53,3%,47.355,6%,https://teleport.org/cities/amsterdam/,12.9


In [37]:
def clean_no_info(i):
    i = i.replace("No hay dato", "0")
    return i

In [38]:
copia["Airport_hub"] = copia["Airport_hub"].apply(clean_no_info)

In [39]:
copia["Intercity_train_connectivity"] = copia["Intercity_train_connectivity"].apply(clean_no_info)

In [40]:
copia["Gun_related_deaths"] = copia["Gun_related_deaths"].apply(clean_no_info)

In [41]:
copia["Guns_per_residents"] = copia["Guns_per_residents"].apply(clean_no_info)

In [42]:
copia["Pisa_ranking"] = copia["Pisa_ranking"].apply(clean_no_info)

In [43]:
copia["Large_apartment"] = copia["Large_apartment"].apply(clean_no_info)

In [44]:
copia["Small_apartment"] = copia["Small_apartment"].apply(clean_no_info)

In [45]:
copia["Medium_apartment"] = copia["Medium_apartment"].apply(clean_no_info)

In [46]:
copia["VAT_Sales_Tax"] = copia["VAT_Sales_Tax"].apply(clean_no_info)

In [47]:
copia["Art_galleries"] = copia["Art_galleries"].apply(clean_no_info)

In [48]:
copia["Concert_venues"] = copia["Concert_venues"].apply(clean_no_info)

In [49]:
copia["Museums"] = copia["Museums"].apply(clean_no_info)

In [50]:
copia["Sport_venues"] = copia["Sport_venues"].apply(clean_no_info)

In [51]:
copia["GDP_per_capita_uno"] = copia["GDP_per_capita_uno"].apply(clean_no_info)

In [52]:
copia["GDP_per_capita_otro"] = copia["GDP_per_capita_otro"].apply(clean_no_info)

In [53]:
copia["Unemployment"] = copia["Unemployment"].apply(clean_no_info)

In [54]:
copia["Average_Annual_percent_sunshine"] = copia["Average_Annual_percent_sunshine"].apply(clean_no_info)

In [93]:
copia["Homosexuality_acceptance"] = copia["Homosexuality_acceptance"].apply(clean_no_info)

In [157]:
copia.Best_university.unique()

array(['Aarhus University', 'University of Adelaide',
       'University of New Mexico', '427.97', 'University of Amsterdam',
       '478.04', 'No hay dato', 'Bilkent University',
       'National and Kapodistrian University of Athens',
       'Georgia Institute of Technology', 'University of Auckland',
       'University of Texas at Austin', '362.42',
       'Johns Hopkins University', 'Mahidol University',
       'Autonomous University of Barcelona', 'Peking University',
       'American University of Beirut', 'Queen’s University Belfast',
       'University of Belgrade', 'Indian Institute of Science',
       'University of Bergen', 'Humboldt University of Berlin',
       'University of Bern', '490.84', 'University of Birmingham',
       'University of the Andes', 'University of Bologna',
       'University of Bordeaux', 'Massachusetts Institute of Technology',
       'University of Colorado Boulder', 'Montana State University',
       'Comenius University in Bratislava', 'University

In [167]:
import re

def replace_num(list):
    for i in list:
        if i == (re.findall("\d+",i)):
            i = i.replace("No hay dato")
        return i

In [168]:
algunos = ['University of Bern', '490.84', 'University of Birmingham',
       'University of the Andes', 'University of Bologna',
       'University of Bordeaux', 'Massachusetts Institute of Technology',
       'University of Colorado Boulder', 'Montana State University',
       'Comenius University in Bratislava', 'University of Sussex',
       'University of Queensland', 'University of Bristol',
       'Masaryk University', 'KU Leuven', 'University of Bucharest',
       'Semmelweis University', '391.41', 'University at Buffalo']

In [170]:
x=replace_num(algunos)

In [171]:
x

'University of Bern'

#### Algunas de las columnas tienen datos que no se corresponden con la medición de esa columna, por lo que reemplzaremos esos números por 0, dandole la asignación de que no tenemos datos sobre ese lugar.

### Continuamos la limpieza de nuestros datos:

In [55]:
def change(x):
    x = str(x).split('.')[0]
    return x

In [57]:
copia["Airport_hub"] = copia["Airport_hub"].apply(change) 

In [102]:
copia["Pisa_ranking"] =copia["Pisa_ranking"].apply(change) 

In [101]:
copia["Concert_venues"] = copia["Concert_venues"].apply(change)

In [109]:
copia["Museums"] = copia["Museums"].apply(change)

In [115]:
copia["Sport_venues"] = copia["Sport_venues"].apply(change)

### A continuación debemos convertir el dtype de las columnas

In [None]:
copia.info()

In [None]:
copia.head()

In [None]:
## Con estas columnas hay diversos problemas a solucionar

In [117]:
copia.Currency_for_urban_area.unique()

array(['DKK', 'AUD', 'USD', 'KZT', 'EUR', 'TRY', 'PYG', 'NZD', 'AZN',
       'IDR', '0.43', 'THB', 'CNY', 'LBP', 'GBP', 'RSD', 'INR', 'NOK',
       'CHF', 'COP', 'CZK', 'RON', 'HUF', 'ARS', 'EGP', 'CAD', 'ZAR',
       'VEF', 'MAD', 'MDL', 'BRL', 'TZS', 'QAR', 'AED', 'JPY', 'PLN',
       'GIP', 'SEK', 'MXN', 'GTQ', 'CUC', 'VND', 'HKD', 'NPR', 'UAH',
       'JMD', 'MYR', 'NGN', 'BOB', 'PEN', 'NIO', 'PHP', 'BYN', 'UYU',
       'RUB', 'KES', 'No hay dato', 'PAB', 'KHR', 'ISK', 'SAR', 'CRC',
       'CLP', 'DOP', 'BAM', 'KRW', 'SGD', 'MKD', 'BGN', 'TWD', 'UZS',
       'GEL', 'IRR', 'ILS', 'TND', 'AMD', 'HRK'], dtype=object)

In [119]:
copia.Growth.unique()

array(['5%', '7%', '9%', '3%', '4%', '-1%', '13%', '6%', '12%', '-2%',
       '10%', '16%', '8%', '80', '0%', '18%', '1%', '82', '2%', '43%',
       '-4%', '11%', '127%', '14%', '-41%', '34%', '-3%', '-39%', '62%',
       '17%', '-11%', '36%', '-5%', '-6%'], dtype=object)

In [120]:
copia.Unemployment.unique()

array(['4%', '6%', '5%', '0', '10%', '25%', '1%', '20%', '19%', '8%',
       '3%', '11%', '7%', '13%', '27%', '2%', '9%', '46%', '14%', '24%',
       '12%', '40%', '0%', '43%', '15%', '18%', '16%'], dtype=object)

In [138]:
copia.Best_university.unique()

array(['Aarhus University', 'University of Adelaide',
       'University of New Mexico', '427.97', 'University of Amsterdam',
       '478.04', 'No hay dato', 'Bilkent University',
       'National and Kapodistrian University of Athens',
       'Georgia Institute of Technology', 'University of Auckland',
       'University of Texas at Austin', '362.42',
       'Johns Hopkins University', 'Mahidol University',
       'Autonomous University of Barcelona', 'Peking University',
       'American University of Beirut', 'Queen’s University Belfast',
       'University of Belgrade', 'Indian Institute of Science',
       'University of Bergen', 'Humboldt University of Berlin',
       'University of Bern', '490.84', 'University of Birmingham',
       'University of the Andes', 'University of Bologna',
       'University of Bordeaux', 'Massachusetts Institute of Technology',
       'University of Colorado Boulder', 'Montana State University',
       'Comenius University in Bratislava', 'University

#### Por ahora, nos centremos en el resto de columnas que ya podemos convertir

In [67]:
copia.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 34 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   Unnamed: 0                       262 non-null    int64 
 1   Airport_hub                      262 non-null    object
 2   Intercity_train_connectivity     262 non-null    object
 3   Gun_related_deaths               262 non-null    object
 4   Guns_per_residents               262 non-null    object
 5   Pisa_ranking                     262 non-null    object
 6   Best_university                  262 non-null    object
 7   Monthly_Fitness_Club_Membership  262 non-null    object
 8   Beer                             262 non-null    object
 9   Monthly_Public_Transport         262 non-null    object
 10  Lunch                            262 non-null    object
 11  Large_apartment                  262 non-null    object
 12  Medium_apartment                 262

In [70]:
copia["Small_apartment"] = copia["Small_apartment"].astype(dtype=int)

In [66]:
copia["Medium_apartment"] = copia["Medium_apartment"].astype(dtype=int)

In [71]:
copia["Large_apartment"] = copia["Large_apartment"].astype(dtype=int)

In [73]:
copia["Airport_hub"] = copia["Airport_hub"].astype(dtype=int)

In [76]:
copia["Gun_related_deaths"] = copia["Gun_related_deaths"].astype(dtype=int)

In [78]:
copia["Guns_per_residents"] = copia["Guns_per_residents"].astype(dtype=int)

In [104]:
copia["Pisa_ranking"] = copia["Pisa_ranking"].astype(dtype=int)

In [107]:
#copia["Monthly_Fitness_Club_Membership"] = copia["Monthly_Fitness_Club_Membership"].astype(dtype=int)

In [116]:
#copia.Monthly_Fitness_Club_Membership.unique()

In [91]:
copia["Poblacion"] = copia["Poblacion"].astype(dtype=int)

In [97]:
copia["Art_galleries"] = copia["Art_galleries"].astype(dtype=int)

In [103]:
copia["Concert_venues"] = copia["Concert_venues"].astype(dtype=int)

In [110]:
copia["Museums"] = copia["Museums"].astype(dtype=int)

In [136]:
copia["Sport_venues"] = copia["Sport_venues"].astype(dtype=int)

In [80]:
copia.head()

Unnamed: 0.1,Unnamed: 0,Airport_hub,Intercity_train_connectivity,Gun_related_deaths,Guns_per_residents,Pisa_ranking,Best_university,Monthly_Fitness_Club_Membership,Beer,Monthly_Public_Transport,...,Museums,Sport_venues,Currency_for_urban_area,GDP_per_capita_uno,Growth,GDP_per_capita_otro,Unemployment,url,Average_Annual_percent_sunshine,Small_apart
0,0,8,0.59,1,12,13,Aarhus University,37.0,1.7,62.0,...,18,21,DKK,44342.66,5%,44.343,4%,https://teleport.org/cities/aarhus/,0.0,840
1,1,26,0.17,1,24,16,University of Adelaide,50.0,4.3,80.0,...,37,91,AUD,46433.3,7%,46.433,6%,https://teleport.org/cities/adelaide/,12.6,1000
2,2,23,0.11,16,328,24,University of New Mexico,52.0,2.9,37.0,...,35,10,USD,54596.65,7%,54.597,5%,https://teleport.org/cities/albuquerque/,168.0,760
3,3,45,0.66,1,1,39,427.97,83.0,0.68,19.0,...,24,53,KZT,24019.95,9%,24.02,6%,https://teleport.org/cities/almaty/,64.0,340
4,4,237,0.68,1,4,10,University of Amsterdam,49.0,2.1,100.0,...,187,154,EUR,47354.53,3%,47.355,6%,https://teleport.org/cities/amsterdam/,12.9,1600


In [121]:
copia.Average_Annual_percent_sunshine.unique()

array(['0', '12.6', '168', '64', '12.9', '61', '243', '101', '12.5',
       '266.7', '110', '182.9', '116', '12.7', '207', '73', '209.5', '75',
       '113', '160', '213', '35', '120', '288', '12.8', '98', '324',
       '326', '124', '54', '297.1', '254', '260', '259', '111', '105',
       '84', '238.3', '82', '66', '138', '127', '72', '13', '161', '228',
       '136', '115', '76', '298', '337', '174', '227.8', '288.2', '281',
       '282', '225', '330', '302', '90', '181', '44', '198', '13.2',
       '277.1', '222', '200', '296', '265', '13.3', '91', '88', '95',
       '246', '121', '285', '97', '217', '312', '65', '211', '290', '218',
       '89', '241.4', '147', '93', '31', '94', '107', '252', '118', '96',
       '103', '336', '139', '177', '246.7', '13.5', '102', '180', '58',
       '68', '242', '99', '13.6', '100', '13.1', '130', '125', '106',
       '271', '70', '236', '299.5', '71', '250', '129', '287', '291',
       '13.4', '199.9', '213.1', '172', '49.4', '286', '36'], dtype=o

In [122]:
copia["Intercity_train_connectivity"] = copia["Intercity_train_connectivity"].astype(dtype=float)

In [134]:
copia["Monthly_Fitness_Club_Membership"] = copia["Monthly_Fitness_Club_Membership"].astype(dtype=float)

In [124]:
copia["Beer"] = copia["Beer"].astype(dtype=float)

In [125]:
copia["Lunch"] = copia["Lunch"].astype(dtype=float)

In [126]:
copia["Monthly_Public_Transport"] = copia["Monthly_Public_Transport"].astype(dtype=float)

In [127]:
copia["Homosexuality_acceptance"] = copia["Homosexuality_acceptance"].astype(dtype=float)

In [128]:
copia["VAT_Sales_Tax"] = copia["VAT_Sales_Tax"].astype(dtype=float)

In [130]:
copia["Average_Annual_percent_sunshine"] = copia["Average_Annual_percent_sunshine"].astype(dtype=float)

In [154]:
copia["GDP_per_capita_uno"] =copia["GDP_per_capita_uno"].astype(dtype=float)

### A continuación eliminaremos algunas columnas que no utilizaremos

In [131]:
copia.drop(["Unnamed: 0", "GDP_per_capita_otro", "Small_apart"], axis=1, inplace=True)

In [132]:
copia.head()

Unnamed: 0,Airport_hub,Intercity_train_connectivity,Gun_related_deaths,Guns_per_residents,Pisa_ranking,Best_university,Monthly_Fitness_Club_Membership,Beer,Monthly_Public_Transport,Lunch,...,Art_galleries,Concert_venues,Museums,Sport_venues,Currency_for_urban_area,GDP_per_capita_uno,Growth,Unemployment,url,Average_Annual_percent_sunshine
0,8,0.59,1,12,13,Aarhus University,37.0,1.7,62.0,15.0,...,51,95,18,21,DKK,44342.66,5%,4%,https://teleport.org/cities/aarhus/,0.0
1,26,0.17,1,24,16,University of Adelaide,50.0,4.3,80.0,13.0,...,82,89,37,91,AUD,46433.3,7%,6%,https://teleport.org/cities/adelaide/,12.6
2,23,0.11,16,328,24,University of New Mexico,52.0,2.9,37.0,12.0,...,127,99,35,10,USD,54596.65,7%,5%,https://teleport.org/cities/albuquerque/,168.0
3,45,0.66,1,1,39,427.97,83.0,0.68,19.0,5.7,...,61,68,24,53,KZT,24019.95,9%,6%,https://teleport.org/cities/almaty/,64.0
4,237,0.68,1,4,10,University of Amsterdam,49.0,2.1,100.0,18.0,...,284,339,187,154,EUR,47354.53,3%,6%,https://teleport.org/cities/amsterdam/,12.9


In [155]:
copia.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 262 entries, 0 to 261
Data columns (total 31 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Airport_hub                      262 non-null    int64  
 1   Intercity_train_connectivity     262 non-null    float64
 2   Gun_related_deaths               262 non-null    int64  
 3   Guns_per_residents               262 non-null    int64  
 4   Pisa_ranking                     262 non-null    int64  
 5   Best_university                  262 non-null    object 
 6   Monthly_Fitness_Club_Membership  262 non-null    float64
 7   Beer                             262 non-null    float64
 8   Monthly_Public_Transport         262 non-null    float64
 9   Lunch                            262 non-null    float64
 10  Large_apartment                  262 non-null    int64  
 11  Medium_apartment                 262 non-null    int64  
 12  Small_apartment       