In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
def concatenate_countries(path="../data/raw/week_3_project_data.xlsx"):
    """Stack the housing, rental and income sheets of a workbook into one frame.

    The sheets at positions 0, 1 and 2 are read as the housing, rental and
    income tables respectively and concatenated row-wise.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the Excel workbook. Defaults to the project's raw data
        file, so existing zero-argument calls keep working.

    Returns
    -------
    pandas.DataFrame
        All rows of the three sheets. The index holds the label ``'Housing'``,
        ``'Rental'`` or ``'Income'`` for each row, and the ``"Unnamed: 0"``
        column is renamed to ``"Country"``.
    """
    # A single read_excel call with a list of sheet positions parses the
    # workbook once and returns a dict keyed by those positions.
    sheets = pd.read_excel(path, sheet_name=[0, 1, 2])
    labelled_sheets = [
        ("Housing", sheets[0]),
        ("Rental", sheets[1]),
        ("Income", sheets[2]),
    ]

    eurostat_df = pd.concat(
        [frame for _, frame in labelled_sheets], ignore_index=True
    )
    # Tag every row with the sheet it came from so callers can filter on the index.
    eurostat_df.index = [
        label for label, frame in labelled_sheets for _ in range(len(frame))
    ]

    return eurostat_df.rename(columns={"Unnamed: 0": "Country"})

In [None]:
# Build the combined Eurostat frame (housing, rental and income sheets stacked)
# and display it. The function name was previously misspelled here, which
# raised a NameError on a fresh kernel.
eurostat_df = concatenate_countries()
eurostat_df

In [None]:
def cleaning_eurostat_data_for_viz(eurostat=None):
    """Reshape the combined Eurostat frame into one long-format table per measure.

    Parameters
    ----------
    eurostat : pandas.DataFrame, optional
        Frame whose index labels each row as ``'Income'``, ``'Housing'`` or
        ``'Rental'`` and which has a ``"Country"`` column. When omitted, the
        notebook-level ``eurostat_df`` is used, so existing zero-argument
        calls behave as before.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(income_tidy, housing_tidy, rental_tidy)``. Each frame has the
        columns ``"Country"``, ``"Year"`` and a value column named after the
        measure.
    """
    source = eurostat_df if eurostat is None else eurostat

    def _tidy(measure):
        # Keep the rows labelled with this measure, then unpivot every column
        # except Country into (Year, value) pairs.
        subset = source[source.index == measure]
        return subset.melt(id_vars="Country", var_name="Year", value_name=measure)

    return _tidy("Income"), _tidy("Housing"), _tidy("Rental")

In [None]:
# Unpack the three long-format tables and display the rental one.
income_tidy, housing_tidy, rental_tidy = cleaning_eurostat_data_for_viz()
rental_tidy

In [None]:
# Quick look at income by year, one line per country, with star markers.
# No title or explicit axis labels are set in this cell.
sns.lineplot(data=income_tidy, x="Year", y="Income", hue="Country", marker="*")

In [None]:
# Rental values by year, one line per country, on a 9 x 6 figure.
plt.figure(figsize=(9, 6))
# seaborn returns the Axes it drew on; title and axis labels are set on it directly.
sns.lineplot(
    data=rental_tidy,
    x="Year",
    y="Rental",
    hue="Country",
    marker="o",
).set(
    title="Rental Trends for Germany, France and Portugal (2019-2023)",
    xlabel="Year",
    ylabel="Rental index",
)
plt.show()

In [None]:
def clean_countries():
    """Load the country sheet of the Numbeo workbook and fill in its Type labels.

    Reads the sheet at position 1 of ``../data/raw/numbeo_stats.xlsx``, strips
    surrounding whitespace from the column names and renames ``"Unnamed: 0"``
    to ``"Type"``. Fixed row positions are then assigned their category label.
    Rows 0, 3, 9 and 12 keep whatever label the sheet already holds.

    This version leaves the year columns as read. A later cell redefines the
    function with a numeric conversion step.

    Returns
    -------
    pandas.DataFrame
        The relabelled country table.
    """
    raw = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=1)
    raw.columns = raw.columns.str.strip()
    countries = raw.rename(columns={"Unnamed: 0": "Type"})

    # Category label -> row positions that should carry it.
    rows_by_label = {
        "1 bed apartment (rent)": [1, 2],
        "3 bed apartment (rent)": [4, 5],
        "Buy apartment (per m2 in city center)": [6, 7, 8],
        "Av salary (after tax)": [10, 11],
        "Min wage (after tax)": [13, 14],
    }
    for label, rows in rows_by_label.items():
        countries.loc[rows, "Type"] = label

    return countries

# Load the relabelled country table, then keep and display only the rows
# whose Type is the one-bedroom rental category.
countries = clean_countries()
one_bed_apt = countries[countries["Type"] == "1 bed apartment (rent)"]
one_bed_apt

In [None]:
# Income by year, one line per country, on a 9 x 6 figure.
plt.figure(figsize=(9, 6))
# seaborn returns the Axes it drew on; title and axis labels are set on it directly.
sns.lineplot(
    data=income_tidy,
    x="Year",
    y="Income",
    hue="Country",
    marker="o",
).set(
    title="Income Trends for Germany, France and Portugal (2019-2023)",
    xlabel="Year",
    ylabel="Income (€)",
)
plt.show()

In [None]:
def clean_countries():
    """Load the country sheet of the Numbeo workbook, relabel it and make years numeric.

    Reads the sheet at position 1 of ``../data/raw/numbeo_stats.xlsx``, strips
    surrounding whitespace from the column names and renames ``"Unnamed: 0"``
    to ``"Type"``. Fixed row positions are assigned their category label; rows
    0, 3, 9 and 12 keep whatever label the sheet already holds. The columns
    ``'2019'`` to ``'2024'`` are then converted to float after removing any
    whitespace used as a digit-group separator.

    Returns
    -------
    pandas.DataFrame
        The relabelled country table with float year columns.

    Raises
    ------
    KeyError
        If one of the expected year columns or row positions is missing.
    ValueError
        If a year cell still is not a valid number once whitespace is removed.
    """
    countries = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=1)
    countries.columns = countries.columns.str.strip()
    countries = countries.rename(columns={"Unnamed: 0": "Type"})

    # Category label -> row positions that should carry it.
    rows_by_label = {
        "1 bed apartment (rent)": [1, 2],
        "3 bed apartment (rent)": [4, 5],
        "Buy apartment (per m2 in city center)": [6, 7, 8],
        "Av salary (after tax)": [10, 11],
        "Min wage (after tax)": [13, 14],
    }
    for label, rows in rows_by_label.items():
        countries.loc[rows, "Type"] = label

    columns_to_clean = ['2019', '2020', '2021', '2022', '2023', '2024']
    for col in columns_to_clean:
        # \s matches every Unicode whitespace character, so plain spaces,
        # no-break spaces (\xa0) and narrow no-break spaces (\u202f) are all
        # removed before the float conversion.
        countries[col] = (
            countries[col]
            .astype(str)
            .str.replace(r"\s+", "", regex=True)
            .astype(float)
        )

    return countries

In [None]:
# Rebuild the country table with the redefined cleaner (numeric year columns),
# then keep and display only the one-bedroom rental rows.
countries = clean_countries()
one_bed_apt = countries[countries["Type"] == "1 bed apartment (rent)"]
one_bed_apt

In [None]:
# Unpivot the year columns into (Year, Value) rows and store Year as an integer.
one_bed_apt_melted = (
    one_bed_apt
    .melt(id_vars=["Type", "Country"], var_name="Year", value_name="Value")
    .assign(Year=lambda long_df: long_df["Year"].astype(int))
)

In [None]:
# Interactive line chart of the melted one-bedroom rental values by year.
fig = px.line(
    one_bed_apt_melted, 
    x="Year", 
    y="Value",
    color="Country",       # One colour per country
    line_group="Country",  # One connected line per country
    facet_col="Type",      # One subplot column per distinct Type value
    markers=True           # Draw a marker at every data point
)

fig.show()

In [None]:
def clean_cities():
    """Load the city sheet of the Numbeo workbook, relabel it and make years numeric.

    Reads the sheet at position 0 of ``../data/raw/numbeo_stats.xlsx``, strips
    surrounding whitespace from the column names and renames ``"Unnamed: 0"``
    to ``"Type"``. Fixed row positions are assigned their category label; rows
    0, 3 and 9 keep whatever label the sheet already holds. The columns
    ``'2019'`` to ``'2024'`` are then converted to float after removing any
    whitespace used as a digit-group separator.

    Returns
    -------
    pandas.DataFrame
        The relabelled city table with float year columns.

    Raises
    ------
    KeyError
        If one of the expected year columns or row positions is missing.
    ValueError
        If a year cell still is not a valid number once whitespace is removed.
    """
    cities = pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=0)
    cities.columns = cities.columns.str.strip()
    cities = cities.rename(columns={"Unnamed: 0": "Type"})

    # Category label -> row positions that should carry it.
    rows_by_label = {
        "1 bed apartment (rent)": [1, 2],
        "3 bed apartment (rent)": [4, 5],
        "Buy apartment (per m2 in city center)": [6, 7, 8],
        "Av salary (after tax)": [10, 11],
    }
    for label, rows in rows_by_label.items():
        cities.loc[rows, "Type"] = label

    columns_to_clean = ['2019', '2020', '2021', '2022', '2023', '2024']
    for col in columns_to_clean:
        # \s matches every Unicode whitespace character, so plain spaces,
        # no-break spaces (\xa0) and narrow no-break spaces (\u202f) are all
        # removed before the float conversion.
        cities[col] = (
            cities[col]
            .astype(str)
            .str.replace(r"\s+", "", regex=True)
            .astype(float)
        )

    return cities

In [None]:
# Load the cleaned city table and display it.
cities = clean_cities()
cities

In [None]:
def cleaning_eurostat_data_for_viz(eurostat=None):
    """Split the combined Eurostat frame per measure, in wide and long format.

    Parameters
    ----------
    eurostat : pandas.DataFrame, optional
        Frame whose index labels each row as ``'Income'``, ``'Housing'`` or
        ``'Rental'`` and which has a ``"Country"`` column. When omitted, the
        notebook-level ``eurostat_df`` is used, so existing zero-argument
        calls behave as before.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(income_tidy, housing_tidy, rental_tidy, income_df, housing_df,
        rental_df)``. The ``*_df`` frames are the rows of each measure as they
        appear in the input. The ``*_tidy`` frames are their melted form with
        the columns ``"Country"``, ``"Year"`` and a value column named after
        the measure.
    """
    source = eurostat_df if eurostat is None else eurostat
    measures = ("Income", "Housing", "Rental")

    # Wide form: the rows whose index label matches each measure.
    wide = {name: source[source.index == name] for name in measures}
    # Long form: every column except Country unpivoted into (Year, value) pairs.
    tidy = {
        name: frame.melt(id_vars="Country", var_name="Year", value_name=name)
        for name, frame in wide.items()
    }

    return (
        tidy["Income"],
        tidy["Housing"],
        tidy["Rental"],
        wide["Income"],
        wide["Housing"],
        wide["Rental"],
    )

In [None]:
# Re-run the reshaping with the redefined helper, which additionally returns
# the three wide per-measure frames, and display the wide income table.
income_tidy, housing_tidy, rental_tidy, income_df, housing_df, rental_df = cleaning_eurostat_data_for_viz()
income_df

In [None]:
# Keep and display only the rows of the cleaned city table whose City is Lisbon.
df_lisbon = cities[cities['City'] == 'Lisbon']
df_lisbon

In [None]:
# Unpivot the year columns into (Year, Value) rows and store Year as an integer.
lisbon_melted = (
    df_lisbon
    .melt(id_vars=["Type", "City"], var_name="Year", value_name="Value")
    .assign(Year=lambda long_df: long_df["Year"].astype(int))
)

In [None]:
# Interactive line chart of the melted Lisbon values by year.
fig = px.line(
    lisbon_melted, 
    x="Year", 
    y="Value",
    color="City",       # One colour per city
    line_group="City",  # One connected line per city
    facet_col="Type",      # One subplot column per distinct Type value
    markers=True           # Draw a marker at every data point
)

fig.show()