In [None]:
import pandas as pd
import plotly.express as px  # For Plotly Express, which is easy-to-use for quick visualizations
import plotly.graph_objects as go 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots  # Correct import for make_subplots

In [None]:
# --- Apartment purchase costs ------------------------------------------------
# Read the second sheet (index 1) and give the unnamed first column a usable name.
apt_buying = pd.read_excel(
    "../data/raw/Apartment_buying_cost_over_time.xlsx", sheet_name=1
).rename(columns={"Unnamed: 0": "city"})

# Discard the rows labelled 3-6.
# NOTE(review): presumably these are the rows that are not one-bedroom
# figures -- confirm against the sheet.
buying_one_br = apt_buying.drop(range(3, 7))

# Reduce every header to its first whitespace-separated token.
buying_one_br.columns = [
    header.split()[0] for header in buying_one_br.columns.str.strip()
]
buying_one_br = buying_one_br.round(2)

# --- Salary / rent statistics ------------------------------------------------
# Blank cells are forward-filled from the row above. `.ffill()` replaces the
# deprecated `fillna(method='ffill')` spelling and returns a new frame instead
# of mutating in place.
monthly_salary = (
    pd.read_excel("../data/raw/numbeo_stats.xlsx", sheet_name=0)
    .ffill()
    .rename(columns={"City": "city"})
)
# Vectorised lower-casing; unlike a Python-level loop it tolerates a missing
# value (e.g. a leading blank that forward-fill could not populate).
monthly_salary["city"] = monthly_salary["city"].str.lower()

# Discard the rows labelled 3-8 (previously done as two consecutive drops).
monthly_salary = monthly_salary.drop(range(3, 9))

# Only the last bare expression of a cell is rendered, so show both frames
# explicitly. `display` is provided by the IPython kernel.
display(buying_one_br, monthly_salary)

In [None]:
# The city column here holds values that differ from the other table's, so
# keep only the first run of ASCII letters from each entry and lower-case it.
buying_one_br["city"] = (
    buying_one_br["city"]
    .str.extract(r'([a-zA-Z]+)')[0]
    .str.lower()
)

# A single row of three panels, one titled panel per city.
fig = make_subplots(
    rows=1,
    cols=3,
    shared_yaxes=True,
    vertical_spacing=0.1,
    subplot_titles=["Lisbon", "Berlin", "Paris"],
)



# function for each city plot
def plot_city_data(city_name, row, col, is_first_city=False):
    """Add the rent, mortgage and salary traces for one city to ``fig``.

    Reads the notebook-level frames ``monthly_salary`` and ``buying_one_br``
    and appends three line traces to the notebook-level figure ``fig``.
    The rent and mortgage traces carry a per-point text label giving the
    value as a percentage of that year's salary.

    Args:
        city_name: Value matched against the ``city`` column of both frames.
        row: Subplot row passed through to ``fig.add_trace``.
        col: Subplot column passed through to ``fig.add_trace``.
        is_first_city: When True the traces get their own legend entries.
            Traces of the same kind share a legend group across calls, so a
            legend entry toggles that series in every panel.

    Raises:
        ValueError: If the rent, salary or mortgage selection for the city
            does not consist of exactly one row.
    """
    years = ['2019', '2020', '2021', '2022', '2023', '2024']

    # Select the single row behind each series.
    city_stats = monthly_salary[monthly_salary['city'] == city_name]
    metric = city_stats['Unnamed: 0']
    selections = {
        "rent": city_stats.loc[metric == '1 bed apartment (rent)', years],
        "salary": city_stats.loc[metric == 'Av salary (after tax)', years],
        "mortgage": buying_one_br.loc[buying_one_br['city'] == city_name, years],
    }

    # Fail loudly instead of drawing an empty or misaligned panel when a
    # filter matches nothing (e.g. a misspelt city) or more than one row.
    values = {}
    for label, selection in selections.items():
        if len(selection) != 1:
            raise ValueError(
                f"expected exactly one {label} row for city {city_name!r}, "
                f"found {len(selection)}"
            )
        values[label] = selection.to_numpy().ravel()

    # Rent and mortgage expressed as a percentage of salary, per year.
    share_of_salary = {
        label: (values[label] / values["salary"]) * 100
        for label in ("rent", "mortgage")
    }

    # (series, colour) in drawing order; the order also fixes the legend order.
    trace_specs = (
        ("rent", "blue"),
        ("mortgage", "green"),
        ("salary", "red"),
    )
    for label, colour in trace_specs:
        # The salary trace has no percentage label.
        hover_text = None
        if label in share_of_salary:
            hover_text = [f'{pct:.2f}% of salary' for pct in share_of_salary[label]]

        fig.add_trace(go.Scatter(
            x=years, y=values[label], mode='lines+markers', name=label,
            line=dict(color=colour, width=2),
            marker=dict(size=8, color=colour),
            showlegend=is_first_city,
            # Tie same-named traces together so one legend click toggles
            # the series in every panel, not just the first city's.
            legendgroup=label,
            text=hover_text,
        ), row=row, col=col)

# Draw one panel per city, left to right; only the first call asks for
# legend entries so each series is listed once.
for panel_col, city in enumerate(('lisbon', 'berlin', 'paris'), start=1):
    plot_city_data(city, 1, panel_col, is_first_city=(panel_col == 1))

fig.update_layout(
    title="average salary vs rent and mortgage (per month)",
    yaxis_title="Amount (€)",
    showlegend=True,
    height=600,
    width=1000,
    # Horizontal legend centred underneath the panels.
    legend=dict(
        title="Legend",
        title_font=dict(size=14, family='Arial', color='black'),
        font=dict(size=12, family='Arial', color='black'),
        orientation="h",
        x=0.5,
        y=-0.2,
        xanchor="center",
        yanchor="top",
    )
)

# A layout-level `xaxis_title` only reaches the first subplot's axis;
# `update_xaxes` labels the x-axis of every panel.
fig.update_xaxes(title_text="Year")

fig.show()
