In [2]:
# Install the Streamlit library into the running kernel's environment.
%pip install streamlit

Note: you may need to restart the kernel to use updated packages.


In [3]:
#Instalamos la libreria de STREAMLIT
!pip install streamlit



In [4]:
# Install the Plotly library into the running kernel's environment.
%pip install plotly

Note: you may need to restart the kernel to use updated packages.


In [5]:
# To install npm for use from Visual Studio:
# 1. Search for "node.js" on Google
# 2. Install the recommended version
# NOTE(review): the output recorded for this cell reports that `npm` was not
# recognized as a command, so Node.js was presumably not installed / not on
# PATH when it last ran — confirm before relying on localtunnel.
! npm install localtunnel

"npm" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.


In [6]:
# Install statsmodels into the running kernel's environment.
# NOTE(review): the app.py written by this notebook imports scikit-learn and
# scipy but no statsmodels import is visible — confirm which packages are
# actually required.
%pip install statsmodels

Note: you may need to restart the kernel to use updated packages.


In [7]:
%%writefile app.py
##########################################################
# DASHBOARD FINAL DE REGRESIONES
# Autora: María Fernanda Robles Soto
##########################################################

import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from scipy.optimize import curve_fit

##########################################################
# CARGA DE DATOS
##########################################################
@st.cache_data
def load_data():
    """Load ``base_final.csv`` and return a cleaned DataFrame.

    Cleaning steps, in order:
      1. ``"?"`` placeholders become NaN.
      2. Gaps are filled backward, then forward (so leading and trailing
         gaps are both covered).
      3. The ``host_id`` column is dropped when present.
      4. In object columns, ``t``/``f`` and ``yes``/``no`` flags become 1/0.
      5. ``price`` (when present) loses ``$`` and ``,`` and is cast to float.

    Returns:
        pandas.DataFrame: the cleaned data.

    Raises:
        FileNotFoundError: if ``base_final.csv`` is not in the working directory.
        ValueError: if ``price`` still contains non-numeric text after cleanup.
    """
    # st.cache_data (not cache_resource) is the cache meant for DataFrames:
    # each caller receives its own copy, so one session's mutations cannot
    # leak into another session.
    df = pd.read_csv("base_final.csv")
    df = df.replace("?", np.nan)
    # DataFrame.fillna(method=...) is deprecated; bfill()/ffill() are the
    # direct equivalents.
    df = df.bfill().ffill()
    if "host_id" in df.columns:
        df = df.drop(columns=["host_id"])

    # Convert t/f and yes/no flags to 1/0.
    flag_values = {"t": 1, "f": 0, "yes": 1, "no": 0}
    for c in df.columns:
        if df[c].dtype == "object":
            # infer_objects() makes the object -> numeric downcast explicit;
            # replace() alone only does it through deprecated silent
            # downcasting, which would leave flag columns as object dtype and
            # exclude them from numeric-only selections.
            df[c] = df[c].replace(flag_values).infer_objects()

    # Strip currency formatting from price.
    if "price" in df.columns:
        # Raw string: "\$" in a normal string literal is an invalid escape.
        df["price"] = df["price"].replace(r"[\$,]", "", regex=True).astype(float)
    return df

# Load the cleaned dataset once; every analysis view reads from `df`.
df = load_data()

##########################################################
# SIDEBAR
##########################################################
# Labels of the available views; the selected label is compared verbatim by
# the if/elif chain that renders the page.
opciones_analisis = [
    "Dashboard General",
    "Regresión Lineal Simple",
    "Regresión Lineal Múltiple",
    "Regresión No Lineal",
    "Regresión Logística",
]
st.sidebar.title("Dashboard de Brasil Airbnb")
pestaña = st.sidebar.selectbox("Selecciona análisis", opciones_analisis)

##########################################################
# Dashboard de exploración general
##########################################################
if pestaña == "Dashboard General":
    st.title("Variables Clave")

    def _show_category_frequencies(column, bar_scale, pie_palette):
        """Show the value counts of ``column`` as a bar chart beside a pie chart.

        Args:
            column: name of the column in ``df`` to summarize.
            bar_scale: continuous color scale name for the bar chart.
            pie_palette: discrete color sequence for the pie chart.

        When ``column`` is missing from ``df`` a warning is displayed and
        nothing is plotted, instead of the lookup raising ``KeyError``.
        """
        st.subheader(column)
        if column not in df.columns:
            st.warning(f"No se encontró la columna '{column}' en los datos.")
            return
        counts = df[column].value_counts().reset_index()
        counts.columns = ["Categoría", "Frecuencia"]
        bar_slot, pie_slot = st.columns(2)
        with bar_slot:
            bar_fig = px.bar(counts, x="Categoría", y="Frecuencia",
                             title=f"Frecuencia {column}",
                             color="Frecuencia", color_continuous_scale=bar_scale)
            st.plotly_chart(bar_fig, use_container_width=True)
        with pie_slot:
            pie_fig = px.pie(counts, names="Categoría", values="Frecuencia",
                             title=f"Distribución {column}",
                             color_discrete_sequence=pie_palette)
            st.plotly_chart(pie_fig, use_container_width=True)

    # Top 10 correlations with host_is_superhost
    if "host_is_superhost" in df.columns:
        st.subheader("Top 10 Variables correlacionadas con host_is_superhost")
        # Coerce the target so the correlation still works if the flag column
        # was left as object dtype by the loader.
        target = pd.to_numeric(df["host_is_superhost"], errors="coerce")
        # Exclude the target itself: its self-correlation of 1.0 would always
        # take the first slot of the ranking without carrying information.
        candidates = df.select_dtypes(include=np.number).drop(
            columns=["host_is_superhost"], errors="ignore")
        # dropna() removes constant columns, whose correlation is undefined.
        corr_host = candidates.corrwith(target).dropna().sort_values(ascending=False)
        top10_corr_host = corr_host.head(10).reset_index()
        top10_corr_host.columns = ["Variable", "Correlación"]
        st.table(top10_corr_host)

    # Categorical summaries: frequency bar chart + share pie chart
    _show_category_frequencies("host_response_time", "Blues",
                               px.colors.sequential.RdBu)
    _show_category_frequencies("host_is_superhost", "Oranges",
                               px.colors.sequential.Plasma)

    # price and location.review_scores_value distributions
    st.subheader("Price y location.review_scores_value")
    price_slot, score_slot = st.columns(2)
    with price_slot:
        if "price" in df.columns:
            price_fig = px.box(df, y="price", title="Distribución de Price",
                               color_discrete_sequence=["green"])
            st.plotly_chart(price_fig, use_container_width=True)
    with score_slot:
        if "location.review_scores_value" in df.columns:
            score_fig = px.box(df, y="location.review_scores_value",
                               title="Distribución location.review_scores_value",
                               color_discrete_sequence=["purple"])
            st.plotly_chart(score_fig, use_container_width=True)


##########################################################
# Regresión Lineal Simple
##########################################################
elif pestaña == "Regresión Lineal Simple":
    st.title("Regresión Lineal Simple")
    
    # Modelo 1: estimated_revenue_l365d ~ number_of_reviews_l30d
    st.subheader("estimated_revenue_l365d ~ number_of_reviews_l30d")
    if all(col in df.columns for col in ["estimated_revenue_l365d","number_of_reviews_l30d"]):
        X1 = df[["number_of_reviews_l30d"]]
        y1 = df["estimated_revenue_l365d"]
        model1 = LinearRegression().fit(X1, y1)
        y1_pred = model1.predict(X1)
        r2_model1 = r2_score(y1, y1_pred)
        corr1 = y1.corr(df["number_of_reviews_l30d"])
        st.write(f"R² del modelo: {r2_model1:.4f}")
        st.write(f"Correlación con number_of_reviews_l30d: {corr1:.4f}")
        
        fig1 = go.Figure()
        fig1.add_trace(go.Scatter(x=X1["number_of_reviews_l30d"], y=y1, mode='markers',
                                  marker=dict(color='blue', size=6, symbol='circle'),
                                  name='Datos reales'))
        fig1.add_trace(go.Scatter(x=X1["number_of_reviews_l30d"], y=y1_pred, mode='lines',
                                  line=dict(color='red', width=3), name='Predicción'))
        fig1.update_layout(title="Regresión Lineal Simple: Reales vs Predichos",
                           xaxis_title="number_of_reviews_l30d",
                           yaxis_title="estimated_revenue_l365d",
                           template="plotly_white")
        st.plotly_chart(fig1, use_container_width=True)
    else:
        st.warning("No se encontró la columna 'number_of_reviews_l30d' en los datos.")

##########################################################
# Regresión Lineal Múltiple
##########################################################
elif pestaña == "Regresión Lineal Múltiple":
    st.title("Regresión Lineal Múltiple")
    
    # Modelo A
    st.subheader("review_scores_rating ~ review_scores_accuracy + review_scores_cleanliness + review_scores_communication")
    cols_A = ["review_scores_accuracy","review_scores_cleanliness","review_scores_communication","review_scores_rating"]
    if all(c in df.columns for c in cols_A):
        X_A = df[["review_scores_accuracy","review_scores_cleanliness","review_scores_communication"]]
        y_A = df["review_scores_rating"]
        modelA = LinearRegression().fit(X_A, y_A)
        yA_pred = modelA.predict(X_A)
        r2_modelA = r2_score(y_A, yA_pred)
        st.write(f"R² del modelo: {r2_modelA:.4f}")
        # Correlaciones individuales
        for col in X_A.columns:
            st.write(f"Correlación con {col}: {y_A.corr(df[col]):.4f}")
        figA = px.scatter(x=y_A, y=yA_pred, title="Reales vs Predichos Modelo A")
        figA.add_trace(go.Scatter(x=[y_A.min(),y_A.max()], y=[y_A.min(),y_A.max()],
                                  mode='lines', line=dict(color='red', dash='dash', width=3), name='Línea ideal'))
        st.plotly_chart(figA, use_container_width=True)
    
    # Modelo B
    st.subheader("reviews_per_month ~ number_of_reviews_ltm + estimated_occupancy_l365d + number_of_reviews_l30d")
    cols_B = ["reviews_per_month","number_of_reviews_ltm","estimated_occupancy_l365d","number_of_reviews_l30d"]
    if all(c in df.columns for c in cols_B):
        X_B = df[["number_of_reviews_ltm","estimated_occupancy_l365d","number_of_reviews_l30d"]]
        y_B = df["reviews_per_month"]
        modelB = LinearRegression().fit(X_B, y_B)
        yB_pred = modelB.predict(X_B)
        r2_modelB = r2_score(y_B, yB_pred)
        st.write(f"R² del modelo: {r2_modelB:.4f}")
        for col in X_B.columns:
            st.write(f"Correlación con {col}: {y_B.corr(df[col]):.4f}")
        figB = px.scatter(x=y_B, y=yB_pred, title="Reales vs Predichos Modelo B")
        figB.add_trace(go.Scatter(x=[y_B.min(),y_B.max()], y=[y_B.min(),y_B.max()],
                                  mode='lines', line=dict(color='red', dash='dash', width=3), name='Línea ideal'))
        st.plotly_chart(figB, use_container_width=True)

##########################################################
# Regresión No lineal
##########################################################
elif pestaña == "Regresión No Lineal":
    st.title("Regresión No Lineal: price ~ bathrooms + bedrooms + accommodates")
    if all(c in df.columns for c in ["price","bathrooms","bedrooms","accommodates"]):
        x1 = df["bathrooms"].values
        x2 = df["bedrooms"].values
        x3 = df["accommodates"].values
        y = df["price"].values
        y = np.where(y <= 0, 1, y)
        def func_multi(X, a, b, c, d, e, f, g, h, i, j):
            x1, x2, x3 = X
            return (a*x1**2 + b*x2**2 + c*x3**2 + d*x1*x2 + e*x1*x3 + f*x2*x3 + g*x1 + h*x2 + i*x3 + j)
        try:
            parametros, _ = curve_fit(func_multi, (x1, x2, x3), y, maxfev=10000)
            y_pred = func_multi((x1, x2, x3), *parametros)
            r2_nl = r2_score(y, y_pred)
            st.write(f"R² del modelo cuadrático multivariable: {r2_nl:.4f}")
            fig = go.Figure()
            fig.add_trace(go.Scatter(y=y, x=np.arange(len(y)), mode='markers', name='Datos reales',
                                     marker=dict(color='blue', size=5)))
            fig.add_trace(go.Scatter(y=y_pred, x=np.arange(len(y_pred)), mode='markers', name='Predicciones',
                                     marker=dict(color='red', size=5)))
            st.plotly_chart(fig, use_container_width=True)
            st.write("Parámetros ajustados del modelo:", parametros)
        except Exception as e:
            st.error(f"No se pudo ajustar el modelo: {e}")

##########################################################
# Regresión Logística
##########################################################
elif pestaña == "Regresión Logística":
    st.title("Regresión Logística: accommodates ~ beds + bedrooms + bathrooms")
    if all(c in df.columns for c in ["accommodates","beds","bedrooms","bathrooms"]):
        X_log = df[["beds","bedrooms","bathrooms"]]
        y_log = (df["accommodates"]>2).astype(int)
        X_train, X_test, y_train, y_test = train_test_split(X_log, y_log, test_size=0.3, random_state=42)
        scaler = StandardScaler()
        X_train_s = scaler.fit_transform(X_train)
        X_test_s = scaler.transform(X_test)
        model_log = LogisticRegression(max_iter=1000)
        model_log.fit(X_train_s, y_train)
        y_pred = model_log.predict(X_test_s)
        st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.3f}")
        st.write(f"Precisión: {precision_score(y_test, y_pred):.3f}")
        st.write(f"Recall: {recall_score(y_test, y_pred):.3f}")
        st.write(f"F1-score: {f1_score(y_test, y_pred):.3f}")
        cm = confusion_matrix(y_test, y_pred)
        fig_cm = px.imshow(cm, text_auto=True, color_continuous_scale="Oranges",
                           title="Matriz de Confusión", labels=dict(x="Predicho", y="Real"))
        st.plotly_chart(fig_cm, use_container_width=True)


Overwriting app.py
