In [None]:
import pandas as pd
from scipy.stats import kstest


file_path = "../0_Dataset/Listings.csv"
data = pd.read_csv(file_path, usecols=[ 'price','city'], delimiter=',', encoding='latin-1')

In [8]:
# Eliminar nulos en price o city
data = data.dropna(subset=['price', 'city'])

# Filtrar precios válidos (>0)
data = data[data['price'] > 0]

# Crear tabla para almacenar resultados
resultados = []


In [11]:
for ciudad in data['city'].unique():
    precios = data[data['city'] == ciudad]['price'].copy()

    # Eliminar outliers con IQR
    q1 = precios.quantile(0.25)
    q3 = precios.quantile(0.75)
    iqr = q3 - q1
    li = q1 - 1.5 * iqr
    ls = q3 + 1.5 * iqr
    precios = precios[(precios >= li) & (precios <= ls)]

    # Continuar si hay suficientes datos
    if len(precios) > 30:
        # Normalizar
        z = (precios - precios.mean()) / precios.std()

        # Prueba K-S
        estadistico, p_valor = kstest(z, 'norm')
        conclusion = "✅ Normal" if p_valor >= 0.05 else "❌ No Normal"

        # Guardar resultados
        resultados.append({
            'Ciudad': ciudad,
            'n': len(precios),
            'Estadístico D': round(estadistico, 4),
            'p-valor': round(p_valor, 4),
            'Conclusión': conclusion
        })

# Mostrar resultados en DataFrame
df_resultados = pd.DataFrame(resultados).sort_values(by='p-valor', ascending=False).reset_index(drop=True)
df_resultados

Unnamed: 0,Ciudad,n,Estadístico D,p-valor,Conclusión
0,Paris,59526,0.1135,0.0,❌ No Normal
1,New York,34285,0.1259,0.0,❌ No Normal
2,Bangkok,17681,0.141,0.0,❌ No Normal
3,Rio de Janeiro,23845,0.1598,0.0,❌ No Normal
4,Sydney,30302,0.1303,0.0,❌ No Normal
5,Istanbul,22602,0.1251,0.0,❌ No Normal
6,Rome,25415,0.1022,0.0,❌ No Normal
7,Hong Kong,6467,0.1557,0.0,❌ No Normal
8,Mexico City,18568,0.1181,0.0,❌ No Normal
9,Cape Town,17115,0.1593,0.0,❌ No Normal
