# Día 6: Introducción a Machine Learning

**Introducción a Python para ML** | EAE Business School | 9 de febrero de 2026

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import plotly.express as px
import plotly.graph_objects as go

## Parte 1: Cargar Datos

In [None]:
# Read the Barcelona housing sample directly from the course repository.
url = (
    'https://raw.githubusercontent.com/ber2/eae-python/main/data/'
    'Houses_Barcelona_samp.csv'
)
df = pd.read_csv(url)

# Report (rows, columns), then preview the first rows as the cell output.
dimensions = df.shape
print(f'Shape: {dimensions}')
df.head(5)

## Parte 2: Train/Test Split

In [None]:
# Keep only the target and the single feature, dropping rows with missing values.
df_clean = df.loc[:, ['price', 'sqrmts']].dropna()
y = df_clean['price']
X = df_clean[['sqrmts']]  # double brackets keep X as a one-column DataFrame

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

n_train, n_test = len(X_train), len(X_test)
print(f'Train: {n_train}, Test: {n_test}')

## Parte 3: Entrenar Modelo

In [None]:
# Fit the linear regression on the training portion only (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)

# Pull out the fitted line's parameters: the intercept and the single
# coefficient associated with the sqrmts feature.
intercept, slope = model.intercept_, model.coef_[0]
print(f'Intercept: {intercept:.0f}€')
print(f'Coeficiente: {slope:.0f}€/m²')

## Parte 4: Visualizar

In [None]:
# Plot the observed prices together with the fitted regression line.
#
# Plotly connects the points of a 'lines' trace in row order, so the rows are
# sorted by surface first; otherwise the path zigzags back and forth along the
# x axis (it only looks correct by accident because the model is a straight line).
df_viz = df_clean.sort_values('sqrmts').copy()

# Predict from a DataFrame with the same column name used during fit.
df_viz['pred'] = model.predict(df_viz[['sqrmts']])

fig = px.scatter(df_viz, x='sqrmts', y='price', opacity=0.5, title='Regresión Lineal')
fig.add_trace(go.Scatter(x=df_viz['sqrmts'], y=df_viz['pred'], mode='lines', name='Modelo'))
fig.show()

## Parte 5: Predicciones

In [None]:
# Predict prices for a few new flats, given their surface in m².
superficies = [60, 85, 120]

# The model was fitted on a DataFrame with a 'sqrmts' column. Passing a bare
# nested list makes scikit-learn warn that X has no valid feature names, so
# the inputs are wrapped in a DataFrame with the same column name.
nuevos = pd.DataFrame({'sqrmts': superficies})
predicciones = model.predict(nuevos)

# Iterate over the same list used to build the inputs, so the printed
# surfaces can never drift out of sync with the predictions.
for sqm, pred in zip(superficies, predicciones):
    print(f'{sqm}m²: {pred:.0f}€')

## Ejercicio: Cars Dataset

In [None]:
# Read the cars dataset for the exercise from the course repository.
url_cars = (
    'https://raw.githubusercontent.com/ber2/eae-python/main/data/'
    'cars_1990.csv'
)
df_cars = pd.read_csv(url_cars)

# Preview the first rows as the cell output.
df_cars.head(5)

In [None]:
# EXERCISE: Predict the car price using horsepower
# NOTE: the exact column names are not shown here; check df_cars.columns first.
# 1. Train/test split
# 2. Train models
# 3. Interpret coefficients
# Your solution here
