In [1]:
import numpy as np
import pandas as pd
from gapminder import gapminder
import matplotlib as plt

import seaborn as sns
import seaborn.objects as so

from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
def coefs_rl(x, y):
    """Least-squares coefficients of the straight line ``y = beta_1 * x + beta_0``.

    Parameters
    ----------
    x, y : array-like
        Paired observations. Objects that already expose ``.mean()`` (pandas
        Series, numpy arrays) are used as they are; anything else (list,
        tuple, ...) is converted to a float numpy array first.

    Returns
    -------
    tuple
        ``(beta_1, beta_0)``: slope first, intercept second.

    Notes
    -----
    The slope divides by the sum of squared deviations of ``x``; when every
    ``x`` is identical that sum is zero and the slope is undefined.
    """
    # Plain sequences have no .mean(); promote them to float arrays.
    # Series / ndarrays are deliberately left untouched so their existing
    # behaviour is exactly what it was before.
    if not hasattr(x, "mean"):
        x = np.asarray(x, dtype=float)
    if not hasattr(y, "mean"):
        y = np.asarray(y, dtype=float)

    x_mean = x.mean()
    y_mean = y.mean()
    dx = x - x_mean

    beta_1 = np.sum(dx * (y - y_mean)) / np.sum(dx ** 2)
    beta_0 = y_mean - beta_1 * x_mean
    return beta_1, beta_0

In [None]:
# Keep only the Argentina rows of gapminder, then fit lifeExp against year
# with the hand-written least-squares helper.
arg = gapminder.loc[gapminder["country"] == "Argentina"]
print(coefs_rl(x=arg["year"], y=arg["lifeExp"]))

In [None]:
# Same Argentina fit through scikit-learn.
# X and y are both passed as one-column frames, hence the 2-D coef_ and
# 1-D intercept_ that are unpacked below.
modelo = linear_model.LinearRegression().fit(arg[["year"]], arg[["lifeExp"]])
beta_1 = modelo.coef_[0, 0]
beta_0 = modelo.intercept_[0]
print(beta_1, beta_0)

In [None]:
# Load the real-estate table and preview its first five rows.
inm = pd.read_csv(filepath_or_buffer="inmuebles.csv")
inm.head(n=5)

In [None]:
# Scatter plot of precio against superficie.
(
    so.Plot(inm, x="superficie", y="precio")
    .add(so.Dot())
)

In [None]:
# Same scatter plot, overlaid with a degree-1 polynomial fit (straight line).
(
    so.Plot(inm, x="superficie", y="precio")
    .add(so.Dot())
    .add(so.Line(), so.PolyFit(order=1), label="Regresion")
    .label(title="Datos inmobiliarios")
)

In [None]:
# scikit-learn fit of precio on superficie; rebinds modelo, beta_1 and beta_0.
modelo = linear_model.LinearRegression().fit(inm[["superficie"]], inm[["precio"]])
beta_1 = modelo.coef_[0, 0]
beta_0 = modelo.intercept_[0]
print(beta_1, beta_0)

In [None]:
# Slope (b1) and intercept (b0) of precio on superficie from the manual helper.
b1, b0 = coefs_rl(x=inm["superficie"], y=inm["precio"])
print("El precio por metro cuadrado es:", b1)
print("El precio inicial es:", b0)

In [None]:
def ecm(x, y, b1, b0):
    """Mean squared error of the line ``b1 * x + b0`` against ``y``.

    The squared differences between ``y`` and the line are summed and the
    total is divided by ``len(x)``.
    """
    predicho = b1 * x + b0
    errores_cuadrados = (y - predicho) ** 2
    return np.sum(errores_cuadrados) / len(x)

In [None]:
def r_cuad(x, y, b1, b0):
    """Coefficient of determination (R^2) of the line ``b1 * x + b0`` against ``y``.

    Computed as ``1 - SS_res / SS_tot``, where ``SS_res`` is the sum of squared
    residuals and ``SS_tot`` the sum of squared deviations of ``y`` from its
    mean.

    The previous formula (explained sum of squares over ``SS_tot``) matches
    this one only when ``b1`` and ``b0`` are the least-squares coefficients;
    for any other line it could exceed 1. This form is valid for arbitrary
    coefficients and may be negative when the line fits worse than the mean.

    Parameters
    ----------
    x, y : array-like exposing arithmetic and ``.mean()``
        Paired observations (e.g. pandas Series or numpy arrays).
    b1, b0 : float
        Slope and intercept of the line being evaluated.

    Returns
    -------
    float
        The R^2 score. Undefined (division by zero) when ``y`` is constant.
    """
    residuos = y - (b1 * x + b0)
    ss_res = np.sum(residuos ** 2)
    ss_tot = np.sum((y - y.mean()) ** 2)
    return 1 - ss_res / ss_tot

In [None]:
# Mean squared error and R^2 of the (b1, b0) line over the whole table.
print(ecm(x=inm["superficie"], y=inm["precio"], b1=b1, b0=b0))
print(r_cuad(x=inm["superficie"], y=inm["precio"], b1=b1, b0=b0))

In [None]:
# Display the superficie column.
inm.loc[:, "superficie"]

In [None]:
# Predicted precio from the manual coefficients, scored with scikit-learn's
# metrics so they can be compared with ecm / r_cuad.
precio_r = b1 * inm["superficie"] + b0
print(mean_squared_error(y_true=inm["precio"], y_pred=precio_r))
print(r2_score(y_true=inm["precio"], y_pred=precio_r))

In [None]:
# Distribution of precio within each zona.
sns.boxplot(x="zona", y="precio", data=inm)

In [None]:
# One sub-table per zona label.
z1 = inm.loc[inm["zona"] == "Zona 1"]
z2 = inm.loc[inm["zona"] == "Zona 2"]
z3 = inm.loc[inm["zona"] == "Zona 3"]

In [None]:
# Slope / intercept of precio on superficie, fitted separately for each zona.
b1z1, b0z1 = coefs_rl(x=z1["superficie"], y=z1["precio"])
b1z2, b0z2 = coefs_rl(x=z2["superficie"], y=z2["precio"])
b1z3, b0z3 = coefs_rl(x=z3["superficie"], y=z3["precio"])

In [None]:
# Mean squared error and R^2 of each zona's own line.
ecm1 = ecm(z1["superficie"], z1["precio"], b1z1, b0z1)
r_cuad1 = r_cuad(z1["superficie"], z1["precio"], b1z1, b0z1)

ecm2 = ecm(z2["superficie"], z2["precio"], b1z2, b0z2)
r_cuad2 = r_cuad(z2["superficie"], z2["precio"], b1z2, b0z2)

ecm3 = ecm(z3["superficie"], z3["precio"], b1z3, b0z3)
r_cuad3 = r_cuad(z3["superficie"], z3["precio"], b1z3, b0z3)

In [None]:
# One line per zona: slope, intercept, mean squared error, R^2.
print(*(b1z1, b0z1, ecm1, r_cuad1))
print(*(b1z2, b0z2, ecm2, r_cuad2))
print(*(b1z3, b0z3, ecm3, r_cuad3))

In [None]:
# One facet per zona (columns in sorted label order), each with its own
# scatter and degree-1 fit, coloured by zona.
(
    so.Plot(inm, x="superficie", y="precio", color="zona")
    .add(so.Dot())
    .add(so.Line(), so.PolyFit(order=1), label="Regresion")
    .facet(col="zona", order=sorted(inm["zona"].unique()))
)

In [None]:
# Evaluate both lines at superficie = 105: the global fit (p) and the
# Zona 2 fit (p_z2).
p = b0 + b1 * 105
p_z2 = b0z2 + b1z2 * 105
print(p, p_z2)

In [None]:
# Invert the Zona 2 line: the superficie at which it reaches precio = 300.
sp = (300 - b0z2) / b1z2
print(sp)

In [None]:
# Second real-estate table, read from the "outliers" CSV.
inm_out = pd.read_csv(filepath_or_buffer="inmuebles_outliers.csv")

In [None]:
# Distribution of precio within each zona for the outliers table.
sns.boxplot(x="zona", y="precio", data=inm_out)

In [None]:
# Zona 2 rows of the outliers table.
z2_out = inm_out.loc[inm_out["zona"] == "Zona 2"]

In [None]:
# Refit Zona 2 on the outliers table and score that line on the same rows.
b1z2_out, b0z2_out = coefs_rl(x=z2_out["superficie"], y=z2_out["precio"])
ecm2_out = ecm(z2_out["superficie"], z2_out["precio"], b1z2_out, b0z2_out)
r_cuad2_out = r_cuad(z2_out["superficie"], z2_out["precio"], b1z2_out, b0z2_out)

In [None]:
# Zona 2 side by side: original table first, outliers table second
# (slope, intercept, mean squared error, R^2).
print(*(b1z2, b0z2, ecm2, r_cuad2))
print(*(b1z2_out, b0z2_out, ecm2_out, r_cuad2_out))

In [None]:
# Load the bitcoin table and preview its first five rows.
btc = pd.read_csv(filepath_or_buffer="bitcoin.csv")
btc.head(n=5)

In [None]:
# Parse Date (YYYY-MM-DD text) into datetimes, in place on btc.
btc["Date"] = pd.to_datetime(btc["Date"], format="%Y-%m-%d")

# Rows strictly between 2021-01-01 and 2021-07-01.
# NOTE(review): both bounds are exclusive, so a row dated exactly 2021-01-01
# is dropped - confirm that is intended.
btc_2021 = btc.loc[
    (btc["Date"] > "2021-01-01")
    & (btc["Date"] < "2021-07-01")
]

In [None]:
# Close over time for the selected window, with a degree-5 polynomial fit
# drawn on top.
(
    so.Plot(btc_2021, x="Date", y="Close")
    .add(so.Line())
    .add(so.Line(), so.PolyFit(order=5))
)

In [None]:
# seaborn's bundled "tips" example dataset.
tips = sns.load_dataset(name="tips")

In [None]:
# Centre total_bill on its own mean: xf has mean (approximately) zero.
x = tips.loc[:, "total_bill"]
xf = x - x.mean()

In [None]:
# Tip against the mean-centred bill, with the straight-line fit on top.
# so.PolyFit() with no argument fits a degree-2 polynomial (its default
# order), which does not match the linear coefficients computed for this
# data with coefs_rl; the order is therefore set to 1 explicitly, as in the
# other regression plots of this notebook.
(
    so.Plot(x = xf, y = tips["tip"])
    .add(so.Dot())
    .add(so.Line(), so.PolyFit(order = 1))
)

In [None]:
# Average tip, for comparison with the intercept of the centred fit.
tips.loc[:, "tip"].mean()

In [None]:
# Slope (c) and intercept (d) of tip on the mean-centred bill.
c, d = coefs_rl(x=xf, y=tips["tip"])
print(c, d)

In [None]:
# Same fit on the uncentred bill; rebinds c and d.
c, d = coefs_rl(x=tips["total_bill"], y=tips["tip"])
print(c, d)