# Data science demo

In [19]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn as sl
from sklearn.linear_model import LinearRegression

## Load the dataset to see what we're dealing with

In [None]:
# Read the flats table from a relative path (resolved against the current
# working directory) and preview its first five rows as the cell output.
dataset = pd.read_csv('flats.csv')
dataset.head(n=5)

See how flat prices vary with size:

In [None]:
# Scatter of price against size. An explicit Axes is used (instead of the
# implicit pyplot "current figure") and the axes are labelled so the figure
# can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(dataset['size'], dataset['price'])
ax.set(xlabel='Size', ylabel='Price', title='Price vs. size')
ax.grid(True)
plt.show()

Next, see how price varies with the number of rooms:

In [None]:
# Scatter of price against room count. An explicit Axes is used (instead of
# the implicit pyplot "current figure") and the axes are labelled so the
# figure can be read on its own when the notebook is skimmed.
fig, ax = plt.subplots()
ax.scatter(dataset['rooms'], dataset['price'])
ax.set(xlabel='Rooms', ylabel='Price', title='Price vs. rooms')
ax.grid(True)
plt.show()

## Now, let's create a model and fit it to the data we have.

In [23]:
# Choose the predictor columns (inputs) and the target column (output)
# that the regression will be trained on.
feature_columns = ['size', 'rooms']
target_column = 'price'

X = dataset[feature_columns]
y = dataset[target_column]

In [None]:
# Fit a linear regression of y on the columns of X.
# `fit` returns the estimator itself, so the chained call leaves `model`
# fitted; the bare `model` line keeps the estimator repr as the cell output.
model = LinearRegression().fit(X, y)
model

## See what linear function the model fits:

In [None]:
# Pull the fitted parameters off the model; both names are reused by the
# formula cell that follows.
intercept = model.intercept_
coefficients = model.coef_

# Report one weight per feature column, in the column order of X, then the
# constant term.
for position, name in enumerate(X.columns):
    coef = coefficients[position]
    print(f'Coefficient for {name}: {coef}')
print(f'Intercept: {intercept}')

In [None]:
# Build a human-readable formula: one "(weight * feature)" term per column of
# X, joined with " + ", followed by the intercept term.
feature_terms = [
    f'({coef} * {name})'
    for name, coef in zip(X.columns, coefficients)
]
intercept_term = f' + ({intercept})'

total_formula = 'price = ' + ' + '.join(feature_terms) + intercept_term
print('Regression formula:', total_formula)

## Visualize the fitted line (price vs. size, rooms fixed at the mean):

In [None]:
# Overlay the model's prediction on the raw size/price scatter.
# (numpy is imported in the notebook's import cell rather than here, so all
# dependencies are visible in one place.)
fig, ax = plt.subplots()
ax.scatter(dataset["size"], dataset["price"], alpha=0.5, label="Data")

# The model has two inputs, so to draw a single line we sweep `size` over its
# observed range while holding `rooms` constant at its mean.
size_grid = np.linspace(dataset["size"].min(), dataset["size"].max(), 100)
rooms_mean = dataset["rooms"].mean()

# Same column names and order as the training features; the scalar
# `rooms_mean` is broadcast to every row of the grid.
X_grid = pd.DataFrame({
    "size": size_grid,
    "rooms": rooms_mean
})

y_pred_grid = model.predict(X_grid)

ax.plot(size_grid, y_pred_grid, color="red", linewidth=2, label="Model prediction")

ax.set_xlabel("Size")
ax.set_ylabel("Price")
ax.set_title("Price vs. size with fitted model line\n(rooms fixed at average)")
ax.legend()
ax.grid(True)
plt.show()

## Making new predictions

In [None]:
# Suppose we want to see the predicted price for a flat of size 70 and 3 rooms
flat_size = 70
flat_rooms = 3
predicted_price = model.predict([[flat_size, flat_rooms]])
print(f'Predicted price for a flat of size {flat_size} and {flat_rooms} rooms: {predicted_price[0]}')
