<a href="https://colab.research.google.com/github/avarela1963/CIFO_PROJECT_GROUP_3/blob/main/cars4you-toys/notebooks/00_import_core_libraries%20/toy_import_core_libraries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Célula 1 — Imports + seed

In [1]:
import numpy as np
import pandas as pd

# Fix the seed of NumPy's global (legacy) random generator so that every
# np.random.* draw made later in the notebook is reproducible on a fresh run.
SEED = 42
np.random.seed(SEED)


# Célula 2 — Criar um DataFrame “Cars4You-like”


In [2]:
# Build a small synthetic used-car table. The draws come from NumPy's global
# generator and are made in column order (ano, km, potencia_cv, marca,
# combustivel), so the values repeat only when the seed was set beforehand.
n_rows = 8
df = pd.DataFrame(
    data=dict(
        # randint's upper bound is exclusive: years fall in 2008..2022
        ano=np.random.randint(2008, 2023, size=n_rows),
        km=np.random.randint(10_000, 250_000, size=n_rows),
        potencia_cv=np.random.randint(70, 200, size=n_rows),
        marca=np.random.choice(["BMW", "Audi", "Renault", "Peugeot"], size=n_rows),
        combustivel=np.random.choice(["Gasolina", "Diesel", "Híbrido"], size=n_rows),
    )
)

# Last expression of the cell: render the whole (8-row) frame.
df


Unnamed: 0,ano,km,potencia_cv,marca,combustivel
0,2014,64886,173,Audi,Diesel
1,2011,147337,122,BMW,Híbrido
2,2020,223458,71,Peugeot,Diesel
3,2022,178266,157,BMW,Diesel
4,2018,97498,107,BMW,Híbrido
5,2015,122727,199,Renault,Diesel
6,2020,136324,90,Renault,Híbrido
7,2012,185203,127,Renault,Híbrido


# Célula 3 — Pandas → NumPy (várias formas)

In [6]:
# 1) Whole DataFrame -> NumPy array. The frame mixes numeric and text columns,
#    so the single array dtype that can hold every value is `object`.
arr_all = df.to_numpy()

# Show the array together with its dtype, like the other cells in this section
# (the previous dangling comma displayed an accidental 1-tuple without the dtype).
arr_all, arr_all.dtype


(array([[2014, 64886, 173, 'Audi', 'Diesel'],
        [2011, 147337, 122, 'BMW', 'Híbrido'],
        [2020, 223458, 71, 'Peugeot', 'Diesel'],
        [2022, 178266, 157, 'BMW', 'Diesel'],
        [2018, 97498, 107, 'BMW', 'Híbrido'],
        [2015, 122727, 199, 'Renault', 'Diesel'],
        [2020, 136324, 90, 'Renault', 'Híbrido'],
        [2012, 185203, 127, 'Renault', 'Híbrido']], dtype=object),)

In [7]:
# 2) Numeric columns only -> NumPy array (keeps a numeric dtype)
num_cols = ["ano", "km", "potencia_cv"]
arr_num = df.loc[:, num_cols].to_numpy()

# Display the array next to its dtype.
(arr_num, arr_num.dtype)


(array([[  2014,  64886,    173],
        [  2011, 147337,    122],
        [  2020, 223458,     71],
        [  2022, 178266,    157],
        [  2018,  97498,    107],
        [  2015, 122727,    199],
        [  2020, 136324,     90],
        [  2012, 185203,    127]]),
 dtype('int64'))

In [11]:
# 3) A single column -> one-dimensional NumPy array
km_np = df.loc[:, "km"].to_numpy()

# Preview the first five values and the dtype.
(km_np[:5], km_np.dtype)


(array([ 64886, 147337, 223458, 178266,  97498]), dtype('int64'))

# Célula 4 — NumPy → Pandas (array para DataFrame/Series)

In [9]:
# Numeric array -> DataFrame. A bare array carries no labels, so the column
# names have to be supplied explicitly.
df_from_np = pd.DataFrame(data=arr_num, columns=num_cols)
df_from_np.head(5)


Unnamed: 0,ano,km,potencia_cv
0,2014,64886,173
1,2011,147337,122
2,2020,223458,71
3,2022,178266,157
4,2018,97498,107


In [12]:
# 1-D array -> Series (the name becomes the Series label)
km_series = pd.Series(data=km_np, name="km")
km_series.head(5)


Unnamed: 0,km
0,64886
1,147337
2,223458
3,178266
4,97498


# Célula 5 — Exemplo prático: criar “preço” com NumPy e voltar ao DataFrame


In [13]:
# Toy price formula (not realistic; for practice only):
#   base price - mileage penalty + power bonus + year bonus + noise
km_penalty = 0.03 * df["km"].to_numpy()
power_bonus = 80 * df["potencia_cv"].to_numpy()
year_bonus = 250 * (df["ano"].to_numpy() - 2010)
# One Gaussian noise value per row (mean 0, standard deviation 800),
# drawn from NumPy's global generator.
noise = np.random.normal(0, 800, size=len(df))

# Terms are combined left to right, in the order given in the formula above.
preco = 20000 - km_penalty + power_bonus + year_bonus + noise

# Round to whole units and write the result back to the DataFrame as integers.
df["preco_estimado"] = preco.round(0).astype(int)
df


Unnamed: 0,ano,km,potencia_cv,marca,combustivel,preco_estimado
0,2014,64886,173,Audi,Diesel,32874
1,2011,147337,122,BMW,Híbrido,25874
2,2020,223458,71,Peugeot,Diesel,21810
3,2022,178266,157,BMW,Diesel,30878
4,2018,97498,107,BMW,Híbrido,27400
5,2015,122727,199,Renault,Diesel,33464
6,2020,136324,90,Renault,Híbrido,25686
7,2012,185203,127,Renault,Híbrido,25636


# Célula 6 — Dica importante: cuidado com dtype=object

In [14]:
# Mixing text and numbers in one array yields dtype=object, which makes
# numeric operations awkward and slow.
arr_mixed = df.loc[:, ["ano", "marca"]].to_numpy()

# Display the array next to its dtype.
(arr_mixed, arr_mixed.dtype)


(array([[2014, 'Audi'],
        [2011, 'BMW'],
        [2020, 'Peugeot'],
        [2022, 'BMW'],
        [2018, 'BMW'],
        [2015, 'Renault'],
        [2020, 'Renault'],
        [2012, 'Renault']], dtype=object),
 dtype('O'))