# **Functions**

In [1]:
# Fetch data from the API and load it into a DataFrame
def get_data(url, timeout=10):
    """Fetch a JSON payload from ``url`` and return it as a pandas DataFrame.

    Relies on the notebook-level names ``requests`` and ``pd`` being imported
    before the function is called.

    Parameters
    ----------
    url : str
        Endpoint to query with an HTTP GET request.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.get``). Defaults to 10 so an unresponsive server cannot
        hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame or None
        The decoded JSON wrapped in a DataFrame, or ``None`` when the request,
        the JSON decoding or the DataFrame construction fails. Failures are
        reported with ``print`` rather than raised, so callers must check for
        ``None`` before using the result.
    """
    try:
        # Issue the GET request; the timeout bounds how long we wait on the server
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for error status codes
        # Decode the response body as JSON
        data = response.json()
        # Load the decoded payload into a pandas DataFrame
        df = pd.DataFrame(data)
        return df
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except Exception as err:
        # Covers connection errors, timeouts, invalid JSON and DataFrame construction errors
        print(f"Other error occurred: {err}")
    return None

# **Libraries**

In [2]:
import ast
from pathlib import Path

import pandas as pd
import requests

# **Loading Data**

## Products

In [151]:
# API URL that returns all products
url_products = 'https://fakestoreapi.com/products'

# get_data returns None when the request fails, in which case .head() raises AttributeError
df_products = get_data(url_products)
df_products.head()

Unnamed: 0,id,title,price,description,category,image,rating
0,1,"Fjallraven - Foldsack No. 1 Backpack, Fits 15 ...",109.95,Your perfect pack for everyday use and walks i...,men's clothing,https://fakestoreapi.com/img/81fPKd-2AYL._AC_S...,"{'rate': 3.9, 'count': 120}"
1,2,Mens Casual Premium Slim Fit T-Shirts,22.3,"Slim-fitting style, contrast raglan long sleev...",men's clothing,https://fakestoreapi.com/img/71-3HjGNDUL._AC_S...,"{'rate': 4.1, 'count': 259}"
2,3,Mens Cotton Jacket,55.99,great outerwear jackets for Spring/Autumn/Wint...,men's clothing,https://fakestoreapi.com/img/71li-ujtlUL._AC_U...,"{'rate': 4.7, 'count': 500}"
3,4,Mens Casual Slim Fit,15.99,The color could be slightly different between ...,men's clothing,https://fakestoreapi.com/img/71YXzeOuslL._AC_U...,"{'rate': 2.1, 'count': 430}"
4,5,John Hardy Women's Legends Naga Gold & Silver ...,695.0,"From our Legends Collection, the Naga was insp...",jewelery,https://fakestoreapi.com/img/71pWzhdJNwL._AC_U...,"{'rate': 4.6, 'count': 400}"


In [152]:
# Summarize the products columns: non-null counts, dtypes and memory usage
df_products.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           20 non-null     int64  
 1   title        20 non-null     object 
 2   price        20 non-null     float64
 3   description  20 non-null     object 
 4   category     20 non-null     object 
 5   image        20 non-null     object 
 6   rating       20 non-null     object 
dtypes: float64(1), int64(1), object(5)
memory usage: 1.2+ KB


In [157]:
# Make sure the output folder exists: to_csv does not create missing directories
Path('data').mkdir(parents=True, exist_ok=True)
# Persist the raw products data (index included, so it can be reloaded with index_col=0)
df_products.to_csv('data/df_products.csv')
print('df_products salvo!')

df_products salvo!


## Carts

In [153]:
# API URL that returns all carts
url_carts = 'https://fakestoreapi.com/carts'

# get_data returns None when the request fails, in which case .head() raises AttributeError
df_carts = get_data(url_carts)
df_carts.head()

Unnamed: 0,id,userId,date,products,__v
0,1,1,2020-03-02T00:00:00.000Z,"[{'productId': 1, 'quantity': 4}, {'productId'...",0
1,2,1,2020-01-02T00:00:00.000Z,"[{'productId': 2, 'quantity': 4}, {'productId'...",0
2,3,2,2020-03-01T00:00:00.000Z,"[{'productId': 1, 'quantity': 2}, {'productId'...",0
3,4,3,2020-01-01T00:00:00.000Z,"[{'productId': 1, 'quantity': 4}]",0
4,5,3,2020-03-01T00:00:00.000Z,"[{'productId': 7, 'quantity': 1}, {'productId'...",0


In [154]:
# Summarize the carts columns: non-null counts, dtypes and memory usage
df_carts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        7 non-null      int64 
 1   userId    7 non-null      int64 
 2   date      7 non-null      object
 3   products  7 non-null      object
 4   __v       7 non-null      int64 
dtypes: int64(3), object(2)
memory usage: 412.0+ bytes


In [158]:
# Make sure the output folder exists: to_csv does not create missing directories
Path('data').mkdir(parents=True, exist_ok=True)
# Persist the raw carts data (index included, so it can be reloaded with index_col=0)
df_carts.to_csv('data/df_carts.csv')
print('df_carts salvo!')

df_carts salvo!


## Users

In [155]:
# API URL that returns all users
url_users = 'https://fakestoreapi.com/users'
# get_data returns None when the request fails, in which case .head() raises AttributeError
df_users = get_data(url_users)
# NOTE(review): the preview exposes the `email` and `password` columns returned by the API
df_users.head()

Unnamed: 0,address,id,email,username,password,name,phone,__v
0,"{'geolocation': {'lat': '-37.3159', 'long': '8...",1,john@gmail.com,johnd,m38rmF$,"{'firstname': 'john', 'lastname': 'doe'}",1-570-236-7033,0
1,"{'geolocation': {'lat': '-37.3159', 'long': '8...",2,morrison@gmail.com,mor_2314,83r5^_,"{'firstname': 'david', 'lastname': 'morrison'}",1-570-236-7033,0
2,"{'geolocation': {'lat': '40.3467', 'long': '-3...",3,kevin@gmail.com,kevinryan,kev02937@,"{'firstname': 'kevin', 'lastname': 'ryan'}",1-567-094-1345,0
3,"{'geolocation': {'lat': '50.3467', 'long': '-2...",4,don@gmail.com,donero,ewedon,"{'firstname': 'don', 'lastname': 'romer'}",1-765-789-6734,0
4,"{'geolocation': {'lat': '40.3467', 'long': '-4...",5,derek@gmail.com,derek,jklg*_56,"{'firstname': 'derek', 'lastname': 'powell'}",1-956-001-1945,0


In [156]:
# Summarize the users columns: non-null counts, dtypes and memory usage
df_users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   address   10 non-null     object
 1   id        10 non-null     int64 
 2   email     10 non-null     object
 3   username  10 non-null     object
 4   password  10 non-null     object
 5   name      10 non-null     object
 6   phone     10 non-null     object
 7   __v       10 non-null     int64 
dtypes: int64(2), object(6)
memory usage: 772.0+ bytes


In [159]:
# Make sure the output folder exists: to_csv does not create missing directories
Path('data').mkdir(parents=True, exist_ok=True)
# Persist the raw users data (index included, so it can be reloaded with index_col=0)
# NOTE(review): this writes the `password` column to disk in plain text — confirm that is intended
df_users.to_csv('data/df_users.csv')
print('df_users salvo!')

df_users salvo!


# **Data Cleaning**

- Check column types (esp. datetime)
- Check for nulls and duplicates
- Split columns if necessary

In [9]:
# Load saved data
df_products = pd.read_csv('data/df_products.csv', index_col=0)
df_carts = pd.read_csv('data/df_carts.csv', index_col=0)
df_users = pd.read_csv('data/df_users.csv', index_col=0)

## Data Cleaning - Products

In [10]:
# Preview the first rows of df_products
df_products.head()

Unnamed: 0,id,title,price,description,category,image,rating
0,1,"Fjallraven - Foldsack No. 1 Backpack, Fits 15 ...",109.95,Your perfect pack for everyday use and walks i...,men's clothing,https://fakestoreapi.com/img/81fPKd-2AYL._AC_S...,"{'rate': 3.9, 'count': 120}"
1,2,Mens Casual Premium Slim Fit T-Shirts,22.3,"Slim-fitting style, contrast raglan long sleev...",men's clothing,https://fakestoreapi.com/img/71-3HjGNDUL._AC_S...,"{'rate': 4.1, 'count': 259}"
2,3,Mens Cotton Jacket,55.99,great outerwear jackets for Spring/Autumn/Wint...,men's clothing,https://fakestoreapi.com/img/71li-ujtlUL._AC_U...,"{'rate': 4.7, 'count': 500}"
3,4,Mens Casual Slim Fit,15.99,The color could be slightly different between ...,men's clothing,https://fakestoreapi.com/img/71YXzeOuslL._AC_U...,"{'rate': 2.1, 'count': 430}"
4,5,John Hardy Women's Legends Naga Gold & Silver ...,695.0,"From our Legends Collection, the Naga was insp...",jewelery,https://fakestoreapi.com/img/71pWzhdJNwL._AC_U...,"{'rate': 4.6, 'count': 400}"


### Checking column types

In [12]:
# Inspect the dtype pandas assigned to each products column
df_products.dtypes

id               int64
title           object
price          float64
description     object
category        object
image           object
rating          object
dtype: object

### Checking null values and duplicated rows

In [13]:
# Count missing values per column
df_products.isnull().sum()

id             0
title          0
price          0
description    0
category       0
image          0
rating         0
dtype: int64

In [17]:
# Count fully duplicated rows (the message reads "The number of duplicated rows is" in Portuguese)
print(f"O número de linhas duplicadas é: {df_products.duplicated().sum()}")

O número de linhas duplicadas é: 0


### Split columns

- split 'rating' column into 2 new columns: avg_rating and count_ratings
- delete old 'rating' column
- it's easier to perform calculations with these two metrics stored in separate columns.

In [20]:
# After the CSV round-trip the 'rating' column holds dict literals stored as strings.
# Parse them with ast.literal_eval instead of eval: the text originates from an external
# API, and literal_eval only accepts Python literals, so it cannot execute arbitrary code.
rating_parsed = df_products['rating'].apply(ast.literal_eval)

# Extract the two metrics from each parsed dict into their own columns
df_products['avg_rating'] = rating_parsed.apply(lambda x: x['rate'])
df_products['count_ratings'] = rating_parsed.apply(lambda x: x['count'])

# Drop the original 'rating' column now that its contents are split out
df_products = df_products.drop(columns=['rating'])

# Show the first rows of the DataFrame to verify the result
df_products.head()

Unnamed: 0,id,title,price,description,category,image,avg_rating,count_ratings
0,1,"Fjallraven - Foldsack No. 1 Backpack, Fits 15 ...",109.95,Your perfect pack for everyday use and walks i...,men's clothing,https://fakestoreapi.com/img/81fPKd-2AYL._AC_S...,3.9,120
1,2,Mens Casual Premium Slim Fit T-Shirts,22.3,"Slim-fitting style, contrast raglan long sleev...",men's clothing,https://fakestoreapi.com/img/71-3HjGNDUL._AC_S...,4.1,259
2,3,Mens Cotton Jacket,55.99,great outerwear jackets for Spring/Autumn/Wint...,men's clothing,https://fakestoreapi.com/img/71li-ujtlUL._AC_U...,4.7,500
3,4,Mens Casual Slim Fit,15.99,The color could be slightly different between ...,men's clothing,https://fakestoreapi.com/img/71YXzeOuslL._AC_U...,2.1,430
4,5,John Hardy Women's Legends Naga Gold & Silver ...,695.0,"From our Legends Collection, the Naga was insp...",jewelery,https://fakestoreapi.com/img/71pWzhdJNwL._AC_U...,4.6,400


## Data Cleaning - Carts

## Data Cleaning - Users

# **Exploratory Data Analysis** (EDA)

# **References**

- **FakeStore API:** https://fakestoreapi.com/
- **Exporting notebooks as PDF files:** [MLJar](https://mljar.com/blog/jupyter-notebook-pdf/), [nbconvert docs](https://nbconvert.readthedocs.io/en/stable/)