# Crear columnas condicionales

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the "Amazon Top 50 Bestselling Books 2009-2019" dataset
# from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer='amazon.csv')

In [3]:
# Display the DataFrame (pandas truncates long frames in the rendered output)
df

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction
...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction


## Crear una columna condicional con dos opciones

In [4]:
# Add a column that flags whether each book is well rated,
# using a single cut-off on the user rating:
#   User Rating >= 4 -> 'Bueno'
#   User Rating <  4 -> 'Malo'
df['Valoracion'] = np.where(df['User Rating'].ge(4), 'Bueno', 'Malo')

In [5]:
# Display the DataFrame, now including the 'Valoracion' column
df

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Valoracion
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,Bueno
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,Bueno
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,Bueno
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,Bueno
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,Bueno
...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,Bueno
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,Bueno
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,Bueno
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,Bueno


In [6]:
# Verify the classification by counting how many rows got each label
df['Valoracion'].value_counts()

Valoracion
Bueno    543
Malo       7
Name: count, dtype: int64

## Crear una columna condicional con más de dos opciones

In [7]:
# Goal: add a column that grades each book by its user rating,
# using four levels instead of two:
#
#   'Muy Bueno': User Rating >= 4.5
#   'Bueno':     3.5 <= User Rating < 4.5
#   'Regular':   2.5 <= User Rating < 3.5
#   'Malo':      User Rating < 2.5
#
# Every boundary value belongs to the higher grade, so a rating of
# exactly 4.5 is 'Muy Bueno' (not left unclassified).

# Boolean masks, one per grade. Their order must match `valores`,
# because the i-th mask is paired with the i-th label.
condiciones = [
    df['User Rating'].ge(4.5),
    df['User Rating'].ge(3.5) & df['User Rating'].lt(4.5),
    df['User Rating'].ge(2.5) & df['User Rating'].lt(3.5),
    df['User Rating'].lt(2.5),
]

# Labels assigned where the mask at the same position is True.
valores = [
    'Muy Bueno',
    'Bueno',
    'Regular',
    'Malo',
]

In [8]:
# Build the multi-level column; this replaces the two-level 'Valoracion'
# column created earlier in the notebook.
# An explicit string `default` is passed because np.select otherwise falls
# back to the integer 0 for rows that match no condition (e.g. a missing
# rating): mixing that integer with string labels either yields a
# meaningless '0' label or, on newer NumPy releases, raises a TypeError.
df['Valoracion'] = np.select(condiciones, valores, default='Sin clasificar')

In [9]:
# Display the DataFrame with the updated 'Valoracion' column
df

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre,Valoracion
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350,8,2016,Non Fiction,Muy Bueno
1,11/22/63: A Novel,Stephen King,4.6,2052,22,2011,Fiction,Muy Bueno
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979,15,2018,Non Fiction,Muy Bueno
3,1984 (Signet Classics),George Orwell,4.7,21424,6,2017,Fiction,Muy Bueno
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665,12,2019,Non Fiction,Muy Bueno
...,...,...,...,...,...,...,...,...
545,Wrecking Ball (Diary of a Wimpy Kid Book 14),Jeff Kinney,4.9,9413,8,2019,Fiction,Muy Bueno
546,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2016,Non Fiction,Muy Bueno
547,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2017,Non Fiction,Muy Bueno
548,You Are a Badass: How to Stop Doubting Your Gr...,Jen Sincero,4.7,14331,8,2018,Non Fiction,Muy Bueno


In [10]:
# Verify the classification by counting how many rows got each label.
# Labels that no row received do not appear in the result.
df['Valoracion'].value_counts()

Valoracion
Muy Bueno    452
Bueno         97
Regular        1
Name: count, dtype: int64

In [11]:
# Summary statistics of the numeric columns; the min/max of 'User Rating'
# show which rating ranges actually occur in the data.
df.describe()

Unnamed: 0,User Rating,Reviews,Price,Year
count,550.0,550.0,550.0,550.0
mean,4.618364,11953.281818,13.1,2014.0
std,0.22698,11731.132017,10.842262,3.165156
min,3.3,37.0,0.0,2009.0
25%,4.5,4058.0,7.0,2011.0
50%,4.7,8580.0,11.0,2014.0
75%,4.8,17253.25,16.0,2017.0
max,4.9,87841.0,105.0,2019.0
