In [1]:
ColabNotebook = 'google.colab' in str(get_ipython())

if ColabNotebook:
    # monta G-drive en entorno COLAB
    from google.colab import drive
    drive.mount('/content/drive/')

    DATOS_DIR = '/content/drive/MyDrive/Colab Notebooks/DATOS/'  # carpeta donde se encuentran los datasets
else:
    DATOS_DIR   = '../../Datos/' # carpeta LOCAL donde se encuentran los datasets

In [2]:
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import chardet

# Set the option to opt-in to the future behavior
pd.set_option('future.no_silent_downcasting', True)

In [3]:
nomArch = DATOS_DIR + 'Premios2020.csv'
#-- detectando la codificación de caracteres usada ----
with open(nomArch, 'rb') as f:
    result = chardet.detect(f.read())  # or readline if the file is large

df= pd.read_csv(nomArch, encoding=result['encoding'])

# Datos faltantes

**Completando los datos faltantes del atributo release**

In [5]:
moda = df['release'].mode()
df['release'] = df['release'].replace([np.nan], moda)

df['release'].value_counts()

release
December     32
January      25
November     22
February     22
October      20
June         12
September    11
April         9
July          9
August        8
March         8
May           8
Name: count, dtype: int64

In [6]:
df

Unnamed: 0,Year,Age,Actor,Sex,Film,nominations,rating,duration,genre1,genre2,release,synopsis
0,1928,44,Emil Jannings,M,The Last Command,2.0,8.0,88,Drama,History,April,A former Imperial Russian general and cousin o...
1,1928,22,Laura Gainor (aka Janet Gaynor),F,Sunrise,5.0,7.8,110,Drama,Romance,December,A street cleaner saves a young womans life and...
2,1929,37,Mary Pickford,F,Coquette,1.0,7.3,76,Drama,Romance,April,A flirtatious southern belle is compromised wi...
3,1929,38,Warner Baxter,M,In Old Arizona,5.0,5.8,95,Romance,Western,January,A charming happy-go-lucky bandit in old Arizon...
4,1930,62,George Arliss,M,Disraeli,3.0,6.5,90,Biography,Drama,November,Prime Minister of Great Britain Benjamin Disra...
...,...,...,...,...,...,...,...,...,...,...,...,...
181,2018,44,Olivia Colman,F,The Favourite,10.0,7.5,119,Comedy,Drama,December,In the early 18th century England is at war wi...
182,2019,50,Renée Zellweger,F,Judy,2.0,6.8,118,Biography,Drama,February,"Thirty years after starring in ""The Wizard of ..."
183,2019,45,Joaquin Phoenix,M,Joker,11.0,8.5,122,Drama,Thriller,October,Arthur Fleck loves to make people laugh but hi...
184,2020,63,Frances McDormand,F,Nomadland,6.0,7.4,108,Drama,,September,Nomadland es una película estadounidense de dr...


# Numerización

## Numerización como entero único

In [7]:
mapeo = {"release": {"January":1, "February":2, "March":3,"April":4,
                "May":5, "June":6,"July":7, "August":8, "September":9,
                     "October":10, "November":11, "December":12}}

df.replace(mapeo, inplace=True)
print(df['release'].describe())


count     186
unique     12
top        12
freq       32
Name: release, dtype: int64


In [8]:
df

Unnamed: 0,Year,Age,Actor,Sex,Film,nominations,rating,duration,genre1,genre2,release,synopsis
0,1928,44,Emil Jannings,M,The Last Command,2.0,8.0,88,Drama,History,4,A former Imperial Russian general and cousin o...
1,1928,22,Laura Gainor (aka Janet Gaynor),F,Sunrise,5.0,7.8,110,Drama,Romance,12,A street cleaner saves a young womans life and...
2,1929,37,Mary Pickford,F,Coquette,1.0,7.3,76,Drama,Romance,4,A flirtatious southern belle is compromised wi...
3,1929,38,Warner Baxter,M,In Old Arizona,5.0,5.8,95,Romance,Western,1,A charming happy-go-lucky bandit in old Arizon...
4,1930,62,George Arliss,M,Disraeli,3.0,6.5,90,Biography,Drama,11,Prime Minister of Great Britain Benjamin Disra...
...,...,...,...,...,...,...,...,...,...,...,...,...
181,2018,44,Olivia Colman,F,The Favourite,10.0,7.5,119,Comedy,Drama,12,In the early 18th century England is at war wi...
182,2019,50,Renée Zellweger,F,Judy,2.0,6.8,118,Biography,Drama,2,"Thirty years after starring in ""The Wizard of ..."
183,2019,45,Joaquin Phoenix,M,Joker,11.0,8.5,122,Drama,Thriller,10,Arthur Fleck loves to make people laugh but hi...
184,2020,63,Frances McDormand,F,Nomadland,6.0,7.4,108,Drama,,9,Nomadland es una película estadounidense de dr...


## Numerización binaria

In [None]:
# atributo sexo con codificación binaria
NuevasColumnas = pd.get_dummies(df['Sex'], prefix= 'Sex', drop_first=True).astype(int)

# Agregamos las nuevas columnas al DataFrame
df = pd.concat([NuevasColumnas, df], axis=1)

# Borramos la columna anterior
df.drop(['Sex'],axis=1, inplace=True)

df

Unnamed: 0,Sex_M,Year,Age,Actor,Film,nominations,rating,duration,genre1,genre2,release,synopsis
0,1,1928,44,Emil Jannings,The Last Command,2.0,8.0,88,Drama,History,4,A former Imperial Russian general and cousin o...
1,0,1928,22,Laura Gainor (aka Janet Gaynor),Sunrise,5.0,7.8,110,Drama,Romance,12,A street cleaner saves a young womans life and...
2,0,1929,37,Mary Pickford,Coquette,1.0,7.3,76,Drama,Romance,4,A flirtatious southern belle is compromised wi...
3,1,1929,38,Warner Baxter,In Old Arizona,5.0,5.8,95,Romance,Western,1,A charming happy-go-lucky bandit in old Arizon...
4,1,1930,62,George Arliss,Disraeli,3.0,6.5,90,Biography,Drama,11,Prime Minister of Great Britain Benjamin Disra...
...,...,...,...,...,...,...,...,...,...,...,...,...
181,0,2018,44,Olivia Colman,The Favourite,10.0,7.5,119,Comedy,Drama,12,In the early 18th century England is at war wi...
182,0,2019,50,Renée Zellweger,Judy,2.0,6.8,118,Biography,Drama,2,"Thirty years after starring in ""The Wizard of ..."
183,1,2019,45,Joaquin Phoenix,Joker,11.0,8.5,122,Drama,Thriller,10,Arthur Fleck loves to make people laugh but hi...
184,0,2020,63,Frances McDormand,Nomadland,6.0,7.4,108,Drama,,9,Nomadland es una película estadounidense de dr...
