# Creación del Dataset para entrenamiento del Modelo

In [1]:
# Importar librerías
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

In [2]:
# Configuration
# Seed both NumPy's global RNG and the stdlib `random` module so the random
# draws made through them are repeatable from run to run.
np.random.seed(0)
random.seed(0)
n_datos = 5000  # Number of rows in the generated DataFrame

In [3]:
# Data generation
ids = np.arange(1, n_datos + 1)  # sequential IDs 1..n_datos
sexos = np.random.choice(['Masculino', 'Femenino'], size=n_datos)

# Capture the reference date once so every birth date is offset from the same
# day, even if the cell happens to run across midnight.
fecha_referencia = datetime.today()

# Birth dates fall between 365*18 and 365*70 days before the reference date.
# Leap days are ignored, so the resulting ages are only approximately 18-70
# years. The dates themselves depend on the day the notebook is executed.
fechas_nacimiento = [
    (fecha_referencia - timedelta(days=random.randint(365*18, 365*70))).strftime('%Y-%m-%d')
    for _ in range(n_datos)
]

In [4]:
# Build the DataFrame: one row per generated ID, with its sex and birth date
df = pd.DataFrame({
    'ID': ids,
    'Sexo': sexos,
    'Fecha de nacimiento': fechas_nacimiento
})

In [5]:
# Preview the first rows of the generated DataFrame
df.head()

Unnamed: 0,ID,Sexo,Fecha de nacimiento
0,1,Masculino,1971-11-03
1,2,Femenino,1968-09-01
2,3,Femenino,2002-10-08
3,4,Masculino,1983-03-04
4,5,Femenino,1960-07-13


In [6]:
def zodiaco(fecha):
    """Return the zodiac sign name for a birth date.

    Args:
        fecha: Date string in ``'%Y-%m-%d'`` format.

    Returns:
        The sign name as spelled throughout this notebook (Spanish, without
        accents), e.g. ``'Geminis'`` or ``'Cancer'``.

    Raises:
        ValueError: If ``fecha`` does not match the ``'%Y-%m-%d'`` format.
    """
    # month -> (cut-off day, sign before the cut-off, sign from the cut-off on)
    cortes = {
        1: (20, 'Capricornio', 'Acuario'),
        2: (19, 'Acuario', 'Piscis'),
        3: (21, 'Piscis', 'Aries'),
        4: (20, 'Aries', 'Tauro'),
        5: (21, 'Tauro', 'Geminis'),
        6: (21, 'Geminis', 'Cancer'),
        7: (23, 'Cancer', 'Leo'),
        8: (23, 'Leo', 'Virgo'),
        9: (23, 'Virgo', 'Libra'),
        10: (23, 'Libra', 'Escorpio'),
        11: (22, 'Escorpio', 'Sagitario'),
        12: (22, 'Sagitario', 'Capricornio'),
    }
    nacimiento = datetime.strptime(fecha, '%Y-%m-%d')
    dia_corte, signo_previo, signo_nuevo = cortes[nacimiento.month]
    if nacimiento.day < dia_corte:
        return signo_previo
    return signo_nuevo


# Add the zodiac sign derived from each birth date as a new column
df['Signo Zodiacal'] = [zodiaco(fecha) for fecha in df['Fecha de nacimiento']]

df.head()

Unnamed: 0,ID,Sexo,Fecha de nacimiento,Signo Zodiacal
0,1,Masculino,1971-11-03,Escorpio
1,2,Femenino,1968-09-01,Virgo
2,3,Femenino,2002-10-08,Libra
3,4,Masculino,1983-03-04,Piscis
4,5,Femenino,1960-07-13,Cancer


In [7]:
# Every movie genre known to the notebook. These names are used as column names
# and as lookup keys, so the spelling here is the reference spelling.
generos = ['Acción', 'Aventura', 'Animación', 'Comedia', 'Crimen', 'Documental', 'Drama', 'Familiar', 'Fantasía', 'Historia', 'Horror', 'Musical', 'Misterio', 'Romance', 'Ciencia Ficción', 'Suspense', 'Guerra', 'Western']

# Preferred genres per zodiac sign, as (genre, (min, max)) pairs where the range
# is an inclusive percentage. Genre names must match 'generos' exactly; a name
# that is not in that list is never looked up and the preference is lost.
gustos_por_signo = {
    # 'Suspense' (not 'Suspenso'): must match the spelling used in 'generos'.
    'Aries': [('Acción', (70, 100)), ('Aventura', (60, 90)), ('Suspense', (50, 80)), ('Horror', (40, 70))],
    'Tauro': [('Familiar', (70, 100)), ('Drama', (60, 90)), ('Romance', (50, 80)), ('Musical', (40, 70))],
    'Geminis': [('Comedia', (70, 100)), ('Documental', (60, 90)), ('Misterio', (50, 80)), ('Aventura', (40, 70))],
    'Cancer': [('Familiar', (70, 100)), ('Drama', (60, 90)), ('Romance', (50, 80)), ('Fantasía', (40, 70))],
    'Leo': [('Drama', (70, 100)), ('Acción', (60, 90)), ('Musical', (50, 80)), ('Comedia', (40, 70))],
    'Virgo': [('Documental', (70, 100)), ('Misterio', (60, 90)), ('Drama', (50, 80)), ('Ciencia Ficción', (40, 70))],
    'Libra': [('Romance', (70, 100)), ('Drama', (60, 90)), ('Comedia', (50, 80)), ('Musical', (40, 70))],
    'Escorpio': [('Misterio', (70, 100)), ('Suspense', (60, 90)), ('Crimen', (50, 80)), ('Horror', (40, 70))],
    'Sagitario': [('Aventura', (70, 100)), ('Comedia', (60, 90)), ('Documental', (50, 80)), ('Historia', (40, 70))],
    'Capricornio': [('Historia', (70, 100)), ('Drama', (60, 90)), ('Documental', (50, 80)), ('Guerra', (40, 70))],
    'Acuario': [('Ciencia Ficción', (70, 100)), ('Documental', (60, 90)), ('Fantasía', (50, 80)), ('Misterio', (40, 70))],
    'Piscis': [('Fantasía', (70, 100)), ('Romance', (60, 90)), ('Drama', (50, 80)), ('Ciencia Ficción', (40, 70))],
}

In [8]:
# Función para asignar los gustos
def asignar_gustos(signo):
    """Draw a random affinity score for every genre, biased by zodiac sign.

    Genres listed for ``signo`` in ``gustos_por_signo`` get a score drawn
    uniformly from that entry's inclusive percentage range; every other genre
    gets a score between 0.00 and 0.50. A sign that is not in
    ``gustos_por_signo`` has no preferred genres, so all of its scores fall in
    the 0.00-0.50 range.

    Args:
        signo: Zodiac sign name used as a key of ``gustos_por_signo``.

    Returns:
        pandas.Series indexed by genre name (in the order of ``generos``) with
        scores expressed as fractions in steps of 0.01.
    """
    preferidos = gustos_por_signo.get(signo, [])
    nombres_preferidos = [par[0] for par in preferidos]

    def puntuar(genero):
        # Non-preferred genre: low, uniformly random affinity (0-50 %)
        if genero not in nombres_preferidos:
            return np.random.randint(0, 51) / 100.0
        # Preferred genre: draw inside its range; +1 makes the upper bound inclusive
        rango = preferidos[nombres_preferidos.index(genero)][1]
        return np.random.randint(rango[0], rango[1] + 1) / 100.0

    # Genres are scored in list order, one random draw per genre
    return pd.Series({genero: puntuar(genero) for genero in generos})

# Score every user from their zodiac sign, then attach the scores (one column
# per genre) to the original user data
usuarios_gustos = df['Signo Zodiacal'].apply(asignar_gustos)
usuarios_final = df.join(usuarios_gustos)

In [9]:
# Preview the per-user genre scores produced by asignar_gustos
usuarios_gustos.head()

Unnamed: 0,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,Familiar,Fantasía,Historia,Horror,Musical,Misterio,Romance,Ciencia Ficción,Suspense,Guerra,Western
0,0.16,0.25,0.3,0.27,0.61,0.4,0.0,0.33,0.25,0.14,0.56,0.34,0.9,0.4,0.5,0.9,0.5,0.43
1,0.38,0.19,0.42,0.34,0.38,0.91,0.7,0.26,0.35,0.27,0.03,0.25,0.88,0.07,0.44,0.04,0.09,0.04
2,0.33,0.49,0.1,0.73,0.48,0.32,0.73,0.21,0.28,0.23,0.48,0.45,0.02,0.7,0.09,0.07,0.29,0.21
3,0.16,0.14,0.19,0.37,0.26,0.12,0.65,0.19,0.88,0.42,0.02,0.16,0.11,0.63,0.49,0.09,0.3,0.22
4,0.42,0.2,0.41,0.19,0.04,0.46,0.75,0.71,0.4,0.47,0.15,0.13,0.09,0.67,0.42,0.1,0.49,0.39


In [10]:
# Preview the user attributes combined with their genre scores
usuarios_final.head()

Unnamed: 0,ID,Sexo,Fecha de nacimiento,Signo Zodiacal,Acción,Aventura,Animación,Comedia,Crimen,Documental,...,Fantasía,Historia,Horror,Musical,Misterio,Romance,Ciencia Ficción,Suspense,Guerra,Western
0,1,Masculino,1971-11-03,Escorpio,0.16,0.25,0.3,0.27,0.61,0.4,...,0.25,0.14,0.56,0.34,0.9,0.4,0.5,0.9,0.5,0.43
1,2,Femenino,1968-09-01,Virgo,0.38,0.19,0.42,0.34,0.38,0.91,...,0.35,0.27,0.03,0.25,0.88,0.07,0.44,0.04,0.09,0.04
2,3,Femenino,2002-10-08,Libra,0.33,0.49,0.1,0.73,0.48,0.32,...,0.28,0.23,0.48,0.45,0.02,0.7,0.09,0.07,0.29,0.21
3,4,Masculino,1983-03-04,Piscis,0.16,0.14,0.19,0.37,0.26,0.12,...,0.88,0.42,0.02,0.16,0.11,0.63,0.49,0.09,0.3,0.22
4,5,Femenino,1960-07-13,Cancer,0.42,0.2,0.41,0.19,0.04,0.46,...,0.4,0.47,0.15,0.13,0.09,0.67,0.42,0.1,0.49,0.39


In [11]:
#usuarios_final.to_csv(r"usuariosfinal.csv", index=False)

In [12]:
# Función para generar una película aleatoria

def generar_pelicula(generos):
    """Draw a random movie: a rating plus a 0/1 membership flag per genre.

    Args:
        generos: Sequence of genre names to choose from. At least as many
            genres as the drawn genre count (2 or 3) are needed, because the
            genres are sampled without replacement.

    Returns:
        Tuple ``(calificacion, pertenencia_generos)`` where ``calificacion`` is
        the rating in [0, 1] rounded to two decimals and
        ``pertenencia_generos`` maps every genre in ``generos`` to 1 if the
        movie belongs to it, else 0.
    """
    # Rating ~ Normal(0.80, 0.10), clamped into [0, 1] and rounded to 2 decimals
    puntaje_bruto = np.random.normal(loc=0.80, scale=0.10)
    calificacion = np.round(np.clip(puntaje_bruto, 0, 1), 2)

    # Two genres with probability 0.7, three with probability 0.3; no repeats
    cantidad = np.random.choice([2, 3], p=[0.7, 0.3])
    elegidos = np.random.choice(generos, cantidad, replace=False)

    # One flag per known genre, in the order the genres were given
    pertenencia_generos = {}
    for genero in generos:
        pertenencia_generos[genero] = int(genero in elegidos)

    return calificacion, pertenencia_generos

In [13]:
def evaluar_y_recomendar(row, generos):
    """Attach a randomly generated movie to a user row and label the outcome.

    A movie is drawn with ``generar_pelicula``. The user's scores for the
    movie's genres are averaged, and that average is averaged again with the
    movie rating. The user is labelled as liking the movie when this combined
    value reaches 0.6.

    Args:
        row: Mapping-like row (e.g. a pandas Series) holding one score per
            genre name. It is modified in place.
        generos: Sequence of genre names.

    Returns:
        The same ``row`` with these entries added, in this order: one
        ``P_<genre>`` membership flag per genre, ``'Calificación'`` (movie
        rating), ``'Promedio'`` (combined value, 2 decimals) and ``'Le Gustó'``
        (1 or 0).
    """
    calificacion, generos_pelicula = generar_pelicula(generos)

    # The user's scores for the genres the movie belongs to
    afinidades = []
    for gen, pertenece in generos_pelicula.items():
        if pertenece:
            afinidades.append(row[gen])

    if not afinidades:
        # No genre selected (theoretical case): fall back to the rating alone
        promedio_final = calificacion
    else:
        promedio_final = np.round((np.mean(afinidades) + calificacion) / 2, 2)

    # Record the movie: genre flags first, then rating, combined value and label
    for gen in generos:
        row[f'P_{gen}'] = generos_pelicula[gen]
    row['Calificación'] = calificacion
    row['Promedio'] = promedio_final
    row['Le Gustó'] = int(promedio_final >= 0.6)

    return row

In [14]:
# Build the recommendations frame by evaluating one random movie per user row.
# 'usuarios_final' must already contain one score column per genre.
usuarios_con_recomendaciones = usuarios_final.apply(
    evaluar_y_recomendar,
    axis=1,
    args=(generos,),
)

usuarios_con_recomendaciones.head()

Unnamed: 0,ID,Sexo,Fecha de nacimiento,Signo Zodiacal,Acción,Aventura,Animación,Comedia,Crimen,Documental,...,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Promedio,Le Gustó
0,1,Masculino,1971-11-03,Escorpio,0.16,0.25,0.3,0.27,0.61,0.4,...,0,0,0,0,0,0,0,0.75,0.59,0
1,2,Femenino,1968-09-01,Virgo,0.38,0.19,0.42,0.34,0.38,0.91,...,0,0,0,0,0,1,0,0.82,0.66,1
2,3,Femenino,2002-10-08,Libra,0.33,0.49,0.1,0.73,0.48,0.32,...,0,0,0,0,0,0,0,0.72,0.49,0
3,4,Masculino,1983-03-04,Piscis,0.16,0.14,0.19,0.37,0.26,0.12,...,0,0,0,0,0,0,1,0.75,0.5,0
4,5,Femenino,1960-07-13,Cancer,0.42,0.2,0.41,0.19,0.04,0.46,...,0,0,0,0,0,0,0,0.63,0.45,0


In [15]:
#usuarios_con_recomendaciones.to_csv(r"usuariosconrecomendaciones.csv", index=False)

## Preparación del Dataset

In [16]:
# Continue under a new name. This binds the same object; no copy is made.
datasetfinal = usuarios_con_recomendaciones

In [17]:
# Derive the frame directly from the recommendations frame so this cell gives
# the same result every time it runs; dropping from 'datasetfinal' itself fails
# with KeyError on a second run because the columns are already gone.
# 'Promedio' is the value 'Le Gustó' was computed from; 'ID' is the row identifier.
datasetfinal = usuarios_con_recomendaciones.drop(columns=['Promedio', 'ID'])

datasetfinal.head()

Unnamed: 0,Sexo,Fecha de nacimiento,Signo Zodiacal,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,...,P_Horror,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Le Gustó
0,Masculino,1971-11-03,Escorpio,0.16,0.25,0.3,0.27,0.61,0.4,0.0,...,0,0,0,0,0,0,0,0,0.75,0
1,Femenino,1968-09-01,Virgo,0.38,0.19,0.42,0.34,0.38,0.91,0.7,...,0,0,0,0,0,0,1,0,0.82,1
2,Femenino,2002-10-08,Libra,0.33,0.49,0.1,0.73,0.48,0.32,0.73,...,0,0,0,0,0,0,0,0,0.72,0
3,Masculino,1983-03-04,Piscis,0.16,0.14,0.19,0.37,0.26,0.12,0.65,...,0,0,0,0,0,0,0,1,0.75,0
4,Femenino,1960-07-13,Cancer,0.42,0.2,0.41,0.19,0.04,0.46,0.75,...,1,0,0,0,0,0,0,0,0.63,0


In [18]:
# Parse the birth dates so the .dt accessors can be used
nacimientos = pd.to_datetime(datasetfinal['Fecha de nacimiento'])

# Age in whole years: the year difference, minus one when this year's birthday
# has not happened yet (the boolean mask counts as 0/1 in the subtraction)
hoy = pd.Timestamp('now')
cumple_en_mes_posterior = nacimientos.dt.month > hoy.month
cumple_mas_tarde_este_mes = (nacimientos.dt.month == hoy.month) & (nacimientos.dt.day > hoy.day)
datasetfinal['Edad'] = hoy.year - nacimientos.dt.year - (cumple_en_mes_posterior | cumple_mas_tarde_este_mes)

# The birth date is no longer needed once the age exists
datasetfinal = datasetfinal.drop(columns=['Fecha de nacimiento'])

# Show the first rows to verify the changes
datasetfinal.head()


Unnamed: 0,Sexo,Signo Zodiacal,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,Familiar,...,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Le Gustó,Edad
0,Masculino,Escorpio,0.16,0.25,0.3,0.27,0.61,0.4,0.0,0.33,...,0,0,0,0,0,0,0,0.75,0,52
1,Femenino,Virgo,0.38,0.19,0.42,0.34,0.38,0.91,0.7,0.26,...,0,0,0,0,0,1,0,0.82,1,55
2,Femenino,Libra,0.33,0.49,0.1,0.73,0.48,0.32,0.73,0.21,...,0,0,0,0,0,0,0,0.72,0,21
3,Masculino,Piscis,0.16,0.14,0.19,0.37,0.26,0.12,0.65,0.19,...,0,0,0,0,0,0,1,0.75,0,41
4,Femenino,Cancer,0.42,0.2,0.41,0.19,0.04,0.46,0.75,0.71,...,0,0,0,0,0,0,0,0.63,0,63


In [19]:
# Move the 'Edad' column to the front of the DataFrame
columna_edad = datasetfinal.pop('Edad')  # Remove the column, keeping its values
datasetfinal.insert(0, 'Edad', columna_edad)  # Re-insert it at position 0

# Show the first rows to verify the change
datasetfinal.head()

Unnamed: 0,Edad,Sexo,Signo Zodiacal,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,...,P_Horror,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Le Gustó
0,52,Masculino,Escorpio,0.16,0.25,0.3,0.27,0.61,0.4,0.0,...,0,0,0,0,0,0,0,0,0.75,0
1,55,Femenino,Virgo,0.38,0.19,0.42,0.34,0.38,0.91,0.7,...,0,0,0,0,0,0,1,0,0.82,1
2,21,Femenino,Libra,0.33,0.49,0.1,0.73,0.48,0.32,0.73,...,0,0,0,0,0,0,0,0,0.72,0
3,41,Masculino,Piscis,0.16,0.14,0.19,0.37,0.26,0.12,0.65,...,0,0,0,0,0,0,0,1,0.75,0
4,63,Femenino,Cancer,0.42,0.2,0.41,0.19,0.04,0.46,0.75,...,1,0,0,0,0,0,0,0,0.63,0


In [20]:
# Remove the 'Signo Zodiacal' column from the dataset
datasetfinal = datasetfinal.drop(columns=['Signo Zodiacal'])

datasetfinal.head()

Unnamed: 0,Edad,Sexo,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,Familiar,...,P_Horror,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Le Gustó
0,52,Masculino,0.16,0.25,0.3,0.27,0.61,0.4,0.0,0.33,...,0,0,0,0,0,0,0,0,0.75,0
1,55,Femenino,0.38,0.19,0.42,0.34,0.38,0.91,0.7,0.26,...,0,0,0,0,0,0,1,0,0.82,1
2,21,Femenino,0.33,0.49,0.1,0.73,0.48,0.32,0.73,0.21,...,0,0,0,0,0,0,0,0,0.72,0
3,41,Masculino,0.16,0.14,0.19,0.37,0.26,0.12,0.65,0.19,...,0,0,0,0,0,0,0,1,0.75,0
4,63,Femenino,0.42,0.2,0.41,0.19,0.04,0.46,0.75,0.71,...,1,0,0,0,0,0,0,0,0.63,0


In [21]:
# Encode 'Sexo' as integers (Masculino -> 1, Femenino -> 2).
# NOTE(review): Series.map yields NaN for values missing from the dict, so
# running this cell a second time would turn the already-encoded column into NaN.
datasetfinal['Sexo'] = datasetfinal['Sexo'].map({'Masculino': 1, 'Femenino': 2})

In [22]:
# Preview the dataset after 'Sexo' has been encoded numerically
datasetfinal.head()

Unnamed: 0,Edad,Sexo,Acción,Aventura,Animación,Comedia,Crimen,Documental,Drama,Familiar,...,P_Horror,P_Musical,P_Misterio,P_Romance,P_Ciencia Ficción,P_Suspense,P_Guerra,P_Western,Calificación,Le Gustó
0,52,1,0.16,0.25,0.3,0.27,0.61,0.4,0.0,0.33,...,0,0,0,0,0,0,0,0,0.75,0
1,55,2,0.38,0.19,0.42,0.34,0.38,0.91,0.7,0.26,...,0,0,0,0,0,0,1,0,0.82,1
2,21,2,0.33,0.49,0.1,0.73,0.48,0.32,0.73,0.21,...,0,0,0,0,0,0,0,0,0.72,0
3,41,1,0.16,0.14,0.19,0.37,0.26,0.12,0.65,0.19,...,0,0,0,0,0,0,0,1,0.75,0
4,63,2,0.42,0.2,0.41,0.19,0.04,0.46,0.75,0.71,...,1,0,0,0,0,0,0,0,0.63,0


In [23]:
# Write the final dataset to a CSV file (relative path), without the index column
datasetfinal.to_csv(r"datasetrecomendadorpeliculas.csv", index=False)