# Instalando Dependecias

In [None]:
pip install rdflib

In [None]:
pip install pandas

In [None]:
pip install SPARQLWrapper

In [None]:
pip install pyvis

# Importaciones

In [1]:
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON, N3
from pprint import pprint
import pandas as pd

In [2]:
import pyvis
from pyvis.network import Network
import networkx as nx
import json

# Desarrollo

## 1. Filtrando los datos de la película (usando SPARQL)

In [3]:
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detallePelicula(titulo):
    sparql.setQuery('''
        SELECT ?name ?director ?country ?starring ?abstract ?studio ?producer ?music
        WHERE { dbr:'''+titulo+''' dbp:name ?name .
                dbr:'''+titulo+''' dbo:director ?director .
                dbr:'''+titulo+''' dbp:country ?country .
                dbr:'''+titulo+''' dbp:starring ?starring .
                dbr:'''+titulo+''' dbo:abstract ?abstract .
                dbr:'''+titulo+''' dbp:studio ?studio .
                dbr:'''+titulo+''' dbp:producer ?producer .
                dbr:'''+titulo+''' dbp:music ?music .

                FILTER (lang(?abstract) = 'en')
        }''')
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()
    
    diccionarios = []
    for res in qres["results"]["bindings"]:
        dic ={
            "name":res["name"]["value"],
            "director":res["director"]["value"].split("/")[-1],
            "country":res["country"]["value"],
            "abstract":res["abstract"]["value"],
            "starring":res["starring"]["value"].split("/")[-1],
            "studio":res["studio"]["value"].split("/")[-1],
            "producer":res["producer"]["value"].split("/")[-1],
            "music":res["music"]["value"].split("/")[-1],
        }
        diccionarios.append(dic)
    print(len(diccionarios))
    
    dics_string = json.dumps(diccionarios)  
    a_json = json.loads(dics_string)
    dataframe = pd.DataFrame.from_dict(a_json)
    dataframe.drop_duplicates()
    
    return dataframe

In [4]:
def nuevo(titulo):
    dataframe = detallePelicula(titulo)
    dataframe.reset_index()
    dataframe.drop_duplicates()
    return dataframe

def enriquecer(dataframe,titulo):
    df=dataframe.append(detallePelicula(titulo))
    df.reset_index()
    df.drop_duplicates()
    return df

In [5]:
detallePelicula("Cars_2")

21


Unnamed: 0,name,director,country,abstract,starring,studio,producer,music
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,,,Denise Ream,Michael_Giacchino
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Emily Mortimer,,Denise Ream,Michael_Giacchino
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Larry the Cable Guy,,Denise Ream,Michael_Giacchino
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Michael Caine,,Denise Ream,Michael_Giacchino
5,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Owen Wilson,,Denise Ream,Michael_Giacchino
6,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,John Turturro,,Denise Ream,Michael_Giacchino
7,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,Walt Disney Pictures,Denise Ream,Michael_Giacchino
8,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,,Walt Disney Pictures,Denise Ream,Michael_Giacchino
9,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Emily Mortimer,Walt Disney Pictures,Denise Ream,Michael_Giacchino


## 2. Filtrando los datos de los actores (usando SPARQL)

In [6]:
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detalleActor(nombre):
    sparql.setQuery('''
        SELECT ?name ?birthDate ?birthPlace ?abstract
        WHERE { 
                dbr:'''+nombre+''' dbp:name ?name .
                dbr:'''+nombre+''' dbp:birthDate ?birthDate .
                dbr:'''+nombre+''' dbp:birthPlace ?birthPlace .
                dbr:'''+nombre+''' dbo:abstract ?abstract .

                FILTER (lang(?abstract) = 'en')
        }''')
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()
    
    diccionarios = []
    for res in qres["results"]["bindings"]:
        dic ={
            "name":res["name"]["value"].split("/")[-1],
            "birthDate":res["birthDate"]["value"],
            "abstract":res["abstract"]["value"],
            "birthPlace":res["birthPlace"]["value"].split("/")[-1],
        }
        diccionarios.append(dic)
    print(len(diccionarios))
    
    dics_string = json.dumps(diccionarios)  
    a_json = json.loads(dics_string)
    dataframe = pd.DataFrame.from_dict(a_json)
    dataframe.drop_duplicates()
    
    return dataframe

In [7]:
def nuevoActor(nombre):
    dataframe = detalleActor(nombre)
    dataframe.reset_index()
    dataframe.drop_duplicates()
    return dataframe

def enriquecerActor(dataframe,nombre):
    df=dataframe.append(detalleActor(nombre))
    df.reset_index()
    df.drop_duplicates()
    return df

In [8]:
detalleActor("Eddie_Izzard")

2


Unnamed: 0,name,birthDate,abstract,birthPlace
0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony


## 3. Método de Enriquecimiento Semántico

### 3.1 Enriquecimiento Películas

In [9]:
dataframe = nuevo ("Cars_2")

21


In [10]:
dataframe = enriquecer(dataframe,"Toy_Story_3")

36


  df=dataframe.append(detallePelicula(titulo))


In [11]:
dataframe = enriquecer(dataframe,"The_Good_Dinosaur")

54


  df=dataframe.append(detallePelicula(titulo))


In [12]:
dataframe.shape

(111, 8)

In [13]:
dataframe.head(5)

Unnamed: 0,name,director,country,abstract,starring,studio,producer,music
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,,,Denise Ream,Michael_Giacchino
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Emily Mortimer,,Denise Ream,Michael_Giacchino
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Larry the Cable Guy,,Denise Ream,Michael_Giacchino
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Michael Caine,,Denise Ream,Michael_Giacchino


### 3.2 Realizando búsqueda actores de las películas

In [29]:
dataframe['new_starring'] = dataframe['starring'].str.replace(' ', '_')
dataframe.head(5)

Unnamed: 0,name,director,country,abstract_x,starring,studio,producer,music,new_starring,actorName,birthDate,abstract_y,birthPlace,birthYear
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony,1962.0
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony,1962.0
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,,,Denise Ream,Michael_Giacchino,,-1,-1,-1,-1,-1.0


In [30]:
dataframe.iloc[:1 , :]['new_starring'][0]

'Eddie_Izzard'

In [31]:
nuevoActor("Eddie_Izzard")

2


Unnamed: 0,name,birthDate,abstract,birthPlace
0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony


In [32]:
df_persona = nuevoActor( dataframe.iloc[:1 , :]['new_starring'][0] )

2


In [33]:
df_persona

Unnamed: 0,name,birthDate,abstract,birthPlace
0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony


In [34]:
for actor in dataframe.iloc[1: , :]['new_starring'].unique():
    try:
        df_persona = enriquecerActor(df_persona, actor)
    except:  
        print(actor)

2


  df=dataframe.append(detalleActor(nombre))


0


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


0


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


0


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


1


  df=dataframe.append(detalleActor(nombre))


In [35]:
dataframe.iloc[1: , :]['new_starring'].unique()

array(['Eddie_Izzard', '', 'Emily_Mortimer', 'Larry_the_Cable_Guy',
       'Michael_Caine', 'Owen_Wilson', 'John_Turturro', 'Tom_Hanks',
       'John_Morris', 'Ned_Beatty', 'Tim_Allen', 'Wallace_Shawn',
       'Joan_Cusack', 'Michael_Keaton', 'Jodi_Benson', 'Estelle_Harris',
       'Don_Rickles', 'John_Ratzenberger', 'Sam_Elliott', 'Jack_Bright',
       'Anna_Paquin', 'Frances_McDormand', 'Steve_Zahn', 'A._J._Buckley',
       'Jeffrey_Wright', 'Raymond_Ochoa'], dtype=object)

In [36]:
df_persona.rename( columns={'name':'actorName'}, inplace=True )

In [37]:
df_persona.head(5)

Unnamed: 0,actorName,birthDate,abstract,birthPlace
0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
0,Emily Mortimer,1971-10-06,Emily Kathleen Anne Mortimer (born 6 October 1...,"Hammersmith, London, England"


### 3.3 Uniendo Tabla Película - Personajes

In [38]:
dataframe = pd.merge( dataframe, df_persona, how='left', left_on='starring', right_on='actorName'  )

In [39]:
dataframe.head(5)

Unnamed: 0,name,director,country,abstract_x,starring,studio,producer,music,new_starring,actorName_x,birthDate_x,abstract_y,birthPlace_x,birthYear,actorName_y,birthDate_y,abstract,birthPlace_y
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden


In [40]:
dataframe.fillna(-1, inplace=True)

In [41]:
dataframe.shape

(156, 18)

In [42]:
dataframe.head(5)

Unnamed: 0,name,director,country,abstract_x,starring,studio,producer,music,new_starring,actorName_x,birthDate_x,abstract_y,birthPlace_x,birthYear,actorName_y,birthDate_y,abstract,birthPlace_y
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,1962-02-07 00:00:00,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony,1962.0,Eddie Izzard,1962-02-07,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden


In [45]:
dataframe.drop(dataframe.columns[[10, 11, 12, 13, 14, 15]], axis=1) 

Unnamed: 0,name,director,country,abstract_x,starring,studio,producer,music,new_starring,actorName_x,abstract,birthPlace_y
0,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
1,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
2,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
3,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden_Colony
4,Cars 2,John_Lasseter,United States,Cars 2 is a 2011 American computer-animated sp...,Eddie Izzard,,Denise Ream,Michael_Giacchino,Eddie_Izzard,Eddie Izzard,Eddie Izzard (/ˈɪzɑːrd/; born 7 February 1962)...,Aden
...,...,...,...,...,...,...,...,...,...,...,...,...
151,The Good Dinosaur,Peter_Sohn,United States,The Good Dinosaur is a 2015 American computer-...,Frances McDormand,Warner Bros. Eastwood Scoring Stage in Burbank...,Chris Montan,Jeff Danna,Frances_McDormand,Frances McDormand,Frances Louise McDormand (born Cynthia Ann Smi...,"Gibson City, Illinois, U.S."
152,The Good Dinosaur,Peter_Sohn,United States,The Good Dinosaur is a 2015 American computer-...,Steve Zahn,Warner Bros. Eastwood Scoring Stage in Burbank...,Chris Montan,Jeff Danna,Steve_Zahn,Steve Zahn,"Steven James Zahn (/zɑːn/; born November 13, 1...","Marshall, Minnesota, U.S."
153,The Good Dinosaur,Peter_Sohn,United States,The Good Dinosaur is a 2015 American computer-...,A. J. Buckley,Warner Bros. Eastwood Scoring Stage in Burbank...,Chris Montan,Jeff Danna,A._J._Buckley,A. J. Buckley,"Alan John ""A. J."" Buckley (born February 9, 19...","Dublin, Ireland"
154,The Good Dinosaur,Peter_Sohn,United States,The Good Dinosaur is a 2015 American computer-...,Jeffrey Wright,Warner Bros. Eastwood Scoring Stage in Burbank...,Chris Montan,Jeff Danna,Jeffrey_Wright,Jeffrey Wright,"Jeffrey Wright (born December 7, 1965) is an A...","Washington, D.C., U.S."


In [None]:
dataframe.head(5)

## Relación Actor - Película - País de Origen - Estudio - Productor - Productor Musical

In [None]:
def grafo(df):
    G = nx.Graph()

    for a in df.itertuples():
        if a.starring != "":
            G.add_node(a.starring, size="20",labelHighlightBold=True)
            G.add_edge(a.starring, a.name)
            G.add_edge(a.name, a.country)
            G.add_edge(a.name, a.studio)
            G.add_edge(a.name, a.producer)
            G.add_edge(a.name, a.music)

    
    net = Network(notebook=True,directed=True)

    net.from_nx(G)
    #net.show_buttons()
    net.repulsion(node_distance=100, spring_length=100,central_gravity=0.5)
    return net.show("example.html")