# Instalando Dependencias

In [None]:
pip install rdflib

In [None]:
pip install pandas

In [None]:
pip install SPARQLWrapper

In [None]:
pip install pyvis

# Importaciones

In [None]:
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON, N3
from pprint import pprint
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
import pyvis
from pyvis.network import Network
import networkx as nx
import json

# Desarrollo

## 1. Filtrando los datos de la película (usando SPARQL)

In [None]:
# Module-level SPARQL client pointed at the DBpedia endpoint; detallePelicula() below reads this global.
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detallePelicula(titulo):
    """Query DBpedia for one film and return its details as a DataFrame.

    Each binding returned by the endpoint becomes one row. Values that are
    resource IRIs are shortened to their last path segment.

    Parameters
    ----------
    titulo : str
        Local name of the DBpedia resource, e.g. ``"Toy_Story_3"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``director``, ``country``, ``abstract``,
        ``starring``, ``studio``, ``producer`` and ``music``, with duplicate
        rows removed and the index renumbered from 0. The frame is empty
        (but keeps these columns) when the endpoint returns no bindings.

    Raises
    ------
    ValueError
        If ``titulo`` is empty or contains a character that cannot appear
        inside a SPARQL IRI reference.
    """
    # Whitespace and these characters are not allowed between '<' and '>' in
    # SPARQL; rejecting them also keeps the value from escaping the IRI.
    if not titulo or any(ch.isspace() or ch in '<>"{}|^`\\' for ch in titulo):
        raise ValueError("invalid DBpedia resource name: %r" % (titulo,))

    # A full IRI is used instead of the `dbr:` prefixed form because prefixed
    # names break on characters such as '(', ',' or a trailing '.'.
    recurso = '<http://dbpedia.org/resource/' + titulo + '>'

    sparql.setQuery('''
        SELECT ?name ?director ?country ?starring ?abstract ?studio ?producer ?music
        WHERE { ''' + recurso + ''' dbp:name ?name .
                ''' + recurso + ''' dbo:director ?director .
                ''' + recurso + ''' dbp:country ?country .
                ''' + recurso + ''' dbp:starring ?starring .
                ''' + recurso + ''' dbo:abstract ?abstract .
                ''' + recurso + ''' dbp:studio ?studio .
                ''' + recurso + ''' dbp:producer ?producer .
                ''' + recurso + ''' dbp:music ?music .

                FILTER (lang(?abstract) = 'en')
        }''')
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()

    filas = [
        {
            "name": res["name"]["value"],
            "director": res["director"]["value"].split("/")[-1],
            "country": res["country"]["value"],
            "abstract": res["abstract"]["value"],
            "starring": res["starring"]["value"].split("/")[-1],
            "studio": res["studio"]["value"].split("/")[-1],
            "producer": res["producer"]["value"].split("/")[-1],
            "music": res["music"]["value"].split("/")[-1],
        }
        for res in qres["results"]["bindings"]
    ]
    # Number of raw bindings received (before duplicates are removed).
    print(len(filas))

    columnas = ["name", "director", "country", "abstract",
                "starring", "studio", "producer", "music"]
    # Passing the columns explicitly keeps the schema even with zero rows.
    dataframe = pd.DataFrame(filas, columns=columnas)

    # drop_duplicates() returns a new frame; its result must be kept.
    return dataframe.drop_duplicates().reset_index(drop=True)

In [None]:
def nuevo(titulo):
    """Start a new film table from a single DBpedia title.

    Parameters
    ----------
    titulo : str
        Resource name passed straight to ``detallePelicula``.

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``detallePelicula`` with duplicates removed and
        the index renumbered from 0.
    """
    dataframe = detallePelicula(titulo)
    # Both calls return new frames, so the result has to be chained/returned;
    # calling them as bare statements leaves the frame untouched.
    return dataframe.drop_duplicates().reset_index(drop=True)

def enriquecer(dataframe,titulo):
    """Return the film table extended with the rows of one more title.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Film table built so far; it is not modified.
    titulo : str
        Resource name passed straight to ``detallePelicula``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the previous rows followed by the new ones, with
        duplicates removed and the index renumbered from 0.
    """
    nuevas = detallePelicula(titulo)
    # DataFrame.append no longer exists in pandas 2.x; pd.concat is the
    # replacement. Empty frames are left out so they cannot affect the
    # resulting dtypes.
    partes = [parte for parte in (dataframe, nuevas) if not parte.empty]
    df = pd.concat(partes, ignore_index=True) if partes else dataframe.copy()
    # Keep the results: these methods return new frames.
    return df.drop_duplicates().reset_index(drop=True)

## 2. Filtrando los datos de los actores (usando SPARQL)

In [None]:
# Rebinds the module-level `sparql` client to the same DBpedia endpoint; detalleActor() below reads this global.
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
def detalleActor(nombre):
    """Query DBpedia for one person and return their details as a DataFrame.

    Each binding returned by the endpoint becomes one row. ``name`` and
    ``birthPlace`` are shortened to the last ``/``-separated segment.

    Parameters
    ----------
    nombre : str
        Local name of the DBpedia resource, e.g. ``"Tom_Hanks"``.

    Returns
    -------
    pandas.DataFrame
        Columns ``name``, ``birthDate``, ``abstract`` and ``birthPlace``,
        with duplicate rows removed and the index renumbered from 0. The
        frame is empty (but keeps these columns) when the endpoint returns
        no bindings.

    Raises
    ------
    ValueError
        If ``nombre`` is empty or contains a character that cannot appear
        inside a SPARQL IRI reference.
    """
    # Whitespace and these characters are not allowed between '<' and '>' in
    # SPARQL; rejecting them also keeps the value from escaping the IRI.
    if not nombre or any(ch.isspace() or ch in '<>"{}|^`\\' for ch in nombre):
        raise ValueError("invalid DBpedia resource name: %r" % (nombre,))

    # A full IRI is used instead of the `dbr:` prefixed form because prefixed
    # names break on characters such as '(', ',' or a trailing '.'.
    recurso = '<http://dbpedia.org/resource/' + nombre + '>'

    sparql.setQuery('''
        SELECT ?name ?birthDate ?birthPlace ?abstract
        WHERE { 
                ''' + recurso + ''' dbp:name ?name .
                ''' + recurso + ''' dbp:birthDate ?birthDate .
                ''' + recurso + ''' dbp:birthPlace ?birthPlace .
                ''' + recurso + ''' dbo:abstract ?abstract .

                FILTER (lang(?abstract) = 'en')
        }''')
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()

    filas = [
        {
            "name": res["name"]["value"].split("/")[-1],
            "birthDate": res["birthDate"]["value"],
            "abstract": res["abstract"]["value"],
            "birthPlace": res["birthPlace"]["value"].split("/")[-1],
        }
        for res in qres["results"]["bindings"]
    ]
    # Number of raw bindings received (before duplicates are removed).
    print(len(filas))

    columnas = ["name", "birthDate", "abstract", "birthPlace"]
    # Passing the columns explicitly keeps the schema even with zero rows.
    dataframe = pd.DataFrame(filas, columns=columnas)

    # drop_duplicates() returns a new frame; its result must be kept.
    return dataframe.drop_duplicates().reset_index(drop=True)

In [None]:
def nuevoActor(nombre):
    """Start a new actor table from a single DBpedia resource name.

    Parameters
    ----------
    nombre : str
        Resource name passed straight to ``detalleActor``.

    Returns
    -------
    pandas.DataFrame
        The rows returned by ``detalleActor`` with duplicates removed and the
        index renumbered from 0.
    """
    dataframe = detalleActor(nombre)
    # Both calls return new frames, so the result has to be chained/returned;
    # calling them as bare statements leaves the frame untouched.
    return dataframe.drop_duplicates().reset_index(drop=True)

def enriquecerActor(dataframe,nombre):
    """Return the actor table extended with the rows of one more person.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Actor table built so far; it is not modified.
    nombre : str
        Resource name passed straight to ``detalleActor``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the previous rows followed by the new ones, with
        duplicates removed and the index renumbered from 0.
    """
    nuevas = detalleActor(nombre)
    # DataFrame.append no longer exists in pandas 2.x; pd.concat is the
    # replacement. Empty frames are left out so they cannot affect the
    # resulting dtypes.
    partes = [parte for parte in (dataframe, nuevas) if not parte.empty]
    df = pd.concat(partes, ignore_index=True) if partes else dataframe.copy()
    # Keep the results: these methods return new frames.
    return df.drop_duplicates().reset_index(drop=True)

## 3. Método de Enriquecimiento Semántico

### 3.1 Enriquecimiento Películas

In [None]:
# Seed the film table with the first title; every following cell rebinds
# `dataframe` to the table extended with one more film.
dataframe = nuevo ("Cars_2")

In [None]:
dataframe = enriquecer(dataframe,"Toy_Story_3")

In [None]:
dataframe = enriquecer(dataframe,"The_Good_Dinosaur")

In [None]:
dataframe = enriquecer(dataframe,"The_Lizzie_McGuire_Movie")

In [None]:
dataframe = enriquecer(dataframe,"Wild_Hogs")

In [None]:
dataframe = enriquecer(dataframe,"Forrest_Gump")

In [None]:
# Preview the accumulated film rows (at most 50).
dataframe.head(50)

### 3.2 Realizando la búsqueda de los actores de las películas

In [None]:
# Add a column with spaces replaced by underscores; this value is what gets
# passed to nuevoActor()/enriquecerActor() as the resource name.
dataframe['only_starring'] = dataframe['starring'].str.replace(' ', '_')
dataframe.head(50)

In [None]:
# Show the actor of the first row: a one-row slice, then the entry labelled 0.
dataframe.iloc[:1 , :]['only_starring'][0]

In [None]:
# Seed the actor table with the first row's actor.
df_actor = nuevoActor( dataframe.iloc[:1 , :]['only_starring'][0] )

In [None]:
# Look up every remaining distinct actor (the first row seeded `df_actor`).
for actor in dataframe.iloc[1: , :]['only_starring'].unique():
    try:
        df_actor = enriquecerActor(df_actor, actor)
    except Exception as error:
        # Best effort: report the actor that failed and why, then continue.
        # Catching Exception (not a bare except) lets a kernel interrupt
        # actually stop this network-bound loop.
        print(f"{actor}: {error}")

In [None]:
# Distinct actor names from every row except the first.
dataframe.iloc[1: , :]['only_starring'].unique()

In [None]:
# 'actorName' is the right-hand key of the later merge; the film table already
# has its own 'name' column. The rename is done in place on `df_actor`.
df_actor.rename( columns={'name':'actorName'}, inplace=True )

In [None]:
# Preview the actor table (at most 100 rows).
df_actor.head(100)

### 3.3 Uniendo Tabla Película - Personajes

In [None]:
# Left join: keep every film row and attach the actor columns where
# 'starring' equals 'actorName'.
# NOTE(review): 'starring' is the last path segment of the DBpedia value while
# 'actorName' comes from dbp:name - confirm both use the same spelling
# (underscores vs. spaces), otherwise rows will not match.
# NOTE: this cell rebinds `dataframe`, so running it twice merges the actor
# columns a second time.
dataframe = pd.merge( dataframe, df_actor, how='left', left_on='starring', right_on='actorName'  )

In [None]:
dataframe.head(50)

In [None]:
# Replace every missing value (e.g. films rows without a matching actor) with
# the placeholder -1, in place.
dataframe.fillna(-1, inplace=True)

In [None]:
dataframe.head(200)

## Relación Actor - Película - Edad - Estudio - LugarNacimiento

In [None]:
def grafo(df, salida="example.html", valores_faltantes=("", -1)):
    """Render the actor / film relationships of ``df`` as a pyvis network.

    For every row an edge is added between the actor and the film, between
    the film and its country, studio, producer and music, and between the
    actor and their birth date and birth place.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``starring``, ``name``, ``country``,
        ``studio``, ``producer``, ``music``, ``birthDate`` and ``birthPlace``.
    salida : str, optional
        Name of the HTML file written by pyvis. Defaults to
        ``"example.html"``.
    valores_faltantes : tuple, optional
        Values treated as "no data" in addition to ``None``/NaN. The default
        covers the empty string and the ``-1`` placeholder.

    Returns
    -------
    Whatever ``Network.show`` returns for the generated file.
    """
    def _presente(valor):
        # Missing values must not become nodes: a shared placeholder such as
        # -1 would link unrelated rows together, and NaN is not accepted by
        # pyvis as a node id.
        if valor is None or pd.isna(valor):
            return False
        return valor not in valores_faltantes

    G = nx.Graph()

    for a in df.itertuples():
        if not _presente(a.starring):
            continue
        G.add_node(a.starring, size=20, labelHighlightBold=True)
        relaciones = (
            (a.starring, a.name),
            (a.name, a.country),
            (a.name, a.studio),
            (a.name, a.producer),
            (a.name, a.music),
            (a.starring, a.birthDate),
            (a.starring, a.birthPlace),
        )
        for origen, destino in relaciones:
            if _presente(origen) and _presente(destino):
                G.add_edge(origen, destino)

    net = Network(notebook=True, directed=True)
    net.from_nx(G)
    net.repulsion(node_distance=100, spring_length=100, central_gravity=0.5)
    return net.show(salida)

In [None]:
# Only the last expression of a cell is rendered, so a `head()` call placed
# before this line would be computed and thrown away; the table is already
# previewed in the cells above.
grafo(dataframe)