<a href="https://colab.research.google.com/github/baizhankyzy/female-directors/blob/main/Project_new.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

RDF Graph

In [None]:
from rdflib import ConjunctiveGraph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, RDFS
from google.colab import files
import requests
from urllib.parse import quote

# Fetch the JSON dataset from GitHub.
url = "https://raw.githubusercontent.com/baizhankyzy/female-directors/refs/heads/main/finaldataset.json"
# Without a timeout, requests waits indefinitely on a stalled connection,
# which would hang the notebook cell.
response = requests.get(url, timeout=30)
# Fail with a clear HTTPError on 4xx/5xx instead of trying to decode an
# error page as JSON further down.
response.raise_for_status()
data = response.json()['results']['bindings']  # Access the bindings list

# Graph that accumulates every triple derived from the dataset rows.
g = ConjunctiveGraph()

# Namespaces used for the film class (Wikidata) and the properties (schema.org).
wd = Namespace("http://www.wikidata.org/entity/")
schema = Namespace("http://schema.org/")

# Turn each row of `data` into triples about one film.
for item in data:
    # Film and director are mandatory: a row without them is unusable, so a
    # KeyError here is intentional.
    film = URIRef(item['film']['value'])
    director = URIRef(item['director']['value'])

    # The release date is treated as optional, like genre and country below.
    # The rows look like SPARQL JSON results, where an unbound variable is
    # simply absent from the binding; one such row must not abort the build.
    release_binding = item.get('releaseDate')
    release_date = (
        Literal(release_binding['value']) if release_binding is not None else None
    )

    # Genre and country arrive as human-readable labels; percent-encode them
    # so they can be embedded in a URI, and fall back to a placeholder label
    # when the row has none.
    genre = item.get('genreLabel', {}).get('value', "Unknown Genre")
    genre_uri = URIRef(f"http://example.org/genre/{quote(genre)}")

    country = item.get('countryLabel', {}).get('value', "Unknown Country")
    country_uri = URIRef(f"http://example.org/country/{quote(country)}")

    # Add RDF triples
    g.add((film, RDF.type, wd.Q11424))  # Film type
    g.add((film, schema.director, director))
    if release_date is not None:
        g.add((film, schema.releaseDate, release_date))
    g.add((film, schema.genre, genre_uri))
    g.add((film, schema.countryOfOrigin, country_uri))
    # Keep the original label on the minted genre/country resources.
    g.add((genre_uri, RDFS.label, Literal(genre)))
    g.add((country_uri, RDFS.label, Literal(country)))

# Write the graph to disk as N-Quads, then hand the file to the browser
# through Colab's download helper.
output_file = "output.nq"
g.serialize(destination=output_file, format="nquads")
files.download(output_file)

# Tell the user where the data went.
print(f"Data saved as {output_file} and ready for download.")