Graph Mutations with the DataFrameImporter
==========================================

The DataFrameImporter now supports graph mutation operations: insert, update, upsert, and delete.

In [None]:
import os

import dask.dataframe
import pandas
from katana import remote
from katana.remote.import_data import DataFrameImporter

# Connect to the Katana Server
client = remote.Client()

print(f"katana version: {client.server_version}")

__Breaking API Change:__ Users are now required to declare the operation in all DataFrameImporter contexts.

### Operation: Insert
Create a new graph and add movie nodes with votes as a node property 

In [None]:
# Construct a graph with 2 partitions
g = client.create_graph(num_partitions=2)


# Add movies as nodes
# Add votes as node property to the movies as part of the same dataframe
movies = pandas.DataFrame(
    {
        "name": [
            "The Matrix",
            "The Matrix Reloaded",
            "The Matrix Revolutions",
            "The Devil's Advocate",
            "A Few Good Men",
            "Top Gun",
            "Jerry Maguire",
            "Stand By Me",
            "As Good as It Gets",
            "What Dreams May Come",
            "Snow Falling on Cedars",
            "You've Got Mail",
            "Sleepless in Seattle",
            "Joe Versus the Volcano",
            "When Harry Met Sally",
            "That Thing You Do",
            "The Replacements",
            "RescueDawn",
            "The Birdcage",
            "Unforgiven",
            "Johnny Mnemonic",
            "Cloud Atlas",
            "The Da Vinci Code",
            "V for Vendetta",
            "Speed Racer",
            "Ninja Assassin",
            "The Green Mile",
            "Frost/Nixon",
            "Hoffa",
            "Apollo 13",
            "Twister",
            "Cast Away",
            "One Flew Over the Cuckoo's Nest",
            "Something's Gotta Give",
            "Bicentennial Man",
            "Charlie Wilson's War",
            "The Polar Express",
            "A League of Their Own",
        ],
        "votes": [
            2762,
            594,
            608,
            86,
            200,
            281,
            72,
            38,
            55,
            17,
            21,
            21,
            34,
            14,
            10,
            8,
            8,
            11,
            22,
            10,
            25,
            82,
            23,
            22,
            5,
            7,
            10,
            0,
            12,
            40,
            19,
            61,
            5,
            16,
            11,
            8,
            22,
            30,
        ],
    }
)


# Load every movie row as a node in the "movie" id space, keyed by its name.
with DataFrameImporter(g) as importer:
    importer.nodes_dataframe(movies, id_column="name", id_space="movie")
    # Insert used to be the implicit operation; now that other modes are
    # supported, the operation has to be declared explicitly.
    importer.insert()

# The graph should now hold exactly one node per movie row.
expected_node_count = len(movies)
assert g.num_nodes() == expected_node_count

__New Feature:__ We can now declare other operation types to manipulate data.

### Operation: Update
Add more properties to the movie nodes 

In [None]:
# Add new properties to existing nodes by adding them to the dataframe and updating the graph.
movies["released"] = [
    1999,
    2003,
    2003,
    1997,
    1992,
    1986,
    2000,
    1986,
    1997,
    1998,
    1999,
    1998,
    1993,
    1990,
    1998,
    1996,
    2000,
    2006,
    1996,
    1992,
    1995,
    2012,
    2006,
    2006,
    2008,
    2009,
    1999,
    2008,
    1992,
    1995,
    1996,
    2000,
    1975,
    2003,
    1999,
    2007,
    2004,
    1992,
]
movies["released"] = movies["released"].astype("int32")


movies["tagline"] = [
    "Welcome to the Real World",
    "Free your mind",
    "Everything that has a beginning has an end",
    "Evil has its winning ways",
    "In the heart of the nation's capital, in a courthouse of the U.S. government, one man will stop at nothing to keep his honor, and one will stop at nothing to find the truth.",
    "I feel the need, the need for speed.",
    "The rest of his life begins now.",
    "For some, it's the last real taste of innocence, and the first real taste of life. But for everyone, it's the time that memories are made of.",
    "A comedy from the heart that goes for the throat.",
    "After life there is more. The end is just the beginning.",
    "First loves last. Forever.",
    "At odds in life... in love on-line.",
    "What if someone you never met, someone you never saw, someone you never knew was the only someone for you?",
    "A story of love, lava and burning desire.",
    "Can two friends sleep together and still love each other in the morning?",
    "In every life there comes a time when that thing you dream becomes that thing you do",
    "Pain heals, Chicks dig scars... Glory lasts forever",
    "Based on the extraordinary true story of one man's fight for freedom",
    "Come as you are",
    "It's a hell of a thing, killing a man",
    "The hottest data on earth. In the coolest head in town",
    "Everything is connected",
    "Break The Codes",
    "Freedom! Forever!",
    "Speed has no limits",
    "Prepare to enter a secret world of assassins",
    "Walk a mile you'll never forget.",
    "400 million people were waiting for the truth.",
    "He didn't want law. He wanted justice.",
    "Houston, we have a problem.",
    "Don't Breathe. Don't Look Back.",
    "At the edge of the world, his journey begins.",
    "If he's crazy, what does that make you?",
    "null",
    "One robot's 200 year journey to become an ordinary man.",
    "A stiff drink. A little mascara. A lot of nerve. Who said they couldn't bring down the Soviet empire.",
    "This Holiday Season… Believe",
    "Once in a lifetime you get a chance to do something different.",
]
movies["tagline"] = movies["tagline"].astype("string")

# Send the enriched movie rows (now carrying "released" and "tagline") back to
# the graph as an update of the "movie" id space.
with DataFrameImporter(g) as importer:
    importer.nodes_dataframe(movies, id_column="name", id_space="movie")
    importer.update()

# Fetch the movie nodes so the result of the update can be inspected.
movie_nodes_query = "match (n:movie) return n"
result = g.query(movie_nodes_query)

### Operation: Insert
Add a **new** node type, Person, to the same graph.

In [None]:
# Add person as nodes
persons = pandas.DataFrame(
    {
        "name": [
            "Keanu Reeves",
            "Carrie-Anne Moss",
            "Laurence Fishburne",
            "Hugo Weaving",
            "Lilly Wachowski",
            "Lana Wachowski",
            "Joel Silver",
            "Emil Eifrem",
            "Charlize Theron",
            "Al Pacino",
            "Taylor Hackford",
            "Tom Cruise",
            "Jack Nicholson",
            "Demi Moore",
            "Kevin Bacon",
            "Kiefer Sutherland",
            "Noah Wyle",
            "Cuba Gooding Jr.",
            "Kevin Pollak",
            "J.T. Walsh",
            "James Marshall",
            "Christopher Guest",
            "Rob Reiner",
            "Aaron Sorkin",
            "Kelly McGillis",
            "Val Kilmer",
            "Anthony Edwards",
            "Tom Skerritt",
            "Meg Ryan",
            "Tony Scott",
            "Jim Cash",
            "Renee Zellweger",
            "Kelly Preston",
            "Jerry O'Connell",
            "Jay Mohr",
            "Bonnie Hunt",
            "Regina King",
            "Jonathan Lipnicki",
            "Cameron Crowe",
            "River Phoenix",
            "Corey Feldman",
            "Wil Wheaton",
            "John Cusack",
            "Marshall Bell",
            "Helen Hunt",
            "Greg Kinnear",
            "James L. Brooks",
            "Annabella Sciorra",
            "Max von Sydow",
            "Werner Herzog",
            "Robin Williams",
            "Vincent Ward",
            "Ethan Hawke",
            "Rick Yune",
            "James Cromwell",
            "Scott Hicks",
            "Parker Posey",
            "Dave Chappelle",
            "Steve Zahn",
            "Tom Hanks",
            "Nora Ephron",
            "Rita Wilson",
            "Bill Pullman",
            "Victor Garber",
            "Rosie O'Donnell",
            "John Patrick Stanley",
            "Nathan Lane",
            "Billy Crystal",
            "Carrie Fisher",
            "Bruno Kirby",
            "Liv Tyler",
            "Brooke Langton",
            "Gene Hackman",
            "Orlando Jones",
            "Howard Deutch",
            "Christian Bale",
            "Zach Grenier",
            "Mike Nichols",
            "Richard Harris",
            "Clint Eastwood",
            "Takeshi Kitano",
            "Dina Meyer",
            "Ice-T",
            "Robert Longo",
            "Halle Berry",
            "Jim Broadbent",
            "Tom Tykwer",
            "David Mitchell",
            "Stefan Arndt",
            "Ian McKellen",
            "Audrey Tautou",
            "Paul Bettany",
            "Ron Howard",
            "Natalie Portman",
            "Stephen Rea",
            "John Hurt",
            "Ben Miles",
            "Emile Hirsch",
            "John Goodman",
            "Susan Sarandon",
            "Matthew Fox",
            "Christina Ricci",
            "Rain",
            "Naomie Harris",
            "Michael Clarke Duncan",
            "David Morse",
            "Sam Rockwell",
            "Gary Sinise",
            "Patricia Clarkson",
            "Frank Darabont",
            "Frank Langella",
            "Michael Sheen",
            "Oliver Platt",
            "Danny DeVito",
            "John C. Reilly",
            "Ed Harris",
            "Bill Paxton",
            "Philip Seymour Hoffman",
            "Jan de Bont",
            "Robert Zemeckis",
            "Milos Forman",
            "Diane Keaton",
            "Nancy Meyers",
            "Chris Columbus",
            "Julia Roberts",
            "Madonna",
            "Geena Davis",
            "Lori Petty",
            "Penny Marshall",
            "Paul Blythe",
            "Angela Scope",
            "Jessica Thompson",
            "James Thompson",
        ]
    }
)

persons["born"] = [
    1964,
    1967,
    1961,
    1960,
    1967,
    1965,
    1952,
    1978,
    1975,
    1940,
    1944,
    1962,
    1937,
    1962,
    1958,
    1966,
    1971,
    1968,
    1957,
    1943,
    1967,
    1948,
    1947,
    1961,
    1957,
    1959,
    1962,
    1933,
    1961,
    1944,
    1941,
    1969,
    1962,
    1974,
    1970,
    1961,
    1971,
    1996,
    1957,
    1970,
    1971,
    1972,
    1966,
    1942,
    1963,
    1963,
    1940,
    1960,
    1929,
    1942,
    1951,
    1956,
    1970,
    1971,
    1940,
    1953,
    1968,
    1973,
    1967,
    1956,
    1941,
    1956,
    1953,
    1949,
    1962,
    1950,
    1956,
    1948,
    1956,
    1949,
    1977,
    1970,
    1930,
    1968,
    1950,
    1974,
    1954,
    1931,
    1930,
    1930,
    1947,
    1968,
    1958,
    1953,
    1966,
    1949,
    1965,
    1969,
    1961,
    1939,
    1976,
    1971,
    1954,
    1981,
    1946,
    1940,
    1967,
    1985,
    1960,
    1946,
    1966,
    1980,
    1982,
    0,
    1957,
    1953,
    1968,
    1955,
    1959,
    1959,
    1938,
    1969,
    1960,
    1944,
    1965,
    1950,
    1955,
    1967,
    1943,
    1951,
    1932,
    1946,
    1949,
    1958,
    1967,
    1954,
    1956,
    1963,
    1943,
    0,
    0,
    0,
    0,
]

# Register each person as a node in the "person" id space, keyed by name.
with DataFrameImporter(g) as importer:
    importer.nodes_dataframe(persons, id_column="name", id_space="person")
    # The operation has to be declared explicitly now that the importer
    # supports more than one mutation mode.
    importer.insert()

# The movie nodes inserted earlier are counted together with the new persons.
expected_node_count = len(persons) + len(movies)
assert g.num_nodes() == expected_node_count

__New Feature:__ Add edges without specifying nodes

### Operation: Insert Edges
Create edge type **ACTED_IN** from **Person** nodes to **Movie** nodes

In [None]:
# add some new edges to the graph
# Edge Type: acted_in
actor_list = [
    "Emil Eifrem",
    "Hugo Weaving",
    "Laurence Fishburne",
    "Carrie-Anne Moss",
    "Keanu Reeves",
    "Hugo Weaving",
    "Laurence Fishburne",
    "Carrie-Anne Moss",
    "Keanu Reeves",
    "Hugo Weaving",
    "Laurence Fishburne",
    "Carrie-Anne Moss",
    "Keanu Reeves",
    "Al Pacino",
    "Charlize Theron",
    "Keanu Reeves",
    "James Marshall",
    "Kevin Pollak",
    "J.T. Walsh",
    "Aaron Sorkin",
    "Cuba Gooding Jr.",
    "Christopher Guest",
    "Noah Wyle",
    "Kiefer Sutherland",
    "Kevin Bacon",
    "Demi Moore",
    "Jack Nicholson",
    "Tom Cruise",
    "Val Kilmer",
    "Meg Ryan",
    "Tom Skerritt",
    "Kelly McGillis",
    "Tom Cruise",
    "Anthony Edwards",
    "Jerry O'Connell",
    "Bonnie Hunt",
    "Jay Mohr",
    "Cuba Gooding Jr.",
    "Jonathan Lipnicki",
    "Renee Zellweger",
    "Kelly Preston",
    "Regina King",
    "Tom Cruise",
    "Jerry O'Connell",
    "River Phoenix",
    "Marshall Bell",
    "Wil Wheaton",
    "Kiefer Sutherland",
    "John Cusack",
    "Corey Feldman",
    "Helen Hunt",
    "Jack Nicholson",
    "Cuba Gooding Jr.",
    "Greg Kinnear",
    "Robin Williams",
    "Cuba Gooding Jr.",
    "Max von Sydow",
    "Werner Herzog",
    "Annabella Sciorra",
    "Ethan Hawke",
    "Rick Yune",
    "Max von Sydow",
    "James Cromwell",
    "Tom Hanks",
    "Parker Posey",
    "Greg Kinnear",
    "Meg Ryan",
    "Steve Zahn",
    "Dave Chappelle",
    "Meg Ryan",
    "Victor Garber",
    "Tom Hanks",
    "Bill Pullman",
    "Rita Wilson",
    "Rosie O'Donnell",
    "Tom Hanks",
    "Nathan Lane",
    "Meg Ryan",
    "Carrie Fisher",
    "Billy Crystal",
    "Bruno Kirby",
    "Meg Ryan",
    "Tom Hanks",
    "Liv Tyler",
    "Charlize Theron",
    "Brooke Langton",
    "Keanu Reeves",
    "Orlando Jones",
    "Gene Hackman",
    "Zach Grenier",
    "Steve Zahn",
    "Christian Bale",
    "Marshall Bell",
    "Robin Williams",
    "Nathan Lane",
    "Gene Hackman",
    "Clint Eastwood",
    "Gene Hackman",
    "Richard Harris",
    "Ice-T",
    "Dina Meyer",
    "Keanu Reeves",
    "Takeshi Kitano",
    "Tom Hanks",
    "Jim Broadbent",
    "Halle Berry",
    "Hugo Weaving",
    "Tom Hanks",
    "Ian McKellen",
    "Audrey Tautou",
    "Paul Bettany",
    "John Hurt",
    "Stephen Rea",
    "Natalie Portman",
    "Hugo Weaving",
    "Ben Miles",
    "Emile Hirsch",
    "Rain",
    "Christina Ricci",
    "Ben Miles",
    "Susan Sarandon",
    "John Goodman",
    "Matthew Fox",
    "Rain",
    "Ben Miles",
    "Rick Yune",
    "Naomie Harris",
    "Sam Rockwell",
    "Bonnie Hunt",
    "Patricia Clarkson",
    "James Cromwell",
    "Tom Hanks",
    "Michael Clarke Duncan",
    "David Morse",
    "Gary Sinise",
    "Sam Rockwell",
    "Michael Sheen",
    "Frank Langella",
    "Oliver Platt",
    "Kevin Bacon",
    "John C. Reilly",
    "Danny DeVito",
    "J.T. Walsh",
    "Jack Nicholson",
    "Tom Hanks",
    "Ed Harris",
    "Gary Sinise",
    "Kevin Bacon",
    "Bill Paxton",
    "Helen Hunt",
    "Bill Paxton",
    "Philip Seymour Hoffman",
    "Zach Grenier",
    "Helen Hunt",
    "Tom Hanks",
    "Danny DeVito",
    "Jack Nicholson",
    "Keanu Reeves",
    "Diane Keaton",
    "Jack Nicholson",
    "Robin Williams",
    "Oliver Platt",
    "Julia Roberts",
    "Tom Hanks",
    "Philip Seymour Hoffman",
    "Tom Hanks",
    "Tom Hanks",
    "Madonna",
    "Rosie O'Donnell",
    "Geena Davis",
    "Bill Paxton",
    "Lori Petty",
]


movie_list = [
    "The Matrix",
    "The Matrix",
    "The Matrix",
    "The Matrix",
    "The Matrix",
    "The Matrix Reloaded",
    "The Matrix Reloaded",
    "The Matrix Reloaded",
    "The Matrix Reloaded",
    "The Matrix Revolutions",
    "The Matrix Revolutions",
    "The Matrix Revolutions",
    "The Matrix Revolutions",
    "The Devil's Advocate",
    "The Devil's Advocate",
    "The Devil's Advocate",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "A Few Good Men",
    "Top Gun",
    "Top Gun",
    "Top Gun",
    "Top Gun",
    "Top Gun",
    "Top Gun",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Jerry Maguire",
    "Stand By Me",
    "Stand By Me",
    "Stand By Me",
    "Stand By Me",
    "Stand By Me",
    "Stand By Me",
    "Stand By Me",
    "As Good as It Gets",
    "As Good as It Gets",
    "As Good as It Gets",
    "As Good as It Gets",
    "What Dreams May Come",
    "What Dreams May Come",
    "What Dreams May Come",
    "What Dreams May Come",
    "What Dreams May Come",
    "Snow Falling on Cedars",
    "Snow Falling on Cedars",
    "Snow Falling on Cedars",
    "Snow Falling on Cedars",
    "You've Got Mail",
    "You've Got Mail",
    "You've Got Mail",
    "You've Got Mail",
    "You've Got Mail",
    "You've Got Mail",
    "Sleepless in Seattle",
    "Sleepless in Seattle",
    "Sleepless in Seattle",
    "Sleepless in Seattle",
    "Sleepless in Seattle",
    "Sleepless in Seattle",
    "Joe Versus the Volcano",
    "Joe Versus the Volcano",
    "Joe Versus the Volcano",
    "When Harry Met Sally",
    "When Harry Met Sally",
    "When Harry Met Sally",
    "When Harry Met Sally",
    "That Thing You Do",
    "That Thing You Do",
    "That Thing You Do",
    "The Replacements",
    "The Replacements",
    "The Replacements",
    "The Replacements",
    "RescueDawn",
    "RescueDawn",
    "RescueDawn",
    "RescueDawn",
    "The Birdcage",
    "The Birdcage",
    "The Birdcage",
    "Unforgiven",
    "Unforgiven",
    "Unforgiven",
    "Johnny Mnemonic",
    "Johnny Mnemonic",
    "Johnny Mnemonic",
    "Johnny Mnemonic",
    "Cloud Atlas",
    "Cloud Atlas",
    "Cloud Atlas",
    "Cloud Atlas",
    "The Da Vinci Code",
    "The Da Vinci Code",
    "The Da Vinci Code",
    "The Da Vinci Code",
    "V for Vendetta",
    "V for Vendetta",
    "V for Vendetta",
    "V for Vendetta",
    "V for Vendetta",
    "Speed Racer",
    "Speed Racer",
    "Speed Racer",
    "Speed Racer",
    "Speed Racer",
    "Speed Racer",
    "Speed Racer",
    "Ninja Assassin",
    "Ninja Assassin",
    "Ninja Assassin",
    "Ninja Assassin",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "The Green Mile",
    "Frost/Nixon",
    "Frost/Nixon",
    "Frost/Nixon",
    "Frost/Nixon",
    "Frost/Nixon",
    "Hoffa",
    "Hoffa",
    "Hoffa",
    "Hoffa",
    "Apollo 13",
    "Apollo 13",
    "Apollo 13",
    "Apollo 13",
    "Apollo 13",
    "Twister",
    "Twister",
    "Twister",
    "Twister",
    "Cast Away",
    "Cast Away",
    "One Flew Over the Cuckoo's Nest",
    "One Flew Over the Cuckoo's Nest",
    "Something's Gotta Give",
    "Something's Gotta Give",
    "Something's Gotta Give",
    "Bicentennial Man",
    "Bicentennial Man",
    "Charlie Wilson's War",
    "Charlie Wilson's War",
    "Charlie Wilson's War",
    "The Polar Express",
    "A League of Their Own",
    "A League of Their Own",
    "A League of Their Own",
    "A League of Their Own",
    "A League of Their Own",
    "A League of Their Own",
]


# One row per ACTED_IN edge: actor name ("start") -> movie title ("end").
acted_in_movies = pandas.DataFrame(dict(start=actor_list, end=movie_list))

with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        acted_in_movies,
        type="ACTED_IN",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Every row of the frame should have produced exactly one edge.
expected_edge_count = len(acted_in_movies)
assert g.num_edges() == expected_edge_count

### Operation: Update Edges
Add the edge property **role** to the existing edge type **ACTED_IN**

In [None]:
acted_in_movies["role"] = [
    "Emil",
    "Agent Smith",
    "Morpheus",
    "Trinity",
    "Neo",
    "Agent Smith",
    "Morpheus",
    "Trinity",
    "Neo",
    "Agent Smith",
    "Morpheus",
    "Trinity",
    "Neo",
    "John Milton",
    "Mary Ann Lomax",
    "Kevin Lomax",
    "Pfc. Louden Downey",
    "Lt. Sam Weinberg",
    "Lt. Col. Matthew Andrew Markinson",
    "Man in Bar",
    "Cpl. Carl Hammaker",
    "Dr. Stone",
    "Cpl. Jeffrey Barnes",
    "Lt. Jonathan Kendrick",
    "Capt. Jack Ross",
    "Lt. Cdr. JoAnne Galloway",
    "Col. Nathan R. Jessup",
    "Lt. Daniel Kaffee",
    "Iceman",
    "Carole",
    "Viper",
    "Charlie",
    "Maverick",
    "Goose",
    "Frank Cushman",
    "Laurel Boyd",
    "Bob Sugar",
    "Rod Tidwell",
    "Ray Boyd",
    "Dorothy Boyd",
    "Avery Bishop",
    "Marcee Tidwell",
    "Jerry Maguire",
    "Vern Tessio",
    "Chris Chambers",
    "Mr. Lachance",
    "Gordie Lachance",
    "Ace Merrill",
    "Denny Lachance",
    "Teddy Duchamp",
    "Carol Connelly",
    "Melvin Udall",
    "Frank Sachs",
    "Simon Bishop",
    "Chris Nielsen",
    "Albert Lewis",
    "The Tracker",
    "The Face",
    "Annie Collins-Nielsen",
    "Ishmael Chambers",
    "Kazuo Miyamoto",
    "Nels Gudmundsson",
    "Judge Fielding",
    "Joe Fox",
    "Patricia Eden",
    "Frank Navasky",
    "Kathleen Kelly",
    "George Pappas",
    "Kevin Jackson",
    "Annie Reed",
    "Greg",
    "Sam Baldwin",
    "Walter",
    "Suzy",
    "Becky",
    "Joe Banks",
    "Baw",
    "DeDe,Angelica Graynamore,Patricia Graynamore",
    "Marie",
    "Harry Burns",
    "Jess",
    "Sally Albright",
    "Mr. White",
    "Faye Dolan",
    "Tina",
    "Annabelle Farrell",
    "Shane Falco",
    "Clifford Franklin",
    "Jimmy McGinty",
    "Squad Leader",
    "Duane",
    "Dieter Dengler",
    "Admiral",
    "Armand Goldman",
    "Albert Goldman",
    "Sen. Kevin Keeley",
    "Bill Munny",
    "Little Bill Daggett",
    "English Bob",
    "J-Bone",
    "Jane",
    "Johnny Mnemonic",
    "Takahashi",
    "Zachry,Dr. Henry Goose,Isaac Sachs,Dermot Hoggins",
    "Vyvyan Ayrs,Captain Molyneux,Timothy Cavendish",
    "Luisa Rey,Jocasta Ayrs,Ovid,Meronym",
    "Bill Smoke,Haskell Moore,Tadeusz Kesselring,Nurse Noakes,Boardman Mephi,Old Georgie",
    "Dr. Robert Langdon",
    "Sir Leight Teabing",
    "Sophie Neveu",
    "Silas",
    "High Chancellor Adam Sutler",
    "Eric Finch",
    "Evey Hammond",
    "V",
    "Dascomb",
    "Speed Racer",
    "Taejo Togokahn",
    "Trixie",
    "Cass Jones",
    "Mom",
    "Pops",
    "Racer X",
    "Raizo",
    "Ryan Maslow",
    "Takeshi",
    "Mika Coretti",
    "Wild Bill Wharton",
    "Jan Edgecomb",
    "Melinda Moores",
    "Warden Hal Moores",
    "Paul Edgecomb",
    "John Coffey",
    "Brutus Brutal Howell",
    "Burt Hammersmith",
    "James Reston, Jr.",
    "David Frost",
    "Richard Nixon",
    "Bob Zelnick",
    "Jack Brennan",
    "Peter Pete Connelly",
    "Robert Bobby Ciaro",
    "Frank Fitzsimmons",
    "Hoffa",
    "Jim Lovell",
    "Gene Kranz",
    "Ken Mattingly",
    "Jack Swigert",
    "Fred Haise",
    "Dr. Jo Harding",
    "Bill Harding",
    "Dustin Dusty Davis",
    "Eddie",
    "Kelly Frears",
    "Chuck Noland",
    "Martini",
    "Randle McMurphy",
    "Julian Mercer",
    "Erica Barry",
    "Harry Sanborn",
    "Andrew Marin",
    "Rupert Burns",
    "Joanne Herring",
    "Rep. Charlie Wilson",
    "Gust Avrakotos",
    "Hero Boy,Father,Conductor,Hobo,Scrooge,Santa Claus",
    "Jimmy Dugan",
    "All the Way Mae Mordabito",
    "Doris Murphy",
    "Dottie Hinson",
    "Bob Hinson",
    "Kit Keller",
]

# Re-submit the ACTED_IN rows, which now carry the extra "role" column, as an
# update rather than an insert.
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        acted_in_movies,
        type="ACTED_IN",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.update()

### Operation: Insert Edges
Add new edge types **REVIEWED**, **PRODUCED**, **WROTE**, **DIRECTED** and **FOLLOWS** to the same graph

In [None]:
# REVIEWED edge data. The four lists are parallel: position i of each list
# describes the same review (reviewer, movie, summary text, rating).
reviewed_actor = [
    "Jessica Thompson",
    "James Thompson",
    "Angela Scope",
    "Jessica Thompson",
    "Jessica Thompson",
    "Jessica Thompson",
    "Jessica Thompson",
    "Jessica Thompson",
    "James Thompson",
]
reviewed_movie = [
    "Jerry Maguire",
    "The Replacements",
    "The Replacements",
    "The Replacements",
    "The Birdcage",
    "Unforgiven",
    "Cloud Atlas",
    "The Da Vinci Code",
    "The Da Vinci Code",
]
review_summaries = [
    "You had me at Jerry",
    "The coolest football movie ever",
    "Pretty funny at times",
    "Silly, but fun",
    "Slapstick redeemed only by the Robin Williams and Gene Hackman's stellar performances",
    "Dark, but compelling",
    "An amazing journey",
    "A solid romp",
    "Fun, but a little far fetched",
]
review_ratings = [92, 100, 62, 65, 45, 85, 95, 68, 65]

# Build the frame in one step; the key order fixes the column order
# (start, end, summary, rating).
reviewed_actor_movies = pandas.DataFrame(
    {
        "start": reviewed_actor,
        "end": reviewed_movie,
        "summary": review_summaries,
        "rating": review_ratings,
    }
)


# Insert the REVIEWED edges (person -> movie) together with their
# "summary" and "rating" columns.
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        reviewed_actor_movies,
        type="REVIEWED",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Running total: one edge per row of every edge frame inserted so far.
expected_edge_count = sum(len(frame) for frame in (acted_in_movies, reviewed_actor_movies))
assert g.num_edges() == expected_edge_count

In [None]:
# PRODUCED edge data, written as (producer, movie) pairs so that each edge's
# two endpoints sit next to each other.
produced_pairs = [
    ("Joel Silver", "The Matrix"),
    ("Joel Silver", "The Matrix Reloaded"),
    ("Joel Silver", "The Matrix Revolutions"),
    ("Cameron Crowe", "Jerry Maguire"),
    ("Nora Ephron", "When Harry Met Sally"),
    ("Rob Reiner", "When Harry Met Sally"),
    ("Stefan Arndt", "Cloud Atlas"),
    ("Lana Wachowski", "V for Vendetta"),
    ("Lilly Wachowski", "V for Vendetta"),
    ("Joel Silver", "V for Vendetta"),
    ("Joel Silver", "Speed Racer"),
    ("Lana Wachowski", "Ninja Assassin"),
    ("Joel Silver", "Ninja Assassin"),
    ("Lilly Wachowski", "Ninja Assassin"),
    ("Nancy Meyers", "Something's Gotta Give"),
]

# Split the pairs back into the two parallel columns the frame is built from.
produced_actor = [producer for producer, _ in produced_pairs]
produced_movie = [movie for _, movie in produced_pairs]

produced_person_movies = pandas.DataFrame(dict(start=produced_actor, end=produced_movie))


# Insert the PRODUCED edges (person -> movie).
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        produced_person_movies,
        type="PRODUCED",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Running total: one edge per row of every edge frame inserted so far.
inserted_edge_frames = (acted_in_movies, reviewed_actor_movies, produced_person_movies)
expected_edge_count = sum(len(frame) for frame in inserted_edge_frames)
assert g.num_edges() == expected_edge_count

In [None]:
# WROTE edge data, written as (writer, movie) pairs so that each edge's two
# endpoints sit next to each other.
wrote_pairs = [
    ("Aaron Sorkin", "A Few Good Men"),
    ("Jim Cash", "Top Gun"),
    ("Cameron Crowe", "Jerry Maguire"),
    ("Nora Ephron", "When Harry Met Sally"),
    ("David Mitchell", "Cloud Atlas"),
    ("Lana Wachowski", "V for Vendetta"),
    ("Lilly Wachowski", "V for Vendetta"),
    ("Lana Wachowski", "Speed Racer"),
    ("Lilly Wachowski", "Speed Racer"),
    ("Nancy Meyers", "Something's Gotta Give"),
]

# Split the pairs back into the two parallel columns the frame is built from.
wrote_person = [writer for writer, _ in wrote_pairs]
wrote_movie = [movie for _, movie in wrote_pairs]

wrote_person_movies = pandas.DataFrame(dict(start=wrote_person, end=wrote_movie))


# Insert the WROTE edges (person -> movie).
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        wrote_person_movies,
        type="WROTE",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Running total: one edge per row of every edge frame inserted so far.
inserted_edge_frames = (
    acted_in_movies,
    reviewed_actor_movies,
    produced_person_movies,
    wrote_person_movies,
)
expected_edge_count = sum(len(frame) for frame in inserted_edge_frames)
assert g.num_edges() == expected_edge_count

In [None]:
# DIRECTED edge data, written as (director, movie) pairs so that each edge's
# two endpoints sit next to each other.
directed_pairs = [
    ("Lana Wachowski", "The Matrix"),
    ("Lilly Wachowski", "The Matrix"),
    ("Lana Wachowski", "The Matrix Reloaded"),
    ("Lilly Wachowski", "The Matrix Reloaded"),
    ("Lana Wachowski", "The Matrix Revolutions"),
    ("Lilly Wachowski", "The Matrix Revolutions"),
    ("Taylor Hackford", "The Devil's Advocate"),
    ("Rob Reiner", "A Few Good Men"),
    ("Tony Scott", "Top Gun"),
    ("Cameron Crowe", "Jerry Maguire"),
    ("Rob Reiner", "Stand By Me"),
    ("James L. Brooks", "As Good as It Gets"),
    ("Vincent Ward", "What Dreams May Come"),
    ("Scott Hicks", "Snow Falling on Cedars"),
    ("Nora Ephron", "You've Got Mail"),
    ("Nora Ephron", "Sleepless in Seattle"),
    ("John Patrick Stanley", "Joe Versus the Volcano"),
    ("Rob Reiner", "When Harry Met Sally"),
    ("Tom Hanks", "That Thing You Do"),
    ("Howard Deutch", "The Replacements"),
    ("Werner Herzog", "RescueDawn"),
    ("Mike Nichols", "The Birdcage"),
    ("Clint Eastwood", "Unforgiven"),
    ("Robert Longo", "Johnny Mnemonic"),
    ("Tom Tykwer", "Cloud Atlas"),
    ("Lana Wachowski", "Cloud Atlas"),
    ("Lilly Wachowski", "Cloud Atlas"),
    ("Ron Howard", "The Da Vinci Code"),
    ("James Marshall", "V for Vendetta"),
    ("Lana Wachowski", "Speed Racer"),
    ("Lilly Wachowski", "Speed Racer"),
    ("James Marshall", "Ninja Assassin"),
    ("Frank Darabont", "The Green Mile"),
    ("Ron Howard", "Frost/Nixon"),
    ("Danny DeVito", "Hoffa"),
    ("Ron Howard", "Apollo 13"),
    ("Jan de Bont", "Twister"),
    ("Robert Zemeckis", "Cast Away"),
    ("Milos Forman", "One Flew Over the Cuckoo's Nest"),
    ("Nancy Meyers", "Something's Gotta Give"),
    ("Chris Columbus", "Bicentennial Man"),
    ("Mike Nichols", "Charlie Wilson's War"),
    ("Robert Zemeckis", "The Polar Express"),
    ("Penny Marshall", "A League of Their Own"),
]

# Split the pairs back into the two parallel columns the frame is built from.
directed_person = [director for director, _ in directed_pairs]
directed_movie = [movie for _, movie in directed_pairs]

directed_person_movies = pandas.DataFrame(dict(start=directed_person, end=directed_movie))


# Insert the DIRECTED edges (person -> movie).
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        directed_person_movies,
        type="DIRECTED",
        source_id_space="person",
        source_column="start",
        destination_id_space="movie",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Running total: one edge per row of every edge frame inserted so far.
inserted_edge_frames = (
    acted_in_movies,
    reviewed_actor_movies,
    produced_person_movies,
    wrote_person_movies,
    directed_person_movies,
)
expected_edge_count = sum(len(frame) for frame in inserted_edge_frames)
assert g.num_edges() == expected_edge_count

In [None]:
# Fetch the graph's current schema and render it.
schema = g.schema()
schema.visualize()

In [None]:
# FOLLOWS edge data as (follower, followed) pairs; both ends are persons.
follow_pairs = [
    ("Paul Blythe", "Angela Scope"),
    ("Angela Scope", "Jessica Thompson"),
    ("James Thompson", "Jessica Thompson"),
]

follows_person = pandas.DataFrame(
    {
        "start": [follower for follower, _ in follow_pairs],
        "end": [followed for _, followed in follow_pairs],
    }
)


# Insert the FOLLOWS edges; unlike the other edge types, both endpoints live
# in the "person" id space.
with DataFrameImporter(g) as importer:
    importer.edges_dataframe(
        follows_person,
        type="FOLLOWS",
        source_id_space="person",
        source_column="start",
        destination_id_space="person",
        destination_column="end",
    )
    # Only edges are supplied, so the node id property cannot be inferred
    # and has to be named explicitly.
    importer.node_id_property_name("name")
    importer.insert()

# Running total: one edge per row of every edge frame inserted so far.
inserted_edge_frames = (
    acted_in_movies,
    reviewed_actor_movies,
    produced_person_movies,
    wrote_person_movies,
    directed_person_movies,
    follows_person,
)
expected_edge_count = sum(len(frame) for frame in inserted_edge_frames)
assert g.num_edges() == expected_edge_count

### Operation: Schema Visualization

In [None]:
# Fetch the graph's current schema and render it.
schema = g.schema()
schema.visualize()

In [None]:
# Query every directed edge pattern, returning both endpoints, and draw it.
edge_pattern_query = "match (a)-[e]->(b) return a, b"
result = g.query(edge_pattern_query, contextualize=True)
result.visualize()

### Operation: Delete Nodes
Delete nodes from the graph

**Note:** Deleting a node also deletes any edges associated with that node.

In [None]:
# Snapshot the outgoing edges of the node about to be deleted, so the expected
# edge count after the delete can be derived from it below.
# NOTE(review): only outgoing edges are matched. That is sufficient for this
# notebook's data, where no edge points at "Lana Wachowski" (every FOLLOWS edge
# targets Angela Scope or Jessica Thompson, and all other edge types end at a
# movie). Revisit the pattern if incoming edges are ever added.
edges_result_before = g.query("match (n:person)-[e]->() WHERE n.name = 'Lana Wachowski' return e")

# Delete the node associated with "Lana Wachowski". Only the id column is
# supplied to identify the node.
delete_person = pandas.DataFrame({"name": ["Lana Wachowski"]})

with DataFrameImporter(g) as importer:
    importer.nodes_dataframe(delete_person, id_space="person", id_column="name")
    importer.delete()

# Re-run both lookups against the mutated graph.
nodes_result = g.query("match (n:person) WHERE n.name = 'Lana Wachowski' return n")
edges_result = g.query("match (n:person)-[e]->() WHERE n.name = 'Lana Wachowski' return e")

# The node itself must be gone ...
assert nodes_result.empty
# ... and so must its edges. This result was previously queried but never
# checked, leaving the "edges are deleted with the node" claim unverified.
assert edges_result.empty
# The overall edge count drops by exactly the number of edges the node had.
assert g.num_edges() == (
    (
        len(acted_in_movies)
        + len(reviewed_actor_movies)
        + len(produced_person_movies)
        + len(wrote_person_movies)
        + len(directed_person_movies)
        + len(follows_person)
    )
    - len(edges_result_before)
)