In [1]:
import neo4j

import csv

import math
import numpy as np
import pandas as pd

import psycopg2

In [2]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [3]:
session = driver.session(database="neo4j")

In [4]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df


def my_neo4j_page_rank(max_iterations=20, damping_factor=0.85):
    "given a from station and to station, run and print the shortest path"
    
    query = "CALL gds.graph.drop('ds_graph', false)"
    session.run(query)

    query = "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: 'weight'})"
    session.run(query)

    query = """

    CALL gds.pageRank.stream('ds_graph',
                         { maxIterations: $max_iterations,
                           dampingFactor: $damping_factor}
                         )
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name AS name, score as page_rank
    ORDER BY page_rank DESC, name ASC

    """

    return my_neo4j_run_query_pandas(query, max_iterations=max_iterations, damping_factor=damping_factor)
    

In [5]:
df = my_neo4j_page_rank()

df_drop = df[~df['name'].str.contains('depart|arrive', regex=True)]
df_drop

Unnamed: 0,name,page_rank
0,blue Coliseum,0.689786
1,orange Coliseum,0.689174
2,green Coliseum,0.688970
3,blue Bay Fair,0.675695
4,yellow MacArthur,0.675481
...,...,...
122,orange Richmond,0.477049
123,red Richmond,0.477049
124,yellow Antioch,0.463809
127,blue Dublin,0.457066


In [6]:
df_drop['name'] = df_drop['name'].apply(lambda line: ' '.join(line.split()[1:]))

df_drop

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_drop['name'] = df_drop['name'].apply(lambda line: ' '.join(line.split()[1:]))


Unnamed: 0,name,page_rank
0,Coliseum,0.689786
1,Coliseum,0.689174
2,Coliseum,0.688970
3,Bay Fair,0.675695
4,MacArthur,0.675481
...,...,...
122,Richmond,0.477049
123,Richmond,0.477049
124,Antioch,0.463809
127,Dublin,0.457066


In [7]:
final_df = df_drop.groupby('name')['page_rank'].mean().reset_index().sort_values(by="page_rank", ascending=False)

final_df

Unnamed: 0,name,page_rank
11,Coliseum,0.676688
35,Pittsburg Center,0.657564
26,MacArthur,0.652098
7,Bay Fair,0.651456
34,Pittsburg,0.645698
49,West Oakland,0.644398
31,North Concord,0.64197
13,Concord,0.640679
36,Pleasant Hill,0.639852
46,Walnut Creek,0.638228
