In [1]:
import networkx as nx

def load_bipartite_graph():
    """Read the employee/movie edge list from disk.

    The file ``Employee_Movie_Choices.txt`` is parsed as a tab-delimited
    edge list, one edge per line.

    Returns:
        The graph built by ``nx.read_edgelist`` from that file.
    """
    return nx.read_edgelist("Employee_Movie_Choices.txt", delimiter="\t")


In [2]:
def add_node_attributes():
    """Load the bipartite graph and tag every node with a ``'type'`` attribute.

    A node whose label starts with ``'E'`` is tagged ``'employee'``; every
    other node is tagged ``'movie'``. The split relies purely on that name
    prefix, so a movie whose title begins with ``'E'`` would also be tagged
    as an employee.

    Returns:
        The freshly loaded graph with the ``'type'`` attribute set on all nodes.
    """
    graph = load_bipartite_graph()

    for label in graph.nodes:
        kind = 'employee' if label.startswith('E') else 'movie'
        graph.nodes[label]['type'] = kind

    return graph


In [3]:
def create_weighted_projection():
    """Project the typed bipartite graph onto its employee nodes.

    Builds the attributed graph, collects the nodes whose ``'type'`` is
    ``'employee'``, and passes them to
    ``nx.bipartite.weighted_projected_graph``.

    Returns:
        The weighted projected graph over the employee nodes.
    """
    typed_graph = add_node_attributes()

    employee_nodes = []
    for label, attrs in typed_graph.nodes(data=True):
        if attrs['type'] == 'employee':
            employee_nodes.append(label)

    return nx.bipartite.weighted_projected_graph(typed_graph, employee_nodes)


In [4]:
import pandas as pd
import numpy as np
import networkx as nx

def calculate_correlation():
    """Correlate relationship scores with the number of shared movies.

    Reads ``Employee_Relationships.txt`` (tab-delimited, no header row) into
    the columns ``Employee1``, ``Employee2`` and ``Relationship``. For each
    row, the matching edge weight in the employee projection is looked up;
    pairs with no edge count as 0. ``Relationship`` values that cannot be
    parsed as numbers are coerced to NaN before the correlation is computed.

    Returns:
        The result of ``Series.corr`` between the ``Relationship`` and
        ``CommonMovies`` columns.
    """
    projection = create_weighted_projection()

    scores = pd.read_csv(
        "Employee_Relationships.txt",
        delimiter="\t",
        header=None,
        names=['Employee1', 'Employee2', 'Relationship'],
    )

    # One entry per row: the projection edge weight for the pair, or 0 when
    # the two employees are not connected in the projection.
    scores['CommonMovies'] = [
        projection[first][second]['weight'] if projection.has_edge(first, second) else 0
        for first, second in zip(scores['Employee1'], scores['Employee2'])
    ]

    # Non-numeric relationship entries become NaN rather than raising.
    scores['Relationship'] = pd.to_numeric(scores['Relationship'], errors='coerce')

    return scores['Relationship'].corr(scores['CommonMovies'])


In [5]:
# Driver cell: run each stage of the pipeline and print its result.
# Every helper rebuilds its own inputs by calling the previous stage, so each
# call below re-reads the data files rather than reusing the objects printed
# above it.

# Stage 1: raw employee/movie graph read from the edge list.
graph = load_bipartite_graph()
print(graph)

# Stage 2: same graph with a 'type' attribute ('employee' or 'movie') per node.
graph_with_attributes = add_node_attributes()
print(graph_with_attributes)

# Stage 3: weighted projection onto the employee nodes.
weighted_projection = create_weighted_projection()
print(weighted_projection)

# Stage 4: correlation between relationship score and projection edge weight.
correlation = calculate_correlation()
print(correlation)


Graph with 15 nodes and 23 edges
Graph with 15 nodes and 23 edges
Graph with 9 nodes and 27 edges
-0.12549142452696246
