In [1]:
import networkx as nx
#from networkx.algorithms import bipartite
from networkx.algorithms import bipartite
from networkx.algorithms.community.quality import modularity
from surprise import Dataset, NormalPredictor, Reader
from surprise.model_selection import cross_validate
from surprise import accuracy, SVDpp
from surprise.model_selection import KFold
#from networkx.algorithms.community import greedy_modularity_communities
import community
import time
import numpy as np
import pandas as pd
import xlsxwriter
import math

# Build the user-item bipartite graph: every rating record in the file
# contributes one edge between a user node and an item node.
G = nx.Graph()
with open('animeupdate.txt', 'r') as ratings_file:
    for record in ratings_file:
        # Each record is "<user>\t<item>\t<rating>"; the rating itself is not
        # stored on the graph.
        raw_user, raw_item, _rating = record.strip().split('\t')
        # Prefix the ids so that user and item labels can never collide.
        user_node = "A" + raw_user
        item_node = "B" + raw_item
        G.add_node(user_node, bipartite=0)
        G.add_node(item_node, bipartite=1)
        G.add_edge(user_node, item_node)

# Split the nodes back into the two sides of the bipartite graph.
users = {node for node, attrs in G.nodes(data=True) if attrs["bipartite"] == 0}
items = set(G) - users

def louvain_algorithm(G, k):
    """Partition ``G`` with Louvain, then merge communities down to at most ``k``.

    The graph is first partitioned with python-louvain's ``best_partition``.
    While more than ``k`` communities remain, every pair of communities is
    tentatively merged and the pair whose merge yields the highest modularity
    is merged for real.

    If Louvain finds fewer than ``k`` communities they are returned unchanged;
    no community is ever split, so the result may contain fewer than ``k``
    groups.

    Args:
        G: Graph to partition (passed to ``best_partition`` / ``modularity``).
        k: Maximum number of communities to return; must be >= 1.

    Returns:
        A list of at most ``k`` non-empty lists of nodes.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    node_label = community.community_louvain.best_partition(G)

    # Group the nodes by community label, ordered by label.
    members_by_label = {}
    for node, label in node_label.items():
        members_by_label.setdefault(label, []).append(node)
    partitions = [members_by_label[label] for label in sorted(members_by_label)]

    while len(partitions) > k:
        # Relabel every node with the *current* list index of its community.
        # The candidate merges below use list indices as labels, so the
        # mapping must be rebuilt after every merge; otherwise the indices
        # shift while the labels do not, and merges get scored against the
        # wrong communities.
        node_label = {
            node: index
            for index, members in enumerate(partitions)
            for node in members
        }

        # Score every pairwise merge; strict ">" keeps the first best pair.
        best_modularity = -1
        best_pair = None
        for i in range(len(partitions)):
            for j in range(i + 1, len(partitions)):
                candidate = node_label.copy()
                for node in partitions[i]:
                    candidate[node] = j
                score = community.modularity(candidate, G)
                if score > best_modularity:
                    best_modularity = score
                    best_pair = (i, j)

        # Merge community i into community j and drop the emptied slot.
        i, j = best_pair
        partitions[j].extend(partitions[i])
        del partitions[i]

    return partitions


# Load the same ratings file as a table with one row per (user, item, rating).
df = pd.read_csv(
    'animeupdate.txt',
    delimiter='\t',
    header=None,
    names=['user', 'item', 'rating'],
)

# Apply the same "A"/"B" prefixes that are used for the graph node labels.
for column, prefix in (('user', 'A'), ('item', 'B')):
    df[column] = prefix + df[column].astype(str)

# User x item rating table; pairs without a rating become 0.
matrix = df.pivot(index='user', columns='item', values='rating').fillna(0)

# Plain NumPy view of the full rating table.
matrix_R = matrix.to_numpy()


# Read the item metadata file (tab-separated; field 0 is the item id and
# field 2 is a comma-separated list of genres).
file_path = 'anime_info.txt'

with open(file_path, 'r', encoding='utf-8') as file:
    animes_data = file.readlines()

# Genres that get a column in the item x genre indicator table.
genres_list = [
    "Josei", "Hentai", "Ecchi", "Romance", "Samurai", "Sci-Fi", "Magic", "Drama", "Mecha", "Parody", "Demons",
    "Game", "Seinen", "Martial Arts", "Yuri", "Dementia", "Shoujo", "Military", "Fantasy", "Adventure", "Historical",
    "Yaoi", "Music", "Sports", "Super Power", "Kids", "Space", "Police", "Mystery", "Cars", "Comedy", "Supernatural",
    "School", "Vampire", "Thriller", "Shounen Ai", "Action", "Slice of Life", "Harem", "Shoujo Ai", "Shounen",
    "Psychological", "Horror",
]

# One row per item node of the graph, one 0/1 column per genre.
items_list = list(items)
df1 = pd.DataFrame(0, index=items_list, columns=genres_list)

known_genres = set(genres_list)
for line in animes_data:
    parts = line.strip().split("\t")
    # Skip blank or truncated lines instead of failing on parts[2].
    if len(parts) < 3:
        continue

    # Item labels carry the same "B" prefix as the graph nodes.
    item_node = 'B' + parts[0]
    if item_node not in items:
        continue

    for genre in parts[2].split(','):
        # Tolerate "Action, Comedy"-style lists: strip the padding so the
        # token can match an entry of genres_list.
        genre = genre.strip()
        if genre in known_genres:
            df1.at[item_node, genre] = 1


genre_data = df1
print('genre data', genre_data)
print(genre_data.shape)


# ---------------------------------------------------------------------------
# Experiment: for every target community count, repeatedly partition the
# graph, build hybrid predictions (content-based cosine score blended with
# SVD++ estimates) inside each community, and record the mean absolute error
# for a range of blend weights. Results are printed and exported to Excel.
# ---------------------------------------------------------------------------
max_communities = 25
iterations = 25
ratingmin = 1
ratingmax = 10
n_factors = 10
# Weight of the content-based score; (1 - weight) goes to the SVD++ estimate.
blend_weights = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]


def _content_based_scores(R, genre, low, high):
    """Return cosine(user genre profile, item genre vector) rescaled to [low, high].

    ``R`` is the (users x items) rating array of one community and ``genre``
    the matching (items x genres) indicator array. A user's profile is the
    rating-weighted sum of the genre vectors of the items. Pairs whose
    profile or genre vector has zero norm get cosine 0, i.e. the score ``low``.
    """
    user_profiles = R @ genre
    dot_products = user_profiles @ genre.T
    user_norms = np.sqrt(np.sum(user_profiles ** 2, axis=1))
    item_norms = np.sqrt(np.sum(genre ** 2, axis=1))
    denominators = np.outer(user_norms, item_norms)
    cosine = np.zeros_like(dot_products, dtype=float)
    np.divide(dot_products, denominators, out=cosine, where=denominators != 0)
    return low + (high - low) * cosine


def _svdpp_scores(R, row_labels, col_labels, rating_scale, n_factors=10, n_splits=2):
    """Return out-of-fold SVD++ estimates aligned cell-by-cell with ``R``.

    Non-zero cells of ``R`` are treated as observed ratings and split into
    ``n_splits`` folds; each observed cell receives the estimate produced when
    it was in the test fold. Cells without an estimate stay 0.

    When the community has at most one user or one item, or fewer observed
    ratings than folds, no model is trained and ``R`` itself is returned.
    """
    rows, cols = np.nonzero(R)
    if R.shape[0] <= 1 or R.shape[1] <= 1 or len(rows) < n_splits:
        return R

    observed = pd.DataFrame({
        "user": [row_labels[i] for i in rows],
        "item": [col_labels[j] for j in cols],
        "rating": R[rows, cols],
    })
    # The scale must match the data: Surprise clips estimates to it.
    reader = Reader(rating_scale=rating_scale)
    data = Dataset.load_from_df(observed[["user", "item", "rating"]], reader)

    # Write estimates by position so the result lines up with R exactly.
    row_of = {label: i for i, label in enumerate(row_labels)}
    col_of = {label: j for j, label in enumerate(col_labels)}
    estimates = np.zeros(R.shape, dtype=float)

    algo = SVDpp(n_factors=n_factors)
    for trainset, testset in KFold(n_splits=n_splits).split(data):
        algo.fit(trainset)
        for pred in algo.test(testset):
            estimates[row_of[pred.uid], col_of[pred.iid]] = pred.est
    return estimates


D = {}
D["steps"] = []
for com in range(2, max_communities + 1):
    D[com] = []
    mae_per_iteration = []
    for it in range(iterations):
        P = louvain_algorithm(G, com)

        # Absolute error is pooled over the cells of *all* communities of this
        # partition, one accumulator per blend weight.
        abs_error = np.zeros(len(blend_weights))
        n_cells = 0
        for part in P:
            part_users = [u for u in part if u in matrix.index]
            part_items = [i for i in part if i in matrix.columns]
            part_matrix = matrix.loc[part_users, part_items]
            R = part_matrix.to_numpy()
            if R.size == 0:
                # Community without users or without items: nothing to score.
                continue
            genre = genre_data.loc[part_items, genres_list].to_numpy()

            content_scores = _content_based_scores(R, genre, ratingmin, ratingmax)
            nR = _svdpp_scores(
                R, part_users, part_items,
                rating_scale=(ratingmin, ratingmax),
                n_factors=n_factors,
            )

            # NOTE(review): the error is taken over every cell of R, including
            # unrated pairs stored as 0 - confirm this is the intended metric.
            for w_index, w1 in enumerate(blend_weights):
                pred_matrix_final = w1 * content_scores + (1 - w1) * nR
                abs_error[w_index] += np.abs(pred_matrix_final - R).sum()
            n_cells += R.size

        if n_cells:
            mae_per_iteration.append(abs_error / n_cells)
        else:
            mae_per_iteration.append(np.full(len(blend_weights), np.nan))

    # Average the per-weight MAE over all iterations for this community count.
    D[com].append(np.average(mae_per_iteration, axis=0))
print(D)

# Export: one sheet per key of D (including the empty "steps" sheet). The
# context manager saves and closes the workbook even if a sheet fails.
with pd.ExcelWriter("25iterationscombinedLouSVD++cosine_anime_k=10_MAE.xlsx", engine='xlsxwriter') as writer:
    for key in D.keys():
        df = pd.DataFrame(D[key])
        df.to_excel(writer, sheet_name=f'Sheet_{key}', index=False)

genre data        Josei  Hentai  Ecchi  Romance  Samurai  Sci-Fi  Magic  Drama  Mecha  \
B446       0       0      0        0        0       0      0      0      0   
B6464      0       0      0        0        0       0      0      0      0   
B3245      0       0      0        0        0       0      0      0      0   
B5119      0       0      0        0        0       0      0      0      0   
B6924      0       1      0        0        0       0      0      0      0   
...      ...     ...    ...      ...      ...     ...    ...    ...    ...   
B1731      0       0      0        0        0       0      0      0      0   
B6315      0       0      0        0        0       0      0      0      0   
B5572      0       0      0        0        0       0      0      0      0   
B51        0       0      0        0        0       0      0      0      0   
B6281      0       0      0        0        0       0      0      0      0   

       Parody  ...  Vampire  Thriller  Shounen Ai  A