# Importing Libraries


In [37]:
import pandas as pd
import numpy as np
import matplotlib as plt

# making the dataframes 

In [38]:
movies_df=pd.read_csv('Movies.csv')
movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [39]:
ratings_df=pd.read_csv('ratings.csv')
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


# drop the timestamp

In [40]:
ratings_df = ratings_df.sample(frac=0.1, random_state=42)
# Remove redundant columns if any (e.g., timestamp)
ratings_df = ratings_df.drop(columns=['timestamp'])

# Optionally, filter out rows with missing values
ratings_df = ratings_df.dropna()

In [41]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating
67037,432,77866,4.5
42175,288,474,3.0
93850,599,4351,3.0
6187,42,2987,4.0
12229,75,1610,4.0


# Community detection on movies

In [42]:

import networkx as nx
from community import community_louvain

# Create a directed graph
G = nx.DiGraph()

# Add edges for each movie-rating interaction
for _, row in ratings_df.iterrows():
    movie_id = row['movieId']
    user_id = row['userId']
    rating = row['rating']
    G.add_edge(movie_id, user_id, weight=rating)

# Convert the graph to an undirected graph for community detection
G_undirected = G.to_undirected()

# Detect communities using the Louvain algorithm
partition = community_louvain.best_partition(G_undirected)

# Merge partition data with movie information
partition_df = pd.DataFrame(partition.items(), columns=['Node', 'Community'])
partition_df['Node'] = partition_df['Node'].astype(int)  # Ensure 'Node' column is integer
partition_df = pd.merge(partition_df, movies_df, left_on='Node', right_on='movieId', how='left')
num_communities = partition_df['Community'].nunique()

print("Number of communities based on movies:", num_communities)
# Save the partition data to a CSV file
partition_df.to_csv('partition.csv', index=False)

# Print the first few rows of the partition DataFrame
print(partition_df.head())

Number of communities based on movies: 33
    Node  Community  movieId  \
0  77866          6  77866.0   
1    432          6    432.0   
2    474          1    474.0   
3    288         17    288.0   
4   4351          4   4351.0   

                                               title  \
0                                  Robin Hood (2010)   
1  City Slickers II: The Legend of Curly's Gold (...   
2                         In the Line of Fire (1993)   
3                        Natural Born Killers (1994)   
4                                 Point Break (1991)   

                               genres  
0  Action|Adventure|Drama|Romance|War  
1            Adventure|Comedy|Western  
2                     Action|Thriller  
3               Action|Crime|Thriller  
4               Action|Crime|Thriller  


# community detection on Users


In [43]:
import pandas as pd
import networkx as nx
from community import community_louvain


# Create a directed graph
G = nx.DiGraph()

# Add edges for each movie-rating interaction
for _, row in ratings_df.iterrows():
    G.add_edge(row['userId'], row['movieId'])

# Convert the graph to an undirected graph for community detection
G_undirected = G.to_undirected()

# Detect communities using the Louvain algorithm
partition = community_louvain.best_partition(G_undirected)

# Merge partition data with user information
partition_df = pd.DataFrame(partition.items(), columns=['Node', 'Community'])
partition_df['Node'] = partition_df['Node'].astype(int)  # Ensure 'Node' column is integer

# Assuming you have user information in ratings_df, merge with partition_df
partition_df = pd.merge(partition_df, ratings_df[['userId']], left_on='Node', right_on='userId', how='left')
num_communities = partition_df['Community'].nunique()

print("Number of communities based on user:", num_communities)

# Save the partition data to a CSV file
partition_df.to_csv('user_partition.csv', index=False)

# Implement the recommendation system based on user communities
# (You can load user_partition.csv and movies.csv to get user and movie information)
# Implement user interface for input and recommendation display

Number of communities based on user: 30


# modifying the partician_df


In [44]:
partition_df.head()


Unnamed: 0,Node,Community,userId
0,432,0,432.0
1,432,0,432.0
2,432,0,432.0
3,432,0,432.0
4,432,0,432.0


In [45]:
ratings_df.head()


Unnamed: 0,userId,movieId,rating
67037,432,77866,4.5
42175,288,474,3.0
93850,599,4351,3.0
6187,42,2987,4.0
12229,75,1610,4.0


In [47]:
# Drop the 'userId' column
partition_df = partition_df.drop(columns=['userId'])

# Rename the 'Node' column to 'userId'
partition_df = partition_df.rename(columns={'Node': 'userId'})
partition_df

Unnamed: 0,userId,Community
0,432,0
1,432,0
2,432,0
3,432,0
4,432,0
...,...,...
13398,2935,1
13399,4890,7
13400,134246,12
13401,97921,11


# Merging the ratings and partician df

In [48]:
import pandas as pd

# Assuming ratings_df and partition_df are already loaded

# Merge the DataFrames on 'userId'
merged_df = pd.merge(ratings_df, partition_df, on='userId')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('finalUsers.csv', index=False)


In [None]:
# now we have files called finalUsers.csv
# with columns 
# userId,movieId,rating,Community
# and we also have a partition.csv with columns 

# renaming the columns

In [50]:
import pandas as pd

# Load your CSV file
df = pd.read_csv('partition.csv')

# Drop the 'movieId' column
df = df.drop(columns=['movieId'])

# Rename the 'Node' column to 'movieId'
df = df.rename(columns={'Node': 'movieId'})

# Save the modified DataFrame back to a CSV file
df.to_csv('updated_partition.csv', index=False)


# deleting the unnecesaary files

In [52]:
import os

# Specify the path to the file
file_path = 'user_partition.csv'

# Delete the file
if os.path.exists(file_path):
    os.remove(file_path)
    print("File deleted successfully")
else:
    print("The file does not exist")


File deleted successfully


In [54]:
# Specify the path to the file
file_path = 'partition.csv'

# Delete the file
if os.path.exists(file_path):
    os.remove(file_path)
    print("File deleted successfully")
else:
    print("The file does not exist")

File deleted successfully
