In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns
from scipy import stats
import itertools


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

In [2]:
links_df= pd.read_csv("./links.csv")
movies_df = pd.read_csv("./movies.csv")
ratings_df = pd.read_csv('./ratings.csv')
#tags_df = pd.read_csv('./tags.csv')

In [3]:
links_df

Unnamed: 0,movieId,imdbId,tmdbId
0,1,114709,862.0
1,2,113497,8844.0
2,3,113228,15602.0
3,4,114885,31357.0
4,5,113041,11862.0
...,...,...,...
9737,193581,5476944,432131.0
9738,193583,5914996,445030.0
9739,193585,6397426,479308.0
9740,193587,8391976,483455.0


In [4]:
movies_df

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


In [5]:
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [6]:
ratings_df[ratings_df["movieId"]==1]["rating"].value_counts()

4.0    82
5.0    47
3.0    34
4.5    18
3.5    18
2.5     8
2.0     6
0.5     1
1.5     1
Name: rating, dtype: int64

In [7]:
ratings_df[ratings_df["userId"]==414]["rating"].value_counts()

4.0    903
3.0    658
2.0    398
5.0    248
3.5    232
2.5    122
4.5     76
1.0     40
1.5     20
0.5      1
Name: rating, dtype: int64

In [8]:
ratings_df.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,326.127564,19435.295718,3.501557,1205946000.0
std,182.618491,35530.987199,1.042529,216261000.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1199.0,3.0,1019124000.0
50%,325.0,2991.0,3.5,1186087000.0
75%,477.0,8122.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537799000.0


In [9]:
# No of users
counts = ratings_df.userId.unique()
len(counts)

610

In [10]:
movies_df["genres_list"] = movies_df["genres"].apply(lambda x: x.split("|"))
movies_df.head()

Unnamed: 0,movieId,title,genres,genres_list
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,Jumanji (1995),Adventure|Children|Fantasy,"[Adventure, Children, Fantasy]"
2,3,Grumpier Old Men (1995),Comedy|Romance,"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"[Comedy, Drama, Romance]"
4,5,Father of the Bride Part II (1995),Comedy,[Comedy]


In [11]:
def get_year_from_title(data):
    try:
        data = data[::-1].split("(")[0].replace(")","")[::-1]
        return data
    except Exception as e:
        print("Exception =", e , "Title = ", data)

def timestamp_convert(timestamp):
    date_time = datetime.fromtimestamp(timestamp)
    str_date = date_time.strftime("%d %B, %Y")
    return str_date

In [12]:
movies_df["Released_in"] = movies_df["title"].apply(get_year_from_title)
movies_df.head()

Unnamed: 0,movieId,title,genres,genres_list,Released_in
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,"[Adventure, Children, Fantasy]",1995
2,3,Grumpier Old Men (1995),Comedy|Romance,"[Comedy, Romance]",1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"[Comedy, Drama, Romance]",1995
4,5,Father of the Bride Part II (1995),Comedy,[Comedy],1995


In [13]:
ratings_df["Time"] = ratings_df["timestamp"].apply(timestamp_convert)
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp,Time
0,1,1,4.0,964982703,"30 July, 2000"
1,1,3,4.0,964981247,"30 July, 2000"
2,1,6,4.0,964982224,"30 July, 2000"
3,1,47,5.0,964983815,"30 July, 2000"
4,1,50,5.0,964982931,"30 July, 2000"
...,...,...,...,...,...
100831,610,166534,4.0,1493848402,"03 May, 2017"
100832,610,168248,5.0,1493850091,"03 May, 2017"
100833,610,168250,5.0,1494273047,"08 May, 2017"
100834,610,168252,5.0,1493846352,"03 May, 2017"


In [14]:
movies_df.isna().sum()

movieId        0
title          0
genres         0
genres_list    0
Released_in    0
dtype: int64

In [15]:
# Genre Extraction

genres = sorted(list(set().union(*(movies_df["genres_list"].values))))
print(len(genres))
genres

20


['(no genres listed)',
 'Action',
 'Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Crime',
 'Documentary',
 'Drama',
 'Fantasy',
 'Film-Noir',
 'Horror',
 'IMAX',
 'Musical',
 'Mystery',
 'Romance',
 'Sci-Fi',
 'Thriller',
 'War',
 'Western']

In [16]:
ratings_df.groupby("userId")["rating"].mean()

userId
1      4.366379
2      3.948276
3      2.435897
4      3.555556
5      3.636364
         ...   
606    3.657399
607    3.786096
608    3.134176
609    3.270270
610    3.688556
Name: rating, Length: 610, dtype: float64

In [17]:
#ratings_df = ratings_df.set_index("userId")

ratings_df['StandardizedRating'] = ratings_df["rating"] - ratings_df.groupby(["userId"])["rating"].transform("mean")
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp,Time,StandardizedRating
0,1,1,4.0,964982703,"30 July, 2000",-0.366379
1,1,3,4.0,964981247,"30 July, 2000",-0.366379
2,1,6,4.0,964982224,"30 July, 2000",-0.366379
3,1,47,5.0,964983815,"30 July, 2000",0.633621
4,1,50,5.0,964982931,"30 July, 2000",0.633621
...,...,...,...,...,...,...
100831,610,166534,4.0,1493848402,"03 May, 2017",0.311444
100832,610,168248,5.0,1493850091,"03 May, 2017",1.311444
100833,610,168250,5.0,1494273047,"08 May, 2017",1.311444
100834,610,168252,5.0,1493846352,"03 May, 2017",1.311444


In [18]:
freq = ratings_df["userId"].value_counts()
freq = freq.reset_index(name = "Frequency")
freq.rename(columns={"index":"userId"},inplace = True)
freq

Unnamed: 0,userId,Frequency
0,414,2698
1,599,2478
2,474,2108
3,448,1864
4,274,1346
...,...,...
605,442,20
606,569,20
607,320,20
608,576,20


In [19]:
def movie_profile_calculator(data, reference):
    vec = [0]*len(reference)
    for i in range(len(data)):
        if data[i] in reference:
            idx = reference.index(data[i])
            vec[idx]=1
    return np.array(vec)

In [20]:
movies_df["Movie Profile Vector"] = movies_df["genres_list"].apply(movie_profile_calculator, reference = genres)
movies_df.head()

Unnamed: 0,movieId,title,genres,genres_list,Released_in,Movie Profile Vector
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[Adventure, Animation, Children, Comedy, Fantasy]",1995,"[0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,2,Jumanji (1995),Adventure|Children|Fantasy,"[Adventure, Children, Fantasy]",1995,"[0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
2,3,Grumpier Old Men (1995),Comedy|Romance,"[Comedy, Romance]",1995,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"[Comedy, Drama, Romance]",1995,"[0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, ..."
4,5,Father of the Bride Part II (1995),Comedy,[Comedy],1995,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [21]:
movies_df.loc[movies_df["movieId"] == 1 , "Movie Profile Vector"].iloc[0]

array([0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [22]:
ratings_df = ratings_df.merge(movies_df[["Movie Profile Vector","movieId"]], on = 'movieId',how = 'left')
ratings_df

Unnamed: 0,userId,movieId,rating,timestamp,Time,StandardizedRating,Movie Profile Vector
0,1,1,4.0,964982703,"30 July, 2000",-0.366379,"[0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,1,3,4.0,964981247,"30 July, 2000",-0.366379,"[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,1,6,4.0,964982224,"30 July, 2000",-0.366379,"[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,1,47,5.0,964983815,"30 July, 2000",0.633621,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,1,50,5.0,964982931,"30 July, 2000",0.633621,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, ..."
...,...,...,...,...,...,...,...
100831,610,166534,4.0,1493848402,"03 May, 2017",0.311444,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, ..."
100832,610,168248,5.0,1493850091,"03 May, 2017",1.311444,"[0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
100833,610,168250,5.0,1494273047,"08 May, 2017",1.311444,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
100834,610,168252,5.0,1493846352,"03 May, 2017",1.311444,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


## 3 types of averages

### 1- simple average but fails in considering bias of one movie over other
### 2- Jaccard similarity 
### 3- weighted average

### Collaborative Filtering

In [23]:
rating_matrix_df = ratings_df[["userId","movieId","rating"]].pivot(index = "userId", columns = "movieId", values = "rating").fillna(0)
rating_matrix_df

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
rating_matrix_df.shape

(610, 9724)

In [25]:
np.count_nonzero(rating_matrix_df.iloc[0].gt(0) & rating_matrix_df.iloc[1].gt(0))

2

In [26]:
np.array(rating_matrix_df.loc[1,rating_matrix_df.columns[(rating_matrix_df.loc[1].gt(0) & rating_matrix_df.loc[2].gt(0))]])

array([5., 5.])

In [27]:
rating_matrix_df.columns[(rating_matrix_df.loc[1].gt(0) & rating_matrix_df.loc[2].gt(0))]

Int64Index([333, 3578], dtype='int64', name='movieId')

In [28]:
np.sum(rating_matrix_df.loc[414] > 0)

2698

In [29]:
rating_matrix_df.loc[414].value_counts()

0.0    7026
4.0     903
3.0     658
2.0     398
5.0     248
3.5     232
2.5     122
4.5      76
1.0      40
1.5      20
0.5       1
Name: 414, dtype: int64

In [None]:
def pearson_corr(x, y):
    
    result = np.array([np.array([x[i], y[i]]) for i in range(len(x)) if x[i] != 0 and y[i] != 0])
    if len(result) == 0:
        return 0
    r_u, r_v = result.T[0], result.T[1]
    if np.var(r_u) == 0 or np.var(r_v) == 0:
        corr = np.nan
    else:
        corr = stats.pearsonr(r_u, r_v)
    return corr

def cosine_corr(x, y):
    
    result = np.array([np.array([x[i], y[i]]) for i in range(len(x)) if x[i] != 0 and y[i] != 0])
    if len(result) == 0:
        return 0
    r_u, r_v = result.T[0], result.T[1]
    # Calculate numerator of Pearson correlation coefficient
    numerator = np.dot(r_u, r_v)

    # Calculate denominator of Pearson correlation coefficient
    denominator = np.sqrt(np.sum(np.square(r_u))) * np.sqrt(np.sum(np.square(r_v)))

    # Calculate Pearson correlation coefficient
    corr = numerator / denominator
    return corr


def Modified_Jaccard(x, y):
    #common = len(rating_matrix_df.columns[(rating_matrix_df.loc[user1].gt(0) & rating_matrix_df.loc[user2].gt(0))])
    common = np.array([np.array([x[i], y[i]]) for i in range(len(x)) if x[i] != 0 and y[i] != 0])
    print(np.sum(np.array([1 for i in range(len(x)) if x[i] != 0 and y[i] != 0])))
    I_u = np.count_nonzero(x)
    I_v = np.count_nonzero(y) + 1
    
    return common/ (I_u * I_v)

In [31]:
pearson_corr(rating_matrix_df.loc[1].to_numpy(), rating_matrix_df.loc[2].to_numpy())

nan

In [32]:
target_movie = 1
target_user = 1

data_filtered = rating_matrix_df.loc[rating_matrix_df[target_movie]!=0]
data_filtered = data_filtered.drop([target_movie], axis = 1)
data_filtered

movieId,2,3,4,5,6,7,8,9,10,11,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,4.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,2.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
common_users = data_filtered.index

In [34]:
pearson_corr_sim = {}
cosine_corr_sim = {}
Modified_Jaccard_sim = {}
for pair in itertools.combinations(common_users, 2):
    #print(pair)
    if pair[0] == target_user or pair[1] == target_user:
        if pair[0] == target_user:
            x = data_filtered.loc[pair[0]].to_numpy()
            y = data_filtered.loc[pair[1]].to_numpy()
        else:
            x = data_filtered.loc[pair[1]].to_numpy()
            y = data_filtered.loc[pair[0]].to_numpy()    
        pearson_corr_sim[pair] = pearson_corr(x, y)
        cosine_corr_sim[pair] = cosine_corr(x, y)
        Modified_Jaccard_sim[pair] = Modified_Jaccard(x, y)

[[5. 4.]
 [4. 4.]
 [3. 5.]
 [3. 2.]
 [4. 3.]
 [4. 4.]
 [5. 4.]
 [5. 5.]
 [4. 5.]
 [4. 3.]
 [5. 5.]
 [5. 3.]]
[[5.  4.5]
 [5.  5. ]
 [4.  5. ]
 [4.  5. ]
 [4.  3. ]
 [4.  5. ]
 [3.  4. ]
 [3.  4.5]
 [5.  4. ]
 [4.  4. ]
 [5.  4. ]
 [2.  5. ]
 [5.  4.5]
 [5.  5. ]
 [5.  5. ]
 [5.  3.5]
 [5.  3. ]
 [5.  4.5]
 [5.  3.5]
 [5.  5. ]
 [4.  1.5]
 [4.  3. ]
 [5.  4. ]
 [5.  1.5]
 [5.  3.5]]
[[5.  3.5]
 [5.  5. ]
 [3.  4. ]
 [4.  5. ]
 [5.  5. ]
 [5.  2. ]
 [3.  3.5]
 [5.  5. ]
 [5.  4. ]
 [5.  5. ]
 [4.  5. ]
 [5.  4. ]
 [4.  3.5]
 [5.  5. ]
 [4.  4. ]
 [4.  3.5]
 [5.  4.5]
 [5.  4. ]
 [5.  4. ]
 [4.  4.5]
 [5.  2.5]
 [5.  5. ]
 [5.  5. ]
 [4.  1.5]]
[[5.  4. ]
 [5.  4.5]
 [4.  4.5]
 [5.  5. ]
 [3.  5. ]
 [4.  5. ]
 [4.  4.5]
 [5.  4.5]
 [4.  4.5]
 [4.  4.5]
 [4.  4.5]
 [3.  4. ]
 [5.  4.5]
 [5.  4. ]
 [4.  4.5]
 [5.  5. ]
 [5.  3.5]
 [5.  4.5]
 [5.  5. ]
 [5.  5. ]
 [5.  4.5]
 [5.  4.5]
 [5.  5. ]
 [3.  4. ]
 [5.  4.5]
 [5.  4. ]
 [5.  5. ]
 [4.  4. ]
 [5.  4. ]
 [4.  4.5]
 [5.  4.5]
 [5.  4.5

In [35]:
pearson_corr_sim[(1,5)]

PearsonRResult(statistic=0.2723939670448719, pvalue=0.391699114740482)

In [36]:
cosine_corr_sim[(1,5)]

0.9686828976934485

In [37]:
Modified_Jaccard_sim[(1,5)]

array([[0.00049193, 0.00039355],
       [0.00039355, 0.00039355],
       [0.00029516, 0.00049193],
       [0.00029516, 0.00019677],
       [0.00039355, 0.00029516],
       [0.00039355, 0.00039355],
       [0.00049193, 0.00039355],
       [0.00049193, 0.00049193],
       [0.00039355, 0.00049193],
       [0.00039355, 0.00029516],
       [0.00049193, 0.00049193],
       [0.00049193, 0.00029516]])

In [38]:


def Proximity():
    r_u1 = np.array(rating_matrix_df.loc[user1,rating_matrix_df.columns[(rating_matrix_df.loc[user1].gt(0) & rating_matrix_df.loc[user2].gt(0))]])
    r_u2 = np.array(rating_matrix_df.loc[user2,rating_matrix_df.columns[(rating_matrix_df.loc[user1].gt(0) & rating_matrix_df.loc[user2].gt(0))]])
    return
def Significance():
    return
def Singularity():
    return
def URPSim():
    return
def JPSSSim():
    return
def NHSM_Sim():
    return
def PSSSim():
    

IndentationError: expected an indented block (2495629948.py, line 16)