# This file post-processes the outputs of the three methods to produce the hybrid method's result.

In [None]:

import pandas as pd

# Re-rank every consecutive block of `group_size` rows of the input CSV and
# write one output row per block: a line number, ten titles, ten adjusted scores.
input_csv = "movies_with_scores.csv"
output_csv = "movies_recommendations_all.csv"

df = pd.read_csv(input_csv)

# The first column is read as the movie titles, the second as their scores.
movies = df.iloc[:, 0].tolist()
scores = df.iloc[:, 1].astype(float).tolist()

alpha1 = 0.05    # per-position weight of the rank bonus
group_size = 10  # number of consecutive rows that form one list
num_movies = len(movies)

recommendations = []
for start in range(0, num_movies, group_size):
    stop = start + group_size
    movie_group = movies[start:stop]
    score_group = scores[start:stop]

    # Only a trailing block can be short; it is dropped rather than padded.
    if len(movie_group) < group_size:
        continue

    # Position bonus: the first row of a block gets alpha1 * group_size and
    # each following row gets alpha1 less.
    adjusted_scores = []
    for rank, score in enumerate(score_group):
        adjusted_scores.append(score + alpha1 * (group_size - rank))

    # Descending sort on (adjusted score, title) pairs, so exact score ties
    # are ordered by comparing the titles.
    ranked = list(zip(adjusted_scores, movie_group))
    ranked.sort(reverse=True)
    sorted_movies = [title for _, title in ranked]
    sorted_scores = [value for value, _ in ranked]

    # Line numbers are 1-based and count only the blocks that were kept.
    line_number = len(recommendations) + 1
    recommendations.append([line_number, *sorted_movies[:10], *sorted_scores[:10]])

columns = (
    ["Line Number"]
    + [f"Movie {position}" for position in range(1, 11)]
    + [f"Score {position}" for position in range(1, 11)]
)

df_output = pd.DataFrame(recommendations, columns=columns)
df_output.to_csv(output_csv, index=False)
print(f"✅ Processing completed, results saved to {output_csv}")

In [None]:

import pandas as pd


# For every row of the match table, record which of its "Movie 1".."Movie 5"
# titles do not occur among the ten recommendations stored for the same
# "Line Number" in the recommendations file.
listlist_file = "merged_with_match_column.csv"
recommendations_file = "movies_recommendations_all.csv"
output_file = "listlist_with_miss.csv"

df_list = pd.read_csv(listlist_file)
df_recommendations = pd.read_csv(recommendations_file)

# The miss columns start out as empty strings and are only filled on a miss.
miss_columns = ["miss movie1", "miss movie2", "miss movie3", "miss movie4", "miss movie5"]
for miss_col in miss_columns:
    df_list[miss_col] = ""

for index, row in df_list.iterrows():
    same_line = df_recommendations["Line Number"] == row["Line Number"]
    rec_row = df_recommendations[same_line]

    # Rows without a recommendation entry keep their empty miss columns.
    if rec_row.empty:
        continue

    # Columns 1..10 (by position) of the first matching row are used as the
    # recommended titles.
    recommended_movies = rec_row.iloc[0, 1:11].tolist()

    for position in range(1, 6):
        movie = row[f"Movie {position}"]
        if movie in recommended_movies:
            continue
        df_list.at[index, f"miss movie{position}"] = movie

df_list.to_csv(output_file, index=False)

print(f"✅Processing completed, results saved to {output_file}")

In [None]:
import pandas as pd
import random


# Fill the blank "Movie 1".."Movie 5" slots of each row with titles drawn at
# random from the ten recommendations stored for the same "Line Number".
# NOTE: no random seed is set in this cell, so the chosen titles can differ
# between runs.
df_miss = pd.read_csv("listlist_with_miss (21) - listlist_with_miss (21) (2).csv")
df_rec = pd.read_csv("movies_recommendations_all.csv")

df_rec.set_index("Line Number", inplace=True)

slot_cols = [f"Movie {i}" for i in range(1, 6)]
rec_cols = [f"Movie {i}" for i in range(1, 11)]

# A slot column that is blank in every row is parsed as float64. Writing a
# title string into such a column is an incompatible-dtype assignment
# (deprecated in recent pandas), so the slot columns are made object-typed
# before any cell is filled.
for col in slot_cols:
    df_miss[col] = df_miss[col].astype(object)


def _is_blank(value):
    """Return True for NaN/None or a value whose string form is only whitespace."""
    return pd.isna(value) or str(value).strip() == ""


for idx, row in df_miss.iterrows():
    line_num = row["Line Number"]

    missing_cols = [col for col in slot_cols if _is_blank(row[col])]
    if not missing_cols:
        continue

    try:
        rec_row = df_rec.loc[line_num]
    except KeyError:
        print(f"Warning: Unable to find row with Line Number {line_num} in movies_recommendations_all.csv")
        continue

    # A duplicated "Line Number" makes .loc return a DataFrame; use its first
    # row so the per-title checks below operate on scalars.
    if isinstance(rec_row, pd.DataFrame):
        rec_row = rec_row.iloc[0]

    # Titles already present in the row must not be drawn again.
    excluded = {row[col] for col in slot_cols if not _is_blank(row[col])}

    # When the row has no match (Match == 0), its movie_name is also excluded
    # from the candidates.
    if row["Match"] == 0:
        excluded.add(row["movie_name"])

    # Candidates keep the order of the recommendation columns.
    candidate_movies = [
        movie
        for movie in (rec_row[col] for col in rec_cols)
        if not _is_blank(movie) and movie not in excluded
    ]

    k = len(missing_cols)
    if len(candidate_movies) < k:
        print(f"Warning: Not enough movies available for Line Number {line_num}, {k} positions missing, only {len(candidate_movies)} candidate movies")
        k = len(candidate_movies)

    chosen_movies = random.sample(candidate_movies, k)

    # zip stops at the shorter sequence, so surplus blank slots stay blank.
    for col, movie in zip(missing_cols, chosen_movies):
        df_miss.at[idx, col] = movie


df_miss.to_csv("2q2.csv", index=False)
print("✅ Filling is complete and saved as 2q2.csv")


In [None]:
import pandas as pd


# Keep the first eight columns of the filled table, save them unchanged, and
# save a second copy extended with one constant bonus-score column per slot.
input_file = "2q2.csv"
output_file = "listlist_with_scores.csv"
new_file = "listnew.csv"

df = pd.read_csv(input_file)

# NOTE(review): the first eight columns are presumably Line Number,
# Movie 1-5, movie_name and Match -- confirm against the input file.
df_first8 = df.iloc[:, :8]
df_first8.to_csv(new_file, index=False)

alpha3 = 0.025  # bonus step between adjacent slots

# Slot i gets alpha3 * (5 - i): 0.1 for "Movie 1" down to 0.0 for "Movie 5".
df_with_scores = df_first8.copy()
for i in range(1, 6):
    df_with_scores[f"Movie {i} score"] = round(alpha3 * (5 - i), 3)

df_with_scores.to_csv(output_file, index=False)

# One completion message naming both files (the original printed a second,
# redundant message right after this one).
print(f"✅ Processing completed, results saved to {output_file}  {new_file}")

In [11]:
import pandas as pd
import numpy as np
# Load listnew.csv; the metric functions below read its 'Match' column.
df = pd.read_csv('listnew.csv')
def calculate_hit(df, n):
    """Return Hit@n: the fraction of rows whose 'Match' value x satisfies 0 < x <= n.

    Args:
        df: DataFrame with a numeric 'Match' column.
        n: cut-off rank.

    Returns:
        The mean of the per-row 0/1 hit indicator.
    """
    matches = df['Match']
    in_top_n = (matches > 0) & (matches <= n)
    return in_top_n.astype(int).mean()
def calculate_dcg(row, n):
    """Return the DCG@n contribution of one row.

    Args:
        row: mapping or Series providing a 'Match' value.
        n: cut-off rank.

    Returns:
        1 / log2(Match + 1) when 0 < Match <= n, otherwise 0.
    """
    rank = row['Match']
    # Written as a negated range test so that a NaN rank also yields 0.
    if not (0 < rank <= n):
        return 0
    return 1 / np.log2(rank + 1)
def calculate_ndcg(df, n):
    """Return the mean NDCG@n over the rows of `df`.

    The per-row DCG is 1 / log2(Match + 1) when 0 < Match <= n and 0
    otherwise (the same rule as calculate_dcg), computed here with
    vectorised Series operations instead of a row-wise apply.

    Side effect: adds or overwrites the columns 'DCG@{n}' and 'NDCG@{n}'
    on `df`.

    Args:
        df: DataFrame with a numeric 'Match' column.
        n: cut-off rank.

    Returns:
        The mean of the 'NDCG@{n}' column (NaN for a frame without rows).
    """
    match = df['Match']
    hit = (match > 0) & (match <= n)

    # Non-hits are replaced by rank 1 before taking the logarithm so that
    # log2 never sees 0 or a negative number; they are zeroed afterwards.
    dcg = (1 / np.log2(match.where(hit, 1) + 1)).where(hit, 0.0)

    # The ideal ranking puts the matching item first, so IDCG@n is
    # 1 / log2(2) == 1 for every n.
    idcg = 1 / np.log2(2)

    df[f'DCG@{n}'] = dcg
    df[f'NDCG@{n}'] = dcg / idcg
    return df[f'NDCG@{n}'].mean()
# Evaluate both cut-offs (the call order matches the report order below).
hit_at_3, ndcg_at_3 = calculate_hit(df, 3), calculate_ndcg(df, 3)
hit_at_5, ndcg_at_5 = calculate_hit(df, 5), calculate_ndcg(df, 5)

# NOTE(review): the origin of 103 / 1162 is not visible in this file --
# confirm what the two counts stand for.
scale_factor = 103 / 1162

scaled_hit_at_3, scaled_ndcg_at_3 = hit_at_3 * scale_factor, ndcg_at_3 * scale_factor
scaled_hit_at_5, scaled_ndcg_at_5 = hit_at_5 * scale_factor, ndcg_at_5 * scale_factor

# Report the raw metrics first, then the scaled ones, in the same order.
raw_report = (
    ("Hit@3", hit_at_3),
    ("NDCG@3", ndcg_at_3),
    ("Hit@5", hit_at_5),
    ("NDCG@5", ndcg_at_5),
)
scaled_report = (
    ("Hit@3", scaled_hit_at_3),
    ("NDCG@3", scaled_ndcg_at_3),
    ("Hit@5", scaled_hit_at_5),
    ("NDCG@5", scaled_ndcg_at_5),
)

for label, value in raw_report:
    print(f"{label}: {value}")
print("\n### Scaled Results (Multiplied by 103 / 1162) ###")
for label, value in scaled_report:
    print(f"Scaled {label}: {value}")

Hit@3: 0.4411764705882353
NDCG@3: 0.32912899061624995
Hit@5: 0.6666666666666666
NDCG@5: 0.42065695213373006

### Scaled Results (Multiplied by 103 / 1162) ###
Scaled Hit@3: 0.03910600384732206
Scaled NDCG@3: 0.029174084366156407
Scaled Hit@5: 0.05909351692484222
Scaled NDCG@5: 0.03728714808070068


In [None]:
import pandas as pd


# Add the per-slot bonus scores from the scored list to the matching titles
# of each recommendation row (rows are paired through "Line Number").
listlist_file = "listlist_with_scores.csv"
movies_recommendations_file = "movies_recommendations_all.csv"
output_file = "movies_recommendations_all_updated.csv"

df_listlist = pd.read_csv(listlist_file)
df_movies = pd.read_csv(movies_recommendations_file)

for index, row in df_movies.iterrows():
    same_line = df_listlist["Line Number"] == row["Line Number"]
    matching_row = df_listlist[same_line]

    # Recommendation rows without a counterpart keep their scores as they are.
    if matching_row.empty:
        continue

    # Title -> bonus for "Movie 1".."Movie 5" of the first matching row; blank
    # titles are skipped and a repeated title keeps the bonus of its last slot.
    reference = matching_row.iloc[0]
    movie_scores = {
        reference[f"Movie {slot}"]: reference[f"Movie {slot} score"]
        for slot in range(1, 6)
        if pd.notna(reference[f"Movie {slot}"])
    }

    # Each of the ten recommended titles that has a bonus gets it added to
    # its own score column.
    for slot in range(1, 11):
        title = row[f"Movie {slot}"]
        if title in movie_scores:
            df_movies.at[index, f"Score {slot}"] += movie_scores[title]

df_movies.to_csv(output_file, index=False)

print(f"✅ Processing completed, results saved to {output_file}")

In [None]:
import pandas as pd
import random


# Re-order the ten (title, score) pairs of every row by descending score;
# titles sharing exactly the same score are put in random order.
# NOTE: no random seed is set in this cell, so tie order can differ between runs.
input_file = "movies_recommendations_all_updated.csv"
output_file = "movies_recommendations_all_sorted.csv"

df = pd.read_csv(input_file)

movie_cols = [f"Movie {i}" for i in range(1, 11)]
score_cols = [f"Score {i}" for i in range(1, 11)]

for index, row in df.iterrows():
    # Bucket the titles by exact score value. The buckets are ordered by
    # their key below, so no separate pre-sort of the pairs is needed.
    grouped_movies = {}
    for movie_col, score_col in zip(movie_cols, score_cols):
        grouped_movies.setdefault(row[score_col], []).append(row[movie_col])

    final_sorted_movies = []
    final_sorted_scores = []
    for score in sorted(grouped_movies, reverse=True):
        tied_movies = grouped_movies[score]
        random.shuffle(tied_movies)  # random order among exact ties
        final_sorted_movies.extend(tied_movies)
        final_sorted_scores.extend([score] * len(tied_movies))

    # Write the re-ordered pairs back into the same ten column pairs.
    for movie_col, score_col, movie, score in zip(
        movie_cols, score_cols, final_sorted_movies, final_sorted_scores
    ):
        df.at[index, movie_col] = movie
        df.at[index, score_col] = score

df.to_csv(output_file, index=False)

# The original message ran the file name into the preceding word.
print(f"✅ Processing completed, results saved to {output_file}")

In [None]:
import pandas as pd


# Reduce the sorted recommendations to the line number and the five
# leading title columns.
input_file = "movies_recommendations_all_sorted.csv"
output_file = "movies_recommendations_top5.csv"

df = pd.read_csv(input_file)

columns_to_keep = ["Line Number"] + [f"Movie {rank}" for rank in range(1, 6)]
df_filtered = df.loc[:, columns_to_keep]

df_filtered.to_csv(output_file, index=False)

print(f"✅ Processing completed, results saved to {output_file}")


In [15]:

import pandas as pd

# Attach the 'movie_name' column of gt-match-output.csv to the top-5
# recommendations and save the combined table.
movies_recommendations = pd.read_csv('movies_recommendations_top5.csv')
match_output = pd.read_csv('gt-match-output.csv')

# Left join: every recommendation row is kept; 'Line Number' on the left is
# paired with 'index' on the right, and the helper key is dropped afterwards.
name_lookup = match_output[['index', 'movie_name']]
merged_df = movies_recommendations.merge(
    name_lookup,
    how='left',
    left_on='Line Number',
    right_on='index',
).drop(columns=['index'])

output_file = 'merged_with_movie_name.csv'
merged_df.to_csv(output_file, index=False)

print(f"CSV file created at: {output_file}")

CSV file created at: merged_with_movie_name.csv


In [16]:

import pandas as pd

# Load merged_with_movie_name.csv (recommendation rows with a 'movie_name' column).
merged_df = pd.read_csv('merged_with_movie_name.csv')

def check_match(row):
    """Return the 1-based slot of the first 'Movie i' (i = 1..5) equal to row['movie_name'].

    Args:
        row: mapping or Series with 'movie_name' and 'Movie 1'..'Movie 5'.

    Returns:
        The slot number 1-5 of the first equal title, or 0 when none is equal.
    """
    return next(
        (slot for slot in range(1, 6) if row['movie_name'] == row[f'Movie {slot}']),
        0,
    )

output_file_with_match = 'merged_with_match_column.csv'

# 'Match' holds, per row, the slot returned by check_match (0 = no match).
merged_df = merged_df.assign(Match=merged_df.apply(check_match, axis=1))

# Persist the table together with the new column.
merged_df.to_csv(output_file_with_match, index=False)

print(f"CSV file created at: {output_file_with_match}")

CSV file created at: merged_with_match_column.csv


In [None]:
import pandas as pd
import numpy as np
# Load merged_with_match_column.csv; the metric functions below read its 'Match' column.
df = pd.read_csv('merged_with_match_column.csv')
def calculate_hit(df, n):
    """Return Hit@n: the fraction of rows whose 'Match' value x satisfies 0 < x <= n.

    Args:
        df: DataFrame with a numeric 'Match' column.
        n: cut-off rank.

    Returns:
        The mean of the per-row 0/1 hit indicator.
    """
    matches = df['Match']
    in_top_n = (matches > 0) & (matches <= n)
    return in_top_n.astype(int).mean()
def calculate_dcg(row, n):
    """Return the DCG@n contribution of one row.

    Args:
        row: mapping or Series providing a 'Match' value.
        n: cut-off rank.

    Returns:
        1 / log2(Match + 1) when 0 < Match <= n, otherwise 0.
    """
    rank = row['Match']
    # Written as a negated range test so that a NaN rank also yields 0.
    if not (0 < rank <= n):
        return 0
    return 1 / np.log2(rank + 1)
def calculate_ndcg(df, n):
    """Return the mean NDCG@n over the rows of `df`.

    The per-row DCG is 1 / log2(Match + 1) when 0 < Match <= n and 0
    otherwise (the same rule as calculate_dcg), computed here with
    vectorised Series operations instead of a row-wise apply.

    Side effect: adds or overwrites the columns 'DCG@{n}' and 'NDCG@{n}'
    on `df`.

    Args:
        df: DataFrame with a numeric 'Match' column.
        n: cut-off rank.

    Returns:
        The mean of the 'NDCG@{n}' column (NaN for a frame without rows).
    """
    match = df['Match']
    hit = (match > 0) & (match <= n)

    # Non-hits are replaced by rank 1 before taking the logarithm so that
    # log2 never sees 0 or a negative number; they are zeroed afterwards.
    dcg = (1 / np.log2(match.where(hit, 1) + 1)).where(hit, 0.0)

    # The ideal ranking puts the matching item first, so IDCG@n is
    # 1 / log2(2) == 1 for every n.
    idcg = 1 / np.log2(2)

    df[f'DCG@{n}'] = dcg
    df[f'NDCG@{n}'] = dcg / idcg
    return df[f'NDCG@{n}'].mean()
# Evaluate both cut-offs (the call order matches the report order below).
hit_at_3, ndcg_at_3 = calculate_hit(df, 3), calculate_ndcg(df, 3)
hit_at_5, ndcg_at_5 = calculate_hit(df, 5), calculate_ndcg(df, 5)

# NOTE(review): the origin of 103 / 1162 is not visible in this file --
# confirm what the two counts stand for.
scale_factor = 103 / 1162

scaled_hit_at_3, scaled_ndcg_at_3 = hit_at_3 * scale_factor, ndcg_at_3 * scale_factor
scaled_hit_at_5, scaled_ndcg_at_5 = hit_at_5 * scale_factor, ndcg_at_5 * scale_factor

# Report the raw metrics first, then the scaled ones, in the same order.
raw_report = (
    ("Hit@3", hit_at_3),
    ("NDCG@3", ndcg_at_3),
    ("Hit@5", hit_at_5),
    ("NDCG@5", ndcg_at_5),
)
scaled_report = (
    ("Hit@3", scaled_hit_at_3),
    ("NDCG@3", scaled_ndcg_at_3),
    ("Hit@5", scaled_hit_at_5),
    ("NDCG@5", scaled_ndcg_at_5),
)

for label, value in raw_report:
    print(f"{label}: {value}")
print("\n### Scaled Results (Multiplied by 103 / 1162) ###")
for label, value in scaled_report:
    print(f"Scaled {label}: {value}")