# This code turns the vector of raw star scores into summed star score values based on a) the raw total, b) a simple linear drop-off based on billing order, c) a logistic ("log") function based on billing order, and d) an exponent-based ("exponential") function based on billing order

In [45]:
from tmdbv3api import TMDb
from tmdbv3api import Movie
from tmdbv3api.exceptions import TMDbException
import random
import pandas as pd
import csv
import numpy as np
import ast
from math import exp
from datetime import datetime
# Create the TMDb client object and set its API key.
# NOTE(review): the API key is hard-coded in source. Consider loading it from an
# environment variable or an untracked config file, and rotating this key if the
# notebook has been shared. `tmdb` is not referenced by the code visible here.
tmdb=TMDb()
tmdb.api_key='9cf68f4c97c8f0cc6bb9646da389a808'

In [46]:
#Calculates the weight for an individual star score based on billing order using a simple percent drop off system

In [47]:
def calculate_star_score_with_weights_simple_version(billing_order, star_score):
    """Weight one star score with a linear 10%-per-position drop-off.

    Billing position 1 keeps the full score and each later position loses a
    further 0.1 of the weight. Positions after the tenth contribute nothing.

    Args:
        billing_order: Position in the billing order (callers pass 1 for the
            first-billed star).
        star_score: Raw star score for that cast member.

    Returns:
        The weighted star score, or 0 when billing_order is greater than 10.
    """
    # Guard clause: anyone billed after tenth place is ignored entirely.
    if billing_order > 10:
        return 0

    weight = 1 - (billing_order - 1) * .1
    return weight * star_score

In [48]:
#Calculates the weight for an individual star score based on billing order using a logistic (sigmoid-shaped) weighting system

In [49]:
def calculate_star_score_with_weights_log_version(billing_order, star_score):
    """Weight one star score with a falling logistic curve over billing order.

    The weight is ``1 - 1 / (1 + exp(-k * (billing_order - x0)))`` with
    ``x0 = 4`` and ``k = 1.5``: close to 1 for early billing positions,
    exactly 0.5 at position 4, and approaching 0 for late positions.

    Args:
        billing_order: Position in the billing order (callers pass 1 for the
            first-billed star).
        star_score: Raw star score for that cast member.

    Returns:
        The star score multiplied by the logistic weight.
    """
    # Shape parameters of the curve (can be modified)
    midpoint = 4       # billing position where the weight equals 0.5
    steepness = 1.5    # larger values make the drop-off around the midpoint sharper

    exponent = steepness * (billing_order - midpoint)
    weight = 1 - 1 / (1 + np.exp(-exponent))
    return star_score * weight

In [50]:
#Calculates the weight for an individual star score based on billing order using an exponent-based ("exponential") weighting system, i.e. an inverse power-law decay

In [51]:
def calculate_star_score_with_weights_exponential_version(billing_order, star_score):
    """Weight one star score with an inverse power-law decay over billing order.

    The weight is ``1 / (1 + k * (billing_order - 1) ** p)`` with ``k = 0.2``
    and ``p = 2``: exactly 1 for billing position 1 and shrinking with the
    square of the distance from the top billing.

    Args:
        billing_order: Position in the billing order (callers pass 1 for the
            first-billed star).
        star_score: Raw star score for that cast member.

    Returns:
        The star score multiplied by the decay weight.
    """
    # Shape parameters of the curve (can be modified)
    power = 2      # exponent applied to the distance from top billing
    scale = 0.2    # how strongly that powered distance reduces the weight

    positions_from_top = billing_order - 1
    weight = 1 / (1 + scale * positions_from_top ** power)
    return star_score * weight

In [52]:
def calculate_total_star_score_no_weights(star_scores):
    """Add up the raw star scores, ignoring entries that are None.

    Args:
        star_scores: Iterable of star scores; None entries are skipped.

    Returns:
        The plain sum of the non-None scores (0 if there are none).
    """
    total = 0
    for score in star_scores:
        if score is None:
            continue
        total += score
    return total

In [53]:
def calculate_total_star_score_simple_weights(star_scores):
    """Sum the star scores after applying the simple billing-order weight.

    Args:
        star_scores: Star scores in billing order; None entries are skipped
            but still occupy their billing position.

    Returns:
        The sum of the simple-weighted scores (0 if there are none).
    """
    total = 0
    # Count positions from 1 because the weighting formula expects 1-based billing order
    for billing_order, score in enumerate(star_scores, start=1):
        if score is None:
            continue
        total += calculate_star_score_with_weights_simple_version(billing_order, score)
    return total

In [54]:
def calculate_total_star_score_log_weights(star_scores):
    """Sum the star scores after applying the "log" billing-order weight.

    Args:
        star_scores: Star scores in billing order; None entries are skipped
            but still occupy their billing position.

    Returns:
        The sum of the weighted scores (0 if there are none).
    """
    total = 0
    # Count positions from 1 because the weighting formula expects 1-based billing order
    for billing_order, score in enumerate(star_scores, start=1):
        if score is None:
            continue
        total += calculate_star_score_with_weights_log_version(billing_order, score)
    return total


In [55]:
def calculate_total_star_score_exp_weights(star_scores):
    """Sum the star scores after applying the "exponential" billing-order weight.

    Args:
        star_scores: Star scores in billing order; None entries are skipped
            but still occupy their billing position.

    Returns:
        The sum of the weighted scores (0 if there are none).
    """
    total = 0
    # Count positions from 1 because the weighting formula expects 1-based billing order
    for billing_order, score in enumerate(star_scores, start=1):
        if score is None:
            continue
        total += calculate_star_score_with_weights_exponential_version(billing_order, score)
    return total


In [56]:
# Path of the input CSV (the scoring loop reads its 'Raw Star Scores' and 'IMDB ID' columns)
csv_file_path ='merged_data_mod_raw_star_score.csv' 
# Load the input CSV into a DataFrame
df = pd.read_csv(csv_file_path)

In [57]:
# Initialize the columns that will hold our scores.
# They start as floats (0.0) rather than ints: the weighted totals are fractional,
# and adding a float into an integer column relies on silent upcasting, which
# recent pandas versions deprecate.
df['Unweighted Star Score'] = 0.0
df['Simple Weight Star Score'] = 0.0
df['Log Weight Star Score'] = 0.0
df['Exponential Weight Star Score'] = 0.0


# Results are accumulated on a deep copy of the input frame
df_output = df.copy(deep = True)

# Iterate through the dataframe one row at a time
for index, row in df.iterrows():
    # Get the raw scores for the row: the string form of a list, or NaN when missing
    row_raw_scores = row['Raw Star Scores']
    if pd.isna(row_raw_scores):
        # A missing value cannot be iterated by the scoring functions; treat it as
        # "no scores" so every total for this row is 0 instead of raising TypeError.
        row_raw_scores = []
    else:
        # literal_eval only parses Python literals, so the cell text is never executed
        row_raw_scores = ast.literal_eval(row_raw_scores)

    # Calculate star scores using our various formulas
    row_unweighted_score = calculate_total_star_score_no_weights(row_raw_scores)
    row_simple_weight_score = calculate_total_star_score_simple_weights(row_raw_scores)
    row_log_weight_score = calculate_total_star_score_log_weights(row_raw_scores)
    row_exp_weight_score = calculate_total_star_score_exp_weights(row_raw_scores)

    # Build the row selector once per row instead of once per score column.
    # It matches every row that shares this row's IMDB ID, so rows with a
    # duplicated ID accumulate each other's scores (unchanged behavior).
    same_imdb_id = df['IMDB ID'] == row['IMDB ID']
    df_output.loc[same_imdb_id, 'Unweighted Star Score'] += row_unweighted_score
    df_output.loc[same_imdb_id, 'Simple Weight Star Score'] += row_simple_weight_score
    df_output.loc[same_imdb_id, 'Log Weight Star Score'] += row_log_weight_score
    df_output.loc[same_imdb_id, 'Exponential Weight Star Score'] += row_exp_weight_score


In [58]:
# Write the scored DataFrame to disk. With to_csv's default index=True the row
# index is also written, as an unnamed first column.
df_output.to_csv('merged_data_mod_scores.csv')