In [1]:
import pandas as pd 
from sklearn.linear_model import LinearRegression 

# Read the processed, episode-level dataset into a DataFrame.
df = pd.read_csv(filepath_or_buffer="data/processed/episode_full_dataset.csv")

# Show the first five rows as the cell's rich output.
df.head(n=5)

Unnamed: 0,Season,Episode,fear,anger,anticipation,trust,surprise,positive,negative,sadness,...,Music by,Cinematography by,Editing by,IMDb Rating,Rotten Tomatoes Rating (Percentage),Metacritic Ratings,Ordered,Filming Duration,Novel(s) Adapted,Synopsis
0,season-01,e01,0.088028,0.059859,0.090669,0.142606,0.049296,0.18662,0.159331,0.084507,...,Ramin Djawadi,Alik Sakharov,Oral Norrie Ottey,8.9,100,9.1,"March 2, 2010",Second half of 2010,A Game of Thrones,"North of the Seven Kingdoms of Westeros, Night..."
1,season-01,e02,0.099062,0.066736,0.081335,0.133472,0.047967,0.192909,0.172054,0.069864,...,Ramin Djawadi,Alik Sakharov,Oral Norrie Ottey,8.6,100,8.9,"March 2, 2010",Second half of 2010,A Game of Thrones,"Ned, the new Hand of the King, travels to King..."
2,season-01,e03,0.095676,0.068997,0.087397,0.131555,0.046918,0.166513,0.179393,0.078197,...,Ramin Djawadi,Marco Pontecorvo,Frances Parker,8.5,81,8.7,"March 2, 2010",Second half of 2010,A Game of Thrones,Ned attends the King's Small Council and learn...
3,season-01,e04,0.100559,0.057462,0.094174,0.138069,0.045491,0.189146,0.160415,0.077414,...,Ramin Djawadi,Marco Pontecorvo,Frances Parker,8.6,100,9.1,"March 2, 2010",Second half of 2010,A Game of Thrones,"While returning to King's Landing, Tyrion stop..."
4,season-01,e05,0.102941,0.069853,0.082353,0.150735,0.041912,0.172059,0.175,0.074265,...,Ramin Djawadi,Marco Pontecorvo,Frances Parker,9.0,95,9.0,"March 2, 2010",Second half of 2010,A Game of Thrones,"King Robert's eunuch spy, Varys, has uncovered..."


In [2]:
# Names of the emotion/sentiment score columns used as regression features.
emotion_cols = [
    "fear", "anger", "anticipation", "trust", "surprise",
    "positive", "negative", "sadness", "disgust", "joy",
]

# Feature matrix: every row of `df`, restricted to the emotion columns
# (column order follows `emotion_cols`).
X = df.loc[:, emotion_cols]

# Preview the first five feature rows.
X.head(n=5)


Unnamed: 0,fear,anger,anticipation,trust,surprise,positive,negative,sadness,disgust,joy
0,0.088028,0.059859,0.090669,0.142606,0.049296,0.18662,0.159331,0.084507,0.066901,0.072183
1,0.099062,0.066736,0.081335,0.133472,0.047967,0.192909,0.172054,0.069864,0.058394,0.078206
2,0.095676,0.068997,0.087397,0.131555,0.046918,0.166513,0.179393,0.078197,0.067157,0.078197
3,0.100559,0.057462,0.094174,0.138069,0.045491,0.189146,0.160415,0.077414,0.059058,0.078212
4,0.102941,0.069853,0.082353,0.150735,0.041912,0.172059,0.175,0.074265,0.060294,0.070588


In [None]:
# Target variable: U.S. viewers (millions).
# Rows with a missing target are dropped first, because LinearRegression.fit
# raises on NaN input; this mirrors the dropna step used when looping over
# all targets later in the notebook.
y = df["U.S. Viewers (Millions)"].dropna()
X_obs = X.loc[y.index]  # feature rows aligned with the retained targets

model = LinearRegression()
model.fit(X_obs, y)

# NOTE: this R² is in-sample (scored on the same rows used for fitting),
# so it overstates how well the model would generalise.
print("R²:", model.score(X_obs, y))

# One fitted coefficient per emotion column, in `emotion_cols` order.
for emotion, coef in zip(emotion_cols, model.coef_):
    print(f"{emotion}: {coef:.4f}")

R²: 0.39998066901404805
fear: 37.3602
anger: 25.8833
anticipation: -5.4251
trust: -22.1653
surprise: 84.5166
positive: 24.4722
negative: -60.6798
sadness: 83.7540
disgust: -109.1599
joy: -58.5561


In [None]:
# Target variable: IMDb rating.
# Rows with a missing target are dropped first, because LinearRegression.fit
# raises on NaN input; this mirrors the dropna step used when looping over
# all targets later in the notebook.
y = df["IMDb Rating"].dropna()
X_obs = X.loc[y.index]  # feature rows aligned with the retained targets

model = LinearRegression()
model.fit(X_obs, y)

# NOTE: this R² is in-sample (scored on the same rows used for fitting),
# so it overstates how well the model would generalise.
print("R²:", model.score(X_obs, y))

# One fitted coefficient per emotion column, in `emotion_cols` order.
for emotion, coef in zip(emotion_cols, model.coef_):
    print(f"{emotion}: {coef:.4f}")

R²: 0.12722048717740742
fear: 7.0188
anger: 6.4699
anticipation: -0.9309
trust: 6.1385
surprise: -11.3985
positive: -10.2400
negative: -14.3302
sadness: -5.8366
disgust: 10.4241
joy: 12.6849


In [None]:
# Target variable: Rotten Tomatoes rating (percentage).
# Rows with a missing target are dropped first, because LinearRegression.fit
# raises on NaN input; this mirrors the dropna step used when looping over
# all targets later in the notebook.
y = df["Rotten Tomatoes Rating (Percentage)"].dropna()
X_obs = X.loc[y.index]  # feature rows aligned with the retained targets

model = LinearRegression()
model.fit(X_obs, y)

# NOTE: this R² is in-sample (scored on the same rows used for fitting),
# so it overstates how well the model would generalise.
print("R²:", model.score(X_obs, y))

# One fitted coefficient per emotion column, in `emotion_cols` order.
for emotion, coef in zip(emotion_cols, model.coef_):
    print(f"{emotion}: {coef:.4f}")

R²: 0.1558998950731283
fear: 21.4722
anger: -167.8732
anticipation: 123.8662
trust: 43.3958
surprise: -209.7301
positive: 21.1811
negative: 31.8347
sadness: -158.9945
disgust: 268.6355
joy: 26.2122


In [None]:
# Target variable: Metacritic rating.
# Rows with a missing target are dropped first, because LinearRegression.fit
# raises on NaN input; this mirrors the dropna step used when looping over
# all targets later in the notebook.
y = df["Metacritic Ratings"].dropna()
X_obs = X.loc[y.index]  # feature rows aligned with the retained targets

model = LinearRegression()
model.fit(X_obs, y)

# NOTE: this R² is in-sample (scored on the same rows used for fitting),
# so it overstates how well the model would generalise.
print("R²:", model.score(X_obs, y))

# One fitted coefficient per emotion column, in `emotion_cols` order.
for emotion, coef in zip(emotion_cols, model.coef_):
    print(f"{emotion}: {coef:.4f}")

R²: 0.3658177991372201
fear: -15.9487
anger: 13.1679
anticipation: 1.8348
trust: 2.9287
surprise: -63.0039
positive: -21.8573
negative: -5.2150
sadness: -31.6068
disgust: 62.3579
joy: 57.3424


In [8]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Reload the dataset so this cell does not depend on state left by other cells.
df = pd.read_csv("data/processed/episode_full_dataset.csv")

# Emotion/sentiment score columns used as regression features.
emotion_cols = ["fear", "anger", "anticipation", "trust", "surprise",
                "positive", "negative", "sadness", "disgust", "joy"]

# Every target variable that gets its own regression on the emotion features.
target_vars = [
    "IMDb Rating",
    "U.S. Viewers (Millions)",
    "Rotten Tomatoes Rating (Percentage)",
    "Metacritic Ratings",
    "Running Time (Minutes)"
]

results = []            # one {"Target Variable", "R² Score"} record per target
coef_summaries = {}     # target name -> {emotion: fitted coefficient}

for target in target_vars:

    # Drop rows where the target OR any feature is missing:
    # LinearRegression.fit raises on NaN in either X or y.
    df_clean = df.dropna(subset=emotion_cols + [target])

    X = df_clean[emotion_cols]
    y = df_clean[target]

    model = LinearRegression()
    model.fit(X, y)

    # In-sample R² (scored on the same rows used for fitting), so it
    # overstates out-of-sample performance.
    r2 = model.score(X, y)
    results.append({"Target Variable": target, "R² Score": r2})

    # Store the emotion coefficients, keyed by column name.
    coefs = dict(zip(emotion_cols, model.coef_))
    coef_summaries[target] = coefs

# Convert the R² records to a DataFrame for rich display.
results_df = pd.DataFrame(results)
print("R² Comparison Table:")
display(results_df)

# Emotion ranking summary: the three largest coefficients by absolute value.
# NOTE(review): the emotion scores look like proportions that sum to 1 per
# episode (confirm). If so they are perfectly collinear with the intercept,
# individual coefficients are identified only up to a common additive shift,
# and this magnitude ranking should be read with caution. The targets are
# also on different scales, so magnitudes are not comparable across targets.
print("\n🔍 Emotion Importance Summary (Top 3 per target):\n")

for target in target_vars:
    coefs = coef_summaries[target]
    sorted_coefs = sorted(coefs.items(), key=lambda item: abs(item[1]), reverse=True)

    print(f"=== {target} ===")
    for emotion, coef in sorted_coefs[:3]:
        print(f"  • {emotion}: {coef:.4f}")
    print()


R² Comparison Table:


Unnamed: 0,Target Variable,R² Score
0,IMDb Rating,0.12722
1,U.S. Viewers (Millions),0.399981
2,Rotten Tomatoes Rating (Percentage),0.1559
3,Metacritic Ratings,0.365818
4,Running Time (Minutes),0.360754



🔍 Emotion Importance Summary (Top 3 per target):

=== IMDb Rating ===
  • negative: -14.3302
  • joy: 12.6849
  • surprise: -11.3985

=== U.S. Viewers (Millions) ===
  • disgust: -109.1599
  • surprise: 84.5166
  • sadness: 83.7540

=== Rotten Tomatoes Rating (Percentage) ===
  • disgust: 268.6355
  • surprise: -209.7301
  • anger: -167.8732

=== Metacritic Ratings ===
  • surprise: -63.0039
  • disgust: 62.3579
  • joy: 57.3424

=== Running Time (Minutes) ===
  • surprise: 399.8176
  • fear: 201.0478
  • anticipation: -172.5920

