In [1]:
# Import dependencies
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Read the Rotten Tomatoes movie table from the CSV next to this notebook
# and preview its first rows.
movies_df = pd.read_csv(
    filepath_or_buffer="rotten_tomatoes_movies.csv",
)
movies_df.head(n=5)

Unnamed: 0,rotten_tomatoes_link,movie_title,movie_info,critics_consensus,content_rating,genres,directors,authors,actors,original_release_date,...,production_company,tomatometer_status,tomatometer_rating,tomatometer_count,audience_status,audience_rating,audience_count,tomatometer_top_critics_count,tomatometer_fresh_critics_count,tomatometer_rotten_critics_count
0,m/0814255,Percy Jackson & the Olympians: The Lightning T...,"Always trouble-prone, the life of teenager Per...",Though it may seem like just another Harry Pot...,PG,"Action & Adventure, Comedy, Drama, Science Fic...",Chris Columbus,"Craig Titley, Chris Columbus, Rick Riordan","Logan Lerman, Brandon T. Jackson, Alexandra Da...",2010-02-12,...,20th Century Fox,Rotten,49.0,149.0,Spilled,53.0,254421.0,43,73,76
1,m/0878835,Please Give,Kate (Catherine Keener) and her husband Alex (...,Nicole Holofcener's newest might seem slight i...,R,Comedy,Nicole Holofcener,Nicole Holofcener,"Catherine Keener, Amanda Peet, Oliver Platt, R...",2010-04-30,...,Sony Pictures Classics,Certified-Fresh,87.0,142.0,Upright,64.0,11574.0,44,123,19
2,m/10,10,"A successful, middle-aged Hollywood songwriter...",Blake Edwards' bawdy comedy may not score a pe...,R,"Comedy, Romance",Blake Edwards,Blake Edwards,"Dudley Moore, Bo Derek, Julie Andrews, Robert ...",1979-10-05,...,Waner Bros.,Fresh,67.0,24.0,Spilled,53.0,14684.0,2,16,8
3,m/1000013-12_angry_men,12 Angry Men (Twelve Angry Men),Following the closing arguments in a murder tr...,Sidney Lumet's feature debut is a superbly wri...,NR,"Classics, Drama",Sidney Lumet,Reginald Rose,"Martin Balsam, John Fiedler, Lee J. Cobb, E.G....",1957-04-13,...,Criterion Collection,Certified-Fresh,100.0,54.0,Upright,97.0,105386.0,6,54,0
4,m/1000079-20000_leagues_under_the_sea,"20,000 Leagues Under The Sea","In 1866, Professor Pierre M. Aronnax (Paul Luk...","One of Disney's finest live-action adventures,...",G,"Action & Adventure, Drama, Kids & Family",Richard Fleischer,Earl Felton,"James Mason, Kirk Douglas, Paul Lukas, Peter L...",1954-01-01,...,Disney,Fresh,89.0,27.0,Upright,74.0,68918.0,5,24,3


In [3]:
# Build the modelling frame for a simple baseline. Only three columns are
# kept, because adding other features would need more preprocessing first.
#
# The steps are written as one method chain so the result is a fresh,
# independent DataFrame: assigning a column on a frame derived by selecting
# from `movies_df` and calling dropna() can raise SettingWithCopyWarning.
movies_df_clean = (
    movies_df[["content_rating", "runtime", "tomatometer_status"]]
    # Discard every row that is missing a value in any selected column.
    .dropna()
    # Fold "Certified-Fresh" into "Fresh" so the target is Fresh vs. Rotten.
    .assign(
        tomatometer_status=lambda frame: frame["tomatometer_status"].replace(
            "Certified-Fresh", "Fresh"
        )
    )
)
movies_df_clean.head()

Unnamed: 0,content_rating,runtime,tomatometer_status
0,PG,119.0,Rotten
1,R,90.0,Fresh
2,R,122.0,Fresh
3,NR,95.0,Fresh
4,G,127.0,Fresh


In [4]:
# Target vector: the (already merged) tomatometer status label of each movie.
# The class counts are displayed to show how balanced the two labels are.
y = movies_df_clean.loc[:, "tomatometer_status"]
y.value_counts()

Fresh     9891
Rotten    7493
Name: tomatometer_status, dtype: int64

In [5]:
# Feature matrix: the numeric `runtime` column plus dummy (indicator)
# columns generated from the categorical `content_rating` column.
X = pd.get_dummies(
    data=movies_df_clean.loc[:, ["content_rating", "runtime"]],
)
X.head()

Unnamed: 0,runtime,content_rating_G,content_rating_NC17,content_rating_NR,content_rating_PG,content_rating_PG-13,content_rating_R
0,119.0,0,0,0,1,0,0
1,90.0,0,0,0,0,0,1
2,122.0,0,0,0,0,0,1
3,95.0,0,0,1,0,0,0
4,127.0,1,0,0,0,0,0


In [6]:
# Partition features and labels into training and testing subsets.
# The fixed random_state keeps the split identical across re-runs.
(X_train, X_test, y_train, y_test) = train_test_split(
    X,
    y,
    random_state=1,
)

In [7]:
# Instantiate the (not yet fitted) random forest: 128 trees, seeded so that
# repeated runs build the same forest.
model = RandomForestClassifier(
    n_estimators=128,
    random_state=1,
)

In [8]:
# Train the forest on the training subset. The result of fit() is rebound to
# `model`, and the assignment keeps the cell from echoing the estimator.
model = model.fit(X=X_train, y=y_train)

In [9]:
# Predict a status label for every row of the held-out test features,
# then display the resulting array.
y_pred = model.predict(X=X_test)
y_pred

array(['Rotten', 'Fresh', 'Fresh', ..., 'Fresh', 'Fresh', 'Fresh'],
      dtype=object)

In [10]:
# Score the predictions against the true test labels.
# For context: "Fresh" accounts for 9891 of the 17384 cleaned rows (~57%),
# so that share is the reference point for judging this accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.6431201104463875