In [27]:
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle

In [28]:
api_endpoint = "https://api.themoviedb.org/3/discover/movie"
api_key = "c3b87632de85fe1569bd0b15603df2cc"

In [29]:
params = {
    "api_key": api_key,
    "language": "en-US",
    "sort_by": "popularity.desc",
    "include_adult": "false",
    "include_video": "false",
    "page": 1
}


In [30]:

response = requests.get(api_endpoint, params=params)
movies_data = response.json()


In [31]:
movies_list = movies_data.get('results', [])

In [32]:
movies_df = pd.DataFrame(movies_list)

In [33]:
print(movies_df.head())

   adult                     backdrop_path                genre_ids       id  \
0  False  /xg27NrXi7VXCGUr7MG75UqLl6Vg.jpg  [16, 10751, 18, 12, 35]  1022789   
1  False  /fqv8v6AycXKsivp1T5yKtLbGXce.jpg            [878, 12, 28]   653346   
2  False  /j29ekbcLpBvxnGk6LjdTc2EI5SA.jpg  [16, 10751, 12, 18, 35]   150540   
3  False  /iTWrsOVsUqcwYSxrpINNs3hG2nC.jpg       [53, 27, 28, 9648]  1001311   
4  False  /gRApXuxWmO2forYTuTmcz5RaNUV.jpg         [28, 80, 53, 35]   573435   

  original_language                     original_title  \
0                en                       Inside Out 2   
1                en  Kingdom of the Planet of the Apes   
2                en                         Inside Out   
3                fr                      Sous la Seine   
4                en              Bad Boys: Ride or Die   

                                            overview  popularity  \
0  Teenager Riley's mind headquarters is undergoi...    8445.266   
1  Several generations in the futu

In [34]:
movies_df = movies_df.dropna(subset=['genre_ids', 'vote_average', 'popularity'])


In [35]:
movies_df['genre'] = movies_df['genre_ids'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else None)

In [36]:
movies_df = movies_df.dropna(subset=['genre'])

In [37]:

label_encoder = LabelEncoder()
movies_df['genre_encoded'] = label_encoder.fit_transform(movies_df['genre'])

In [38]:

movies_df['success'] = movies_df['vote_average'] >= 7
X = movies_df[['vote_average', 'genre_encoded']]
y = movies_df['success']

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_y_pred = nb_model.predict(X_test)

nb_accuracy = accuracy_score(y_test, nb_y_pred)
nb_report = classification_report(y_test, nb_y_pred)
nb_conf_matrix = confusion_matrix(y_test, nb_y_pred)

print(f'Naive Bayes Accuracy: {nb_accuracy}')
print(f'Naive Bayes Report:\n{nb_report}')
print(f'Naive Bayes Confusion Matrix:\n{nb_conf_matrix}')

Naive Bayes Accuracy: 0.75
Naive Bayes Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00         1
        True       0.75      1.00      0.86         3

    accuracy                           0.75         4
   macro avg       0.38      0.50      0.43         4
weighted avg       0.56      0.75      0.64         4

Naive Bayes Confusion Matrix:
[[0 1]
 [0 3]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [41]:
with open('nb_model.pkl', 'wb') as file:
    pickle.dump(nb_model, file)

In [42]:
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)
lr_y_pred = lr_model.predict(X_test)

lr_accuracy = accuracy_score(y_test, lr_y_pred)
lr_report = classification_report(y_test, lr_y_pred)
lr_conf_matrix = confusion_matrix(y_test, lr_y_pred)

print(f'Logistic Regression Accuracy: {lr_accuracy}')
print(f'Logistic Regression Report:\n{lr_report}')
print(f'Logistic Regression Confusion Matrix:\n{lr_conf_matrix}')

Logistic Regression Accuracy: 0.75
Logistic Regression Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00         1
        True       0.75      1.00      0.86         3

    accuracy                           0.75         4
   macro avg       0.38      0.50      0.43         4
weighted avg       0.56      0.75      0.64         4

Logistic Regression Confusion Matrix:
[[0 1]
 [0 3]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [43]:
with open('lr_model.pkl', 'wb') as file:
    pickle.dump(lr_model, file)


In [44]:
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_y_pred = svm_model.predict(X_test)

svm_accuracy = accuracy_score(y_test, svm_y_pred)
svm_report = classification_report(y_test, svm_y_pred)
svm_conf_matrix = confusion_matrix(y_test, svm_y_pred)

print(f'SVM Accuracy: {svm_accuracy}')
print(f'SVM Report:\n{svm_report}')
print(f'SVM Confusion Matrix:\n{svm_conf_matrix}')


SVM Accuracy: 0.75
SVM Report:
              precision    recall  f1-score   support

       False       0.00      0.00      0.00         1
        True       0.75      1.00      0.86         3

    accuracy                           0.75         4
   macro avg       0.38      0.50      0.43         4
weighted avg       0.56      0.75      0.64         4

SVM Confusion Matrix:
[[0 1]
 [0 3]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [45]:
with open('svm_model.pkl', 'wb') as file:
    pickle.dump(svm_model, file)

In [46]:
with open('label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)