# SVM Model

### Import python libraries

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

### Import the data

In [None]:
# Load the movie dataset from the Resources folder into a DataFrame.
csv_path = "Resources/imdb_final.csv"
movies = pd.read_csv(csv_path)
# Preview the first rows to confirm the file loaded.
movies.head()

#### Clean data for SVM model

 For this model we will not be using the following columns: imdb_title_id, title, country, production_company,
 total_votes, median_vote, all18to29, all30to44, allover45, males, males18to29, males30to44, malesover45, females,
 females18to29, females30to44, and femalesover45.

 Keeping: year, genre, duration, director, budget, and rating_class.

In [None]:
# Columns that this model does not use; everything else is kept.
unused_columns = [
    'imdb_title_id', 'title', 'country', 'production_company',
    'total_votes', 'median_vote',
    'all18to29', 'all30to44', 'allover45',
    'males', 'males18to29', 'males30to44', 'malesover45',
    'females', 'females18to29', 'females30to44', 'femalesover45',
]
# Build the reduced DataFrame and display it.
new_df = movies.drop(columns=unused_columns)
new_df

### View the data using seaborn.pairplot

In [None]:
# Pairwise plots of the reduced DataFrame.
# Off-diagonal plots use a thin "+" marker for easier viewing; the diagonal
# plots are drawn unfilled.
scatter_options = {"marker": "+", "linewidth": 1}
diagonal_options = {"fill": False}
sns.pairplot(new_df, plot_kws=scatter_options, diag_kws=diagonal_options)

In [None]:
# Set up dependent variable: the rating_class column is what the model predicts.
target = new_df["rating_class"]
# Display names for the classes, used when printing the classification report.
# NOTE(review): classification_report is understood to pair target_names with
# the class labels in sorted order. Confirm that the values stored in
# rating_class sort as Bad < Good < Excellent (e.g. 0/1/2); if they are the
# strings themselves, sorted order would be Bad, Excellent, Good and the
# report rows would be mislabeled.
target_names = ["Bad", "Good", "Excellent"]

In [None]:
# Independent variables: every remaining column except the target.
data = new_df.drop(columns=["rating_class"])
# Column labels of the feature set, captured before any encoding is applied.
feature_names = data.columns
# Preview the feature DataFrame.
data.head()

In [None]:
# One-hot encode the categorical columns so the model receives numeric input.
categorical_columns = ["genre", "director"]
data_binary_encoded = pd.get_dummies(data, columns=categorical_columns)
# Display the encoded DataFrame.
data_binary_encoded

In [None]:
# Split features and target into train and test sets.
# stratify=target splits in a stratified fashion using the class labels, and
# random_state=42 makes the split reproducible.
split_parts = train_test_split(
    data_binary_encoded,
    target,
    random_state=42,
    stratify=target,
)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Rescale the independent variables to the range (-1, 1).
# The scaler is fitted on the training split only and then applied to both
# splits, so the test data does not influence the scaling parameters.
scaling = MinMaxScaler(feature_range=(-1, 1))
scaling.fit(X_train)
X_train, X_test = scaling.transform(X_train), scaling.transform(X_test)

In [None]:
# Set up the model using a linear kernel
# Only the kernel is specified here; no other SVC arguments are passed.
model = SVC(kernel='linear')
# Train on the scaled training features and their class labels.
model.fit(X_train, y_train)

In [None]:
# Print model accuracy on the held-out test split, to three decimal places.
test_accuracy = model.score(X_test, y_test)
print(f"Test Acc: {test_accuracy:.3f}")

In [None]:
# Predict classes for the test split, then print the classification report
# with the display names from target_names.
predictions = model.predict(X_test)
report = classification_report(y_test, predictions, target_names=target_names)
print(report)