# Movie/Show Recommender for Netflix, Hulu, Prime Video, and Disney+
### Author: Kolton Cox 
### March 25, 2021
### Data 4319 Machine Learning
_____

#### Import libraries:

In [1]:
using RDatasets
using Plots
using CSV

In [2]:
"""
    distance(p1, p2)

Return the Euclidean distance between the points `p1` and `p2`.

`p1` and `p2` may be any iterables of numbers with a defined `length`
(tuples, vectors, ...). A `missing` coordinate propagates, so the result is
`missing` if either point contains one.

# Throws
- `DimensionMismatch` if the two points do not have the same number of
  coordinates.
"""
function distance(p1, p2)
    # Comparing points of different dimension is meaningless; fail loudly instead
    # of silently ignoring the extra coordinates of the longer point.
    length(p1) == length(p2) || throw(
        DimensionMismatch(
            "points must have the same length, got $(length(p1)) and $(length(p2))",
        ),
    )
    # Two zero-dimensional points coincide; `sum` over an empty generator would throw.
    isempty(p1) && return 0.0
    return sqrt(sum((a - b)^2 for (a, b) in zip(p1, p2)))
end

distance (generic function with 1 method)

In [3]:
"""
    KNN(p, features, labels, k)

Return the `k` nearest neighbors of the point `p` among `features`.

Each neighbor is a `(distance, label)` tuple, where `distance` is
`distance(p, feature)` and `label` is the entry of `labels` at the same
position as that feature. The result is sorted by increasing distance.
If `k` is larger than the number of points, all points are returned.

# Throws
- `DimensionMismatch` if `features` and `labels` differ in length.
"""
function KNN(p, features, labels, k)
    length(features) == length(labels) || throw(
        DimensionMismatch(
            "features and labels must have the same length, " *
            "got $(length(features)) and $(length(labels))",
        ),
    )

    distance_array = [
        (distance(p, feature), label) for (feature, label) in zip(features, labels)
    ]
    # Sorting compares with `isless`, which orders `missing` after every number,
    # so points whose distance is `missing` end up at the back of the list.
    sort!(distance_array; by=first)

    # Clamp so that asking for more neighbors than there are points is not an error.
    return distance_array[1:min(k, length(distance_array))]
end

KNN (generic function with 1 method)

#### Read in movies from Netflix, Hulu, Prime Video, Disney+ 

In [51]:
# Load the streaming-platform movie catalogue into a DataFrame.
# The path is relative, so the CSV must be in the current working directory.
movies = CSV.read("MoviesOnStreamingPlatforms_updated.csv", DataFrame)

Unnamed: 0_level_0,Column1,ID,Title,Year,Age,IMDb,Rotten Tomatoes
Unnamed: 0_level_1,Int64,Int64,String,Int64,String?,Float64?,String?
1,0,1,Inception,2010,13+,8.8,87%
2,1,2,The Matrix,1999,18+,8.7,87%
3,2,3,Avengers: Infinity War,2018,13+,8.5,84%
4,3,4,Back to the Future,1985,7+,8.5,96%
5,4,5,"The Good, the Bad and the Ugly",1966,18+,8.8,97%
6,5,6,Spider-Man: Into the Spider-Verse,2018,7+,8.4,97%
7,6,7,The Pianist,2002,18+,8.5,95%
8,7,8,Django Unchained,2012,18+,8.4,87%
9,8,9,Raiders of the Lost Ark,1981,7+,8.4,95%
10,9,10,Inglourious Basterds,2009,18+,8.3,89%


#### Create the feature tuples (x) and the title labels (y), then display them to check the result

In [52]:
# Features: one tuple per row of (IMDb, Netflix, Hulu, PrimeVideo, DisneyPlus).
# The platform columns appear to be 0/1 availability flags — TODO confirm.
# NOTE(review): IMDb is a `Float64?` column, so some tuples contain `missing`;
# any distance computed from such a tuple is `missing` as well.
x_movie_data = [x for x in zip(movies.IMDb, movies.Netflix, movies.Hulu, movies.PrimeVideo, movies.DisneyPlus)]
# Labels: the movie titles, in the same row order as the feature tuples.
y_movie_data = [x for x in movies.Title]

16744-element Array{String,1}:
 "Inception"
 "The Matrix"
 "Avengers: Infinity War"
 "Back to the Future"
 "The Good, the Bad and the Ugly"
 "Spider-Man: Into the Spider-Verse"
 "The Pianist"
 "Django Unchained"
 "Raiders of the Lost Ark"
 "Inglourious Basterds"
 "Taxi Driver"
 "3 Idiots"
 "Pan's Labyrinth"
 ⋮
 "The Boy Who Talked to Badgers"
 "Sea of Hope: America's Underwater Treasures"
 "Sultan And The Rock Star"
 "Secrets of the King Cobra"
 "Winged Seduction: Birds of Paradise"
 "The Bears and I"
 "Whispers: An Elephant's Tale"
 "The Ghosts of Buxley Hall"
 "The Poof Point"
 "Sharks of Lost Island"
 "Man Among Cheetahs"
 "In Beaver Valley"

In [53]:
# Display the feature tuples to check how they were built.
x_movie_data

16744-element Array{Tuple{Any,Int64,Int64,Int64,Int64},1}:
 (8.8, 1, 0, 0, 0)
 (8.7, 1, 0, 0, 0)
 (8.5, 1, 0, 0, 0)
 (8.5, 1, 0, 0, 0)
 (8.8, 1, 0, 1, 0)
 (8.4, 1, 0, 0, 0)
 (8.5, 1, 0, 1, 0)
 (8.4, 1, 0, 0, 0)
 (8.4, 1, 0, 0, 0)
 (8.3, 1, 0, 0, 0)
 (8.3, 1, 0, 0, 0)
 (8.4, 1, 0, 1, 0)
 (8.2, 1, 0, 0, 0)
 ⋮
 (6.2, 0, 0, 0, 1)
 (7.3, 0, 0, 0, 1)
 (5.9, 0, 0, 0, 1)
 (6.4, 0, 0, 0, 1)
 (6.5, 0, 0, 0, 1)
 (6.2, 0, 0, 0, 1)
 (5.0, 0, 0, 0, 1)
 (6.2, 0, 0, 0, 1)
 (4.7, 0, 0, 0, 1)
 (5.7, 0, 0, 0, 1)
 (6.6, 0, 0, 0, 1)
 (missing, 0, 0, 0, 1)

In [54]:
"""
    more_like_this(movie_name, features, labels, k)

Print the `k` nearest neighbors of the movie called `movie_name`.

`features[i]` is the feature point of the movie whose title is `labels[i]`.
For every entry of `labels` equal to `movie_name`, the neighbors are computed
with `KNN` over all of `features` and printed as a numbered list of titles.
The queried movie is itself one of the candidates, so it normally appears in
its own list.

A warning is logged, and nothing is recommended, when no entry of `labels`
matches `movie_name` or when the matching movie has a `missing` feature value
(every distance would then be `missing`, and the "neighbors" would be arbitrary).

Returns `nothing`.
"""
function more_like_this(movie_name, features, labels, k)
    found = false
    for i in eachindex(labels)
        labels[i] == movie_name || continue
        found = true

        # A missing coordinate makes every distance `missing`, so no ranking exists.
        if any(ismissing, features[i])
            @warn "Cannot recommend movies for $movie_name: its features contain missing values"
            continue
        end

        neighbors = KNN(features[i], features, labels, k)
        println("The top $k similar movies with $movie_name are:")
        # Iterate over what KNN actually returned rather than assuming k entries.
        for (j, neighbor) in enumerate(neighbors)
            println("$j. ", neighbor[2])
        end
    end

    # Previously an unknown title produced no output at all.
    found || @warn "No movie titled \"$movie_name\" was found"
    return nothing
end

more_like_this (generic function with 1 method)

In [55]:
# Example query: print the 10 nearest neighbors of one title.
more_like_this("Spider-Man: Into the Spider-Verse", x_movie_data, y_movie_data, 10)

The top 10 similar movies with Spider-Man: Into the Spider-Verse are:
1. Spider-Man: Into the Spider-Verse
2. Django Unchained
3. Raiders of the Lost Ark
4. Dangal
5. Like Stars on Earth
6. Bill Burr: I'm Sorry You Feel That Way
7. Winter on Fire: Ukraine's Fight for Freedom
8. Rush: Beyond the Lighted Stage
9. Bill Burr: Let It Go
10. Bo Burnham: Make Happy
