From c6f47fb18dbba2e453b1f8c73f3b72c9aacd4eca Mon Sep 17 00:00:00 2001
From: Mrounds2 <135572097+Mrounds2@users.noreply.github.com>
Date: Mon, 27 Nov 2023 20:33:30 -0800
Subject: [PATCH 1/4] Add files via upload

---
 movie_ML.ipynb | 123 +++++++++++++++++++++++++++++--------------------
 1 file changed, 72 insertions(+), 51 deletions(-)
diff --git a/movie_ML.ipynb b/movie_ML.ipynb
index 5c9b497..4768eaa 100644
--- a/movie_ML.ipynb
+++ b/movie_ML.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,30 +13,40 @@
     "from sklearn.neighbors import NearestNeighbors\n",
     "import numpy as np\n",
     "import random\n",
-    "from joblib import Parallel, delayed"
+    "from joblib import Parallel, delayed\n",
+    "from sklearn.metrics.pairwise import cosine_similarity"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Connect to the SQLite database\n",
-    "conn = sqlite3.connect('movie_ratings_db.sqlite')\n",
-    "\n",
-    "# Query the data from the ratings table and join with the movies table\n",
+    "conn = sqlite3.connect('movie_ratings_db.sqlite')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Query a subset of the data to speed up development/testing\n",
+    "# Adjust the LIMIT clause based on your dataset size\n",
     "query = \"\"\"\n",
     "    SELECT r.userId, r.movieId, r.rating, m.title, m.genres\n",
     "    FROM ratings r\n",
     "    JOIN movies m ON r.movieId = m.movieId\n",
+    "    LIMIT 10000  -- Adjust this limit based on your dataset size\n",
     "\"\"\"\n",
     "df = pd.read_sql_query(query, conn)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -47,31 +57,31 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Content-Based Filtering (using Genres)\n",
     "df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))\n",
-    "tfidf_vectorizer = TfidfVectorizer(stop_words='english')\n",
-    "tfidf_matrix = tfidf_vectorizer.fit_transform(df['genres_str'])\n"
+    "tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)  # Adjust max_features\n",
+    "tfidf_matrix = tfidf_vectorizer.fit_transform(df['genres_str'])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
-       "<style>#sk-container-id-2 {color: black;}#sk-container-id-2 pre{padding: 0;}#sk-container-id-2 div.sk-toggleable {background-color: white;}#sk-container-id-2 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-2 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-2 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-2 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-2 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-2 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-2 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-2 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-2 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-2 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-2 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-2 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-2 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-2 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-2 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-2 div.sk-item {position: relative;z-index: 1;}#sk-container-id-2 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-2 div.sk-item::before, #sk-container-id-2 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-2 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-2 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-2 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-2 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-2 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-2 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-2 div.sk-label-container {text-align: center;}#sk-container-id-2 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-2 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-2\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>NearestNeighbors(metric=&#x27;cosine&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-2\" type=\"checkbox\" checked><label for=\"sk-estimator-id-2\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">NearestNeighbors</label><div class=\"sk-toggleable__content\"><pre>NearestNeighbors(metric=&#x27;cosine&#x27;)</pre></div></div></div></div></div>"
+       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>NearestNeighbors(metric=&#x27;cosine&#x27;)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">NearestNeighbors</label><div class=\"sk-toggleable__content\"><pre>NearestNeighbors(metric=&#x27;cosine&#x27;)</pre></div></div></div></div></div>"
       ],
       "text/plain": [
        "NearestNeighbors(metric='cosine')"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -84,20 +94,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Collaborative Filtering (User-Item Interactions)\n",
-    "user_movie_ratings = df.pivot_table(index='userId', columns='title', values='rating')\n",
-    "user_movie_ratings = user_movie_ratings.fillna(0)\n",
+    "user_movie_ratings = df.pivot_table(index='userId', columns='title', values='rating', fill_value=0)\n",
     "movie_user_ratings = user_movie_ratings.T\n",
     "movie_similarity = cosine_similarity(movie_user_ratings)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -110,22 +119,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Function to get similar items based on Collaborative Filtering\n",
     "def get_similar_items_cf(movie_title, top_n=5):\n",
-    "    movie_ratings = user_movie_ratings[movie_title]\n",
+    "    movie_ratings = user_movie_ratings[movie_title].values.reshape(1, -1)\n",
     "    \n",
-    "    # Ensure the movie_ratings is a 2D array (column vector)\n",
-    "    movie_ratings = movie_ratings.values.reshape(-1, 1)\n",
+    "    # Calculate similarity scores using cosine_similarity\n",
+    "    similar_scores = cosine_similarity(movie_ratings, movie_user_ratings)\n",
     "    \n",
-    "    # Calculate similarity scores using a loop\n",
-    "    similar_scores = [np.sum(movie_ratings.T * movie_similarity[:, i].reshape(-1, 1)) for i in range(movie_similarity.shape[0])]\n",
+    "    # Extract the similarity scores for the given movie\n",
+    "    similarity_scores_for_movie = similar_scores.flatten()\n",
     "    \n",
     "    # Create a DataFrame with movie titles and similarity scores\n",
-    "    similar_movies_df = pd.DataFrame({'movie': movie_user_ratings.index, 'similarity': similar_scores})\n",
+    "    similar_movies_df = pd.DataFrame({'movie': movie_user_ratings.index, 'similarity': similarity_scores_for_movie})\n",
     "    \n",
     "    # Sort by similarity and get the top N\n",
     "    similar_movies_df = similar_movies_df.sort_values(by='similarity', ascending=False).head(top_n)\n",
@@ -135,39 +144,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Function to get hybrid recommendations (combining CF and CB)\n",
     "def get_hybrid_recommendations(selected_movies, top_n=5):\n",
-    "    # Get similar movies using collaborative filtering for each selected movie\n",
     "    cf_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_cf)(movie_title, top_n=top_n) for movie_title in selected_movies)\n",
     "    cf_recommendations = [item for sublist in cf_recommendations for item in sublist]\n",
     "    \n",
-    "    # Get similar movies using content-based filtering with genre filter\n",
     "    cb_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_nn)(df[df['title'] == movie_title].index[0]) for movie_title in selected_movies)\n",
     "    cb_recommendations = [item for sublist in cb_recommendations for item in sublist]\n",
     "    \n",
-    "    # Filter content-based recommendations based on genre relevance\n",
     "    genre_filter = set().union(*(tuple(genre) for movie_title in selected_movies for genre in df[df['title'] == movie_title]['genres']))\n",
     "    cb_recommendations_filtered = [movie for movie in cb_recommendations if any(set(genre) & genre_filter for genre in df[df['title'] == movie]['genres'])]\n",
-    "\n",
-    "    # Combine the recommendations from both methods\n",
+    "    \n",
     "    hybrid_recommendations = list(set(cf_recommendations + cb_recommendations_filtered))[:top_n]\n",
     "    \n",
-    "    return hybrid_recommendations"
+    "    return hybrid_recommendations\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Function to allow the user to choose movies from a random list\n",
     "def choose_movies():\n",
-    "    # Get a list of 10 random movies\n",
     "    random_movies = random.sample(df['title'].tolist(), 10)\n",
     "    \n",
     "    print(\"Choose up to 5 movies from the list:\")\n",
@@ -180,7 +184,6 @@
     "            selected_movies.append(random_movies[count])\n",
     "            count += 1\n",
     "        elif choice == 'no':\n",
-    "            # Choose a different movie randomly\n",
     "            random_movies.pop(count)\n",
     "            random_movies.append(random.choice(df['title'].tolist()))\n",
     "        elif choice != 'no':\n",
@@ -190,7 +193,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -198,27 +201,45 @@
      "output_type": "stream",
      "text": [
       "Choose up to 5 movies from the list:\n",
-      "1. Made in America (1993)\n",
-      "1. Babe (1995)\n",
-      "1. Dirty Dozen, The (1967)\n",
-      "1. Into the Wild (2007)\n",
-      "2. Legend No. 17 (2013)\n",
-      "2. Cats & Dogs (2001)\n",
-      "3. Memento (2000)\n",
-      "4. Scarface (1983)\n",
-      "4. Sexy Beast (2000)\n",
-      "4. Addams Family Values (1993)\n",
-      "5. Pitch Perfect 2 (2015)\n",
+      "1. Pursuit of Happyness, The (2006)\n",
+      "Invalid choice. Please enter 'yes' or 'no'.\n",
+      "1. Pursuit of Happyness, The (2006)\n",
+      "2. Cyrano de Bergerac (1990)\n",
+      "2. Candyman: Farewell to the Flesh (1995)\n",
+      "Invalid choice. Please enter 'yes' or 'no'.\n",
+      "2. Candyman: Farewell to the Flesh (1995)\n",
+      "2. Batman Forever (1995)\n",
+      "3. Shanghai Noon (2000)\n",
+      "4. Untouchables, The (1987)\n",
+      "4. To Live and Die in L.A. (1985)\n",
+      "4. Gattaca (1997)\n",
+      "Invalid choice. Please enter 'yes' or 'no'.\n",
+      "4. Gattaca (1997)\n",
+      "Invalid choice. Please enter 'yes' or 'no'.\n",
+      "4. Gattaca (1997)\n",
+      "4. Ernest Goes to Camp (1987)\n",
+      "4. Goodfellas (1990)\n",
+      "4. Million Dollar Baby (2004)\n",
+      "5. Pulp Fiction (1994)\n",
       "Hybrid recommendations based on user-selected movies:\n",
-      "['Woman in Red, The (1984)', 'Seven (a.k.a. Se7en) (1995)', 'Flintstones, The (1994)', 'Secret of My Succe$s, The (a.k.a. The Secret of My Success) (1987)', 'Earth Girls Are Easy (1988)']\n"
+      "['Fargo (1996)', '3:10 to Yuma (2007)', 'Donnie Darko (2001)', 'The Revenant (2015)', 'True Lies (1994)']\n"
      ]
     }
    ],
    "source": [
-    "# Example: Get user-selected movies and generate recommendations\n",
-    "selected_movies = choose_movies()\n",
-    "hybrid_recommendations = get_hybrid_recommendations(selected_movies, top_n=5)\n",
-    "print(f'Hybrid recommendations based on user-selected movies:\\n{hybrid_recommendations}')"
+    "# Function to allow the user to redraw recommendations\n",
+    "def redraw_recommendations():\n",
+    "    while True:\n",
+    "        selected_movies = choose_movies()\n",
+    "        hybrid_recommendations = get_hybrid_recommendations(selected_movies, top_n=5)\n",
+    "        print(f'Hybrid recommendations based on user-selected movies:\\n{hybrid_recommendations}')\n",
+    "\n",
+    "        redraw = input(\"Do you want to redraw recommendations? Enter 'yes' or 'no': \").lower()\n",
+    "        if redraw != 'yes':\n",
+    "            break\n",
+    "\n",
+    "# Call the function to start the recommendation process\n",
+    "redraw_recommendations()"
    ]
   }
  ],

From 0f9a1f215a53b5485444ccb81a2efb4d13f53a3e Mon Sep 17 00:00:00 2001
From: Mrounds2 <135572097+Mrounds2@users.noreply.github.com>
Date: Tue, 28 Nov 2023 20:23:53 -0800
Subject: [PATCH 2/4] Add files via upload

---
 app.py | 130 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 app.py

diff --git a/app.py b/app.py
new file mode 100644
index 0000000..3bc8947
--- /dev/null
+++ b/app.py
@@ -0,0 +1,130 @@
+from flask import Flask, render_template, request, jsonify
+import sqlite3
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.neighbors import NearestNeighbors
+from sklearn.metrics.pairwise import cosine_similarity
+import numpy as np
+import random
+from joblib import Parallel, delayed
+
+app = Flask(__name__)
+
+# Connect to the SQLite database
+conn = sqlite3.connect('movie_ratings_db.sqlite')
+
+# Query a subset of the data to speed up development/testing
+# Adjust the LIMIT clause based on your dataset size
+query = """
+    SELECT r.userId, r.movieId, r.rating, m.title, m.genres
+    FROM ratings r
+    JOIN movies m ON r.movieId = m.movieId
+    LIMIT 10000  -- Adjust this limit based on your dataset size
+"""
+df = pd.read_sql_query(query, conn)
+
+# Assuming the genres column is in the format "Genre1|Genre2|Genre3"
+# Convert genres into a list
+df['genres'] = df['genres'].str.split('|')
+
+# Content-Based Filtering (using Genres)
+df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))
+tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)  # Adjust max_features
+tfidf_matrix = tfidf_vectorizer.fit_transform(df['genres_str'])
+
+# Nearest-neighbour index over the TF-IDF genre vectors (exact search, cosine distance)
+nn = NearestNeighbors(n_neighbors=5, algorithm='auto', metric='cosine')
+nn.fit(tfidf_matrix)
+
+# Collaborative Filtering (User-Item Interactions)
+user_movie_ratings = df.pivot_table(index='userId', columns='title', values='rating', fill_value=0)
+movie_user_ratings = user_movie_ratings.T
+movie_similarity = cosine_similarity(movie_user_ratings)
+
+# Function to get similar items based on NearestNeighbors
+def get_similar_items_nn(movie_index):
+    # kneighbors returns (distances, indices); only the neighbour row positions are needed.
+    neighbor_rows = nn.kneighbors(tfidf_matrix[movie_index])[1]
+    return df.iloc[neighbor_rows[0]]['title'].tolist()
+
+# Function to get similar items based on Collaborative Filtering
+def get_similar_items_cf(movie_title, top_n=5):
+    """Return up to ``top_n`` titles most similar to ``movie_title`` by rating pattern.
+
+    Returns an empty list when ``movie_title`` has no column in
+    ``user_movie_ratings`` (i.e. it has no ratings in the loaded data).
+    """
+    if movie_title not in user_movie_ratings.columns:
+        return []
+
+    # One row holding this movie's per-user ratings: shape (1, n_users).
+    movie_ratings = user_movie_ratings[movie_title].values.reshape(1, -1)
+
+    # Compare against every movie's rating vector. The previous
+    # np.dot(movie_ratings, movie_similarity) multiplied (1, n_users) by
+    # (n_movies, n_movies) and raised ValueError unless the two sizes matched.
+    similarity_scores_for_movie = cosine_similarity(movie_ratings, movie_user_ratings).flatten()
+
+    similar_movies_df = pd.DataFrame({'movie': movie_user_ratings.index, 'similarity': similarity_scores_for_movie})
+    return similar_movies_df.sort_values(by='similarity', ascending=False).head(top_n)['movie'].tolist()
+
+# Function to get hybrid recommendations (combining CF and CB)
+def get_hybrid_recommendations(selected_movies, top_n=5):
+    """Return up to ``top_n`` unique titles: CF results first, then genre-filtered CB results."""
+    # Skip titles missing from df; df[...].index[0] raised IndexError for them.
+    known = [title for title in selected_movies if (df['title'] == title).any()]
+    cf_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_cf)(title, top_n=top_n) for title in known)
+    cf_recommendations = [item for sublist in cf_recommendations for item in sublist]
+    cb_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_nn)(df[df['title'] == title].index[0]) for title in known)
+    cb_recommendations = [item for sublist in cb_recommendations for item in sublist]
+    # Keep only content-based picks that share a genre with the selected movies.
+    genre_filter = set().union(*(genres for title in known for genres in df[df['title'] == title]['genres']))
+    cb_recommendations_filtered = [movie for movie in cb_recommendations if any(set(genres) & genre_filter for genres in df[df['title'] == movie]['genres'])]
+    # dict.fromkeys de-duplicates in first-seen order; list(set(...)) made the top_n cut arbitrary.
+    return list(dict.fromkeys(cf_recommendations + cb_recommendations_filtered))[:top_n]
+
+# Initialize variables to keep track of liked movies
+liked_movies = []
+max_likes = 5  # Set the maximum number of liked movies
+
+def get_random_movie():
+    """Return one randomly sampled row of ``df`` (a pandas Series) for the user to rate."""
+    return df.sample(1).iloc[0]
+
+# Routes
+@app.route('/')
+def index():
+    """Serve a random movie to rate, or a notice once ``max_likes`` likes are recorded."""
+    if len(liked_movies) >= max_likes:
+        return "You have reached the maximum number of liked movies. Check your recommendations!"
+    # Below the limit: render the rating page for a freshly sampled movie.
+    movie_row = get_random_movie()
+    return render_template('index.html', random_movie=movie_row)
+
+@app.route('/rate_movie', methods=['POST'])
+def rate_movie():
+    """Record feedback for one movie and return the next step as JSON.
+
+    Responds with ``{'random_movie': ...}`` while fewer than ``max_likes`` movies
+    are liked, otherwise with ``{'recommendations': [...]}``.
+    """
+    movie_id = request.form['movie_id']
+    feedback = request.form['feedback']
+    print(f"User provided feedback for Movie ID {movie_id}: {feedback}")
+
+    if feedback == 'like':
+        # get_hybrid_recommendations looks movies up by title, so store the title
+        # for this id; form values are text, hence the string comparison.
+        titles = df.loc[df['movieId'].astype(str) == movie_id, 'title']
+        liked_movies.append(titles.iloc[0] if not titles.empty else movie_id)
+
+    if len(liked_movies) < max_likes:
+        # Still collecting likes: hand back another random movie to rate.
+        return jsonify({'random_movie': get_random_movie().to_dict()})
+
+    hybrid_recommendations = get_hybrid_recommendations(liked_movies, top_n=5)
+    return jsonify({'recommendations': hybrid_recommendations})
+
+if __name__ == '__main__':
+    conn.close()
+    app.run(debug=True)
\ No newline at end of file

From 62233d2e0bc002fde2dcb93583521fc4bf9bbace Mon Sep 17 00:00:00 2001
From: Mrounds2 <135572097+Mrounds2@users.noreply.github.com>
Date: Tue, 28 Nov 2023 20:57:33 -0800
Subject: [PATCH 3/4] Add files via upload

---
 rec_engine_interactive.html | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 rec_engine_interactive.html

diff --git a/rec_engine_interactive.html b/rec_engine_interactive.html
new file mode 100644
index 0000000..4e4e008
--- /dev/null
+++ b/rec_engine_interactive.html
@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Movie Recommendations</title>
+</head>
+<body>
+    <h1>Movie Recommendations</h1>
+    <p>{{ movie_title }}</p>
+    <form method="post" action="{{ url_for('rec_engine') }}">
+        <button type="submit" name="action" value="like">Like</button>
+        <button type="submit" name="action" value="dislike">Dislike</button>
+        <button type="submit" name="action" value="unsure">Unsure</button>
+    </form>
+</body>
+</html>
\ No newline at end of file

From 90fda1dc37403a74886d32007a73c9ef18f7d4c0 Mon Sep 17 00:00:00 2001
From: Mrounds2 <135572097+Mrounds2@users.noreply.github.com>
Date: Wed, 29 Nov 2023 00:23:51 -0800
Subject: [PATCH 4/4] Add files via upload

---
 app2.py                     | 184 ++++++++++++++++++++++++++++++++++++
 rec_engine.html             |  17 ++++
 rec_engine_interactive.html |  18 +++-
 3 files changed, 215 insertions(+), 4 deletions(-)
 create mode 100644 app2.py
 create mode 100644 rec_engine.html

diff --git a/app2.py b/app2.py
new file mode 100644
index 0000000..b950f5e
--- /dev/null
+++ b/app2.py
@@ -0,0 +1,184 @@
+import warnings
+warnings.filterwarnings("ignore")
+
+import numpy as np
+import datetime as dt
+import os
+
+import sqlite3
+
+import sqlalchemy as db
+from sqlalchemy.ext.automap import automap_base
+from sqlalchemy.orm import Session
+from sqlalchemy import create_engine, func, inspect
+
+from flask import Flask, jsonify, render_template, request, session
+
+import pandas as pd
+
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.neighbors import NearestNeighbors
+import numpy as np
+import random
+from joblib import Parallel, delayed
+
+engine = create_engine("sqlite:///movie_ratings_db.sqlite")
+
+conn = sqlite3.connect('movie_ratings_db.sqlite')
+
+# Query a subset of the data to speed up development/testing
+# Adjust the LIMIT clause based on your dataset size
+query = """
+    SELECT r.userId, r.movieId, r.rating, m.title, m.genres
+    FROM ratings r
+    JOIN movies m ON r.movieId = m.movieId
+    LIMIT 10000  -- Adjust this limit based on your dataset size
+"""
+df = pd.read_sql_query(query, conn)
+
+# Assuming the genres column is in the format "Genre1|Genre2|Genre3"
+# Convert genres into a list
+df['genres'] = df['genres'].str.split('|')
+
+# Content-Based Filtering (using Genres)
+df['genres_str'] = df['genres'].apply(lambda x: ' '.join(x))
+tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)  # Adjust max_features
+tfidf_matrix = tfidf_vectorizer.fit_transform(df['genres_str'])
+
+# Nearest-neighbour index over the TF-IDF genre vectors (exact search, cosine distance)
+nn = NearestNeighbors(n_neighbors=5, algorithm='auto', metric='cosine')
+nn.fit(tfidf_matrix)
+
+app = Flask(__name__)
+app.secret_key = os.urandom(24)
+
+@app.route("/")
+def welcome():
+    return render_template("index2.html")
+
+@app.route("/genre")
+def genre():
+    return render_template("reregenre.html")
+
+@app.route("/title")
+def title():
+    return render_template("reretitle.html")
+
+# Counter to keep track of the number of movie selections
+selection_counter = 0
+
+@app.route("/recommendation_engine", methods=['GET', 'POST'])
+def rec_engine():
+    global result
+    global selection_counter
+
+    if 'like_counter' not in session:
+        session['like_counter'] = 0
+
+    if request.method == 'POST':
+        action = request.form.get('action')
+        if action in ['like', 'dislike', 'skip', 'unsure']:
+            # Handle the user's action (like, dislike, skip, unsure)
+            # Update user preferences or store data accordingly
+
+            if action == 'like':
+                session['like_counter'] += 1
+
+            # Increment the selection counter
+            selection_counter += 1
+
+            # If the user has liked 5 movies, generate recommendations
+            if session['like_counter'] == 5:
+                # Call the function to start the recommendation process
+                redraw_recommendations()
+
+                # Reset the like counter
+                session['like_counter'] = 0
+
+    if request.method == 'POST':
+        action = request.form.get('action')
+        if action in ['like', 'dislike', 'skip', 'unsure']:
+            # Handle the user's action (like, dislike, skip, unsure)
+            # Update user preferences or store data accordingly
+            pass
+
+    if request.method == 'POST':
+        selected_movies = request.form.getlist('selected_movies')
+
+    # Collaborative Filtering (User-Item Interactions)
+    user_movie_ratings = df.pivot_table(index='userId', columns='title', values='rating', fill_value=0)
+    movie_user_ratings = user_movie_ratings.T
+    movie_similarity = cosine_similarity(movie_user_ratings)
+
+    # Function to get similar items based on NearestNeighbors
+    def get_similar_items_nn(movie_index):
+        distances, indices = nn.kneighbors(tfidf_matrix[movie_index])
+        similar_items = df.iloc[indices[0]]['title'].tolist()
+        return similar_items
+
+    # Function to get similar items based on Collaborative Filtering
+    def get_similar_items_cf(movie_title, top_n=5):
+        movie_ratings = user_movie_ratings[movie_title].values.reshape(1, -1)
+
+        # Calculate similarity scores using cosine_similarity
+        similar_scores = cosine_similarity(movie_ratings, movie_user_ratings)
+
+        # Extract the similarity scores for the given movie
+        similarity_scores_for_movie = similar_scores.flatten()
+
+        # Create a DataFrame with movie titles and similarity scores
+        similar_movies_df = pd.DataFrame({'movie': movie_user_ratings.index, 'similarity': similarity_scores_for_movie})
+
+        # Sort by similarity and get the top N
+        similar_movies_df = similar_movies_df.sort_values(by='similarity', ascending=False).head(top_n)
+
+        return similar_movies_df['movie'].tolist()
+
+    # Function to get hybrid recommendations (combining CF and CB)
+    def get_hybrid_recommendations(selected_movies, top_n=5):
+        cf_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_cf)(movie_title, top_n=top_n) for movie_title in selected_movies)
+        cf_recommendations = [item for sublist in cf_recommendations for item in sublist]
+
+        cb_recommendations = Parallel(n_jobs=-1)(delayed(get_similar_items_nn)(df[df['title'] == movie_title].index[0]) for movie_title in selected_movies)
+        cb_recommendations = [item for sublist in cb_recommendations for item in sublist]
+
+        genre_filter = set().union(*(tuple(genre) for movie_title in selected_movies for genre in df[df['title'] == movie_title]['genres']))
+        cb_recommendations_filtered = [movie for movie in cb_recommendations if any(set(genre) & genre_filter for genre in df[df['title'] == movie]['genres'])]
+
+        hybrid_recommendations = list(set(cf_recommendations + cb_recommendations_filtered))[:top_n]
+
+        return hybrid_recommendations
+
+    # Function to allow the user to choose movies from a random list
+    def choose_movies():
+        random_movies = random.sample(df['title'].tolist(), 10)
+        return random_movies
+
+    # Function to allow the user to redraw recommendations
+    def redraw_recommendations():
+        while True:
+            selected_movies = choose_movies()
+            hybrid_recommendations = get_hybrid_recommendations(selected_movies, top_n=5)
+            global result
+            result = hybrid_recommendations.copy()
+            print(f'Hybrid recommendations based on user-selected movies:\n{hybrid_recommendations}')
+
+            redraw = input("Do you want to redraw recommendations? Enter 'yes' or 'no': ").lower()
+            if redraw != 'yes':
+                break
+
+        print(result)
+        print(type(hybrid_recommendations))
+        print(hybrid_recommendations)
+
+    # If the user has not liked 5 movies, continue choosing movies
+    if session['like_counter'] < 5:
+        random_movie = df['title'].sample().iloc[0]
+        return render_template("rec_engine_interactive.html", movie_title=random_movie, like_counter=session['like_counter'])
+
+    # If the user has liked 5 movies, display recommendations
+    return render_template("rec_engine_interactive.html", movie_title=result[0], like_counter=session['like_counter'])
+
+if __name__ == "__main__":
+    app.run(debug=True)
diff --git a/rec_engine.html b/rec_engine.html
new file mode 100644
index 0000000..5fef501
--- /dev/null
+++ b/rec_engine.html
@@ -0,0 +1,21 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Movie Recommendations</title>
+</head>
+<body>
+    {# Expects `recs`: the movie titles to list, one per item. #}
+    <h1>Movie Recommendations</h1>
+    <ul>
+        {% for movie in recs %}
+            <li>{{ movie }}</li>
+        {% else %}
+            {# Rendered when the loop above produced no items. #}
+            <li>No recommendations available yet.</li>
+        {% endfor %}
+    </ul>
+</body>
+</html>
diff --git a/rec_engine_interactive.html b/rec_engine_interactive.html
index 4e4e008..51e5e4b 100644
--- a/rec_engine_interactive.html
+++ b/rec_engine_interactive.html
@@ -9,10 +9,20 @@
 <body>
     <h1>Movie Recommendations</h1>
     <p>{{ movie_title }}</p>
+    <p>Likes: {{ like_counter }}</p>
     <form method="post" action="{{ url_for('rec_engine') }}">
-        <button type="submit" name="action" value="like">Like</button>
-        <button type="submit" name="action" value="dislike">Dislike</button>
-        <button type="submit" name="action" value="unsure">Unsure</button>
+        <input type="hidden" name="action" value="like">
+        <button type="submit">Like</button>
+    </form>
+
+    <form method="post" action="{{ url_for('rec_engine') }}">
+        <input type="hidden" name="action" value="dislike">
+        <button type="submit">Dislike</button>
+    </form>
+
+    <form method="post" action="{{ url_for('rec_engine') }}">
+        <input type="hidden" name="action" value="skip">
+        <button type="submit">Skip</button>
     </form>
 </body>
-</html>
\ No newline at end of file
+</html>