# Visual RecSys for Streaming Platforms

Visual similarity recommendation refers to the process of suggesting items or content based on their visual similarity to a reference item. This type of recommendation system is commonly used in various domains, such as e-commerce, image search engines, and content recommendation platforms.

Methodology:-
1. Data Collection
2. Feature Extraction
3. Similarity Calculation
4. Ranking and Recommendation

## 1.Problem
To recommend movie posters from the dataset of movies posters given an image of the movie poster.

## 2.Data Collection
We are using the dataset taken from the Kaggle
https://www.kaggle.com/datasets/akshaypawar7/millions-of-movies

We have used the refined movie dataset `movies.csv`
 shared along with this code.

### Loading the data from the movie excel 

In [None]:
# loading the data from the movie excel 
import pandas as pd
from smart_open import open
from azure.storage.blob import BlobServiceClient

connect_str = 'DefaultEndpointsProtocol=https;AccountName=visrecstorage;AccountKey=q3Wvmg9bF4oPqZYdXV6PJ2+XPDfD3z4FckngdyHGMyCGE5zHMgqKPVNVk3AxGdjERc28EHGBVEE2+AStDSPpVw==;EndpointSuffix=core.windows.net'
transport_params = {
    'client': BlobServiceClient.from_connection_string(connect_str),
}

csvfile = open("azure://visrec/movies.csv", 'rb', transport_params=transport_params)
moviedata = pd.read_csv(csvfile, delimiter = ',')
moviedata.head()

In [None]:
import snowflake.connector

# Gets the version
ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws')
cs = ctx.cursor()
try:
    cs.execute("USE WAREHOUSE COMPUTE_WH")
    cs.execute("DROP DATABASE IF EXISTS MOVIES_DB")
    cs.execute("CREATE DATABASE MOVIES_DB")
    cs.execute("DROP SCHEMA IF EXISTS MOVIES_TABLES")
    cs.execute("CREATE SCHEMA MOVIES_TABLES")
finally:
    cs.close()
ctx.close()

In [3]:
from snowflake.snowpark.session import Session
import snowflake.snowpark.functions as F
import snowflake.snowpark.types as T

connection_parameters = {
    "account": 'hb81971.ca-central-1.aws',
    "user": 'SARAWANPERNETI193',
    "password": '@Temp2023',
    "role": "ACCOUNTADMIN",
    "warehouse": "COMPUTE_WH",
    "database": "MOVIES_DB",
    "schema": "MOVIES_TABLES"
    
}

session = Session.builder.configs(connection_parameters).create()

### Loading the data to the database into the movie table 

In [None]:
#Loading the data to the database into the movie table 
try:
    ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws',role="ACCOUNTADMIN",warehouse="COMPUTE_WH",database="MOVIES_DB",schema="MOVIES_TABLES")
    cursor = ctx.cursor()
    cursor.execute('DROP TABLE IF EXISTS MOVIE_DATA;')
    print('Creating table....')
    # in the below line please pass the create table statement which you want #to create
    cursor.execute("CREATE TABLE MOVIE_DATA(movie_id int primary key,movie_title TEXT, genres TEXT, original_language varchar(255),overview TEXT,production TEXT,release_date varchar(255),runtime varchar(255),voter_rating varchar(255),voters_count varchar(255),credits TEXT,keywords TEXT,Poster_path varchar(255))")
    print("Table is created....")
    #loop through the data frame
    for i,row in moviedata.iterrows():
        #here %S means string values 
        sql = "INSERT INTO MOVIE_DATA VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        cursor.execute(sql, list(row))
        print("Record inserted:-" + row[1])
    cursor.execute("ALTER TABLE movie_data add poster_image binary;")
    print("Table is altered....")
finally:
    cursor.close()
ctx.close()

### Downloading and inserting the poster images to the movie table in the movie database created

The below code can be tweak by setting the limits to the movie id column such that we can insert the desired number of movie poster images required. 

In [None]:
import requests
import shutil
from smart_open import open
from azure.storage.blob import BlobServiceClient

connect_str = 'DefaultEndpointsProtocol=https;AccountName=visrecstorage;AccountKey=q3Wvmg9bF4oPqZYdXV6PJ2+XPDfD3z4FckngdyHGMyCGE5zHMgqKPVNVk3AxGdjERc28EHGBVEE2+AStDSPpVw==;EndpointSuffix=core.windows.net'
transport_params = {
    'client': BlobServiceClient.from_connection_string(connect_str),
}


def convert_data(file_name):
    with open(file_name, 'rb', transport_params=transport_params) as file:
        binary_data = file.read()
    return binary_data

try:
    ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws',role="ACCOUNTADMIN",warehouse="COMPUTE_WH",database="MOVIES_DB",schema="MOVIES_TABLES")
    cursor = ctx.cursor()
    cursor.execute("SELECT * FROM MOVIE_DATA where movie_id > 0 and  movie_id < 100000")
    myresult = cursor.fetchall()
    print(len(myresult))
    for x in myresult:
        filename = "azure://posters/"+str(x[0])+".jpg"
        # Open the url image, set stream to True, this will return the stream content.
        print(str(x[0])+":- "+x[1] + ":-"+ x[12])
        r = requests.get(x[12], stream = True)
        # Check if the image was retrieved successfully
        if r.status_code == 200:
            # Set decode_content value to True, otherwise the downloaded image file's size will be zero.
            r.raw.decode_content = True
            # Open a local file with wb ( write binary ) permission.
            with open(filename,'wb', transport_params=transport_params) as f:
                shutil.copyfileobj(r.raw, f)
            imagedata=convert_data(filename)
            sql = "Update movie_data set poster_image = %s where movie_id = %s"
            cursor.execute(sql, (imagedata, x[0]))
            print(str(x[0])+":- "+x[1] + " :- image inserted")
finally:
    cursor.close()
ctx.close()

## 3.Feature Extraction
Here we are using the `ResNet50` model and `ImageNet` weights for the feature extraction through transfer learning.

### `ResNet50` 
ResNet-50 is a convolutional neural network (CNN) model, that has a deep architecture consisting of 50 layers, including convolutional layers, pooling layers, fully connected layers, and shortcut connections known as skip connections.

The input to ResNet-50 is typically a 224x224 RGB image, and the output is a vector of probabilities representing the predicted probabilities of different classes. The model is trained using a large dataset, such as ImageNet, where it learns to classify images into one of the 1,000 predefined classes. ResNet-50 has also been used as a starting point for transfer learning, where the pre-trained model is fine-tuned on a specific task using a smaller dataset

###`ImageNet`
The ImageNet weights refer to the pre-trained weights of a neural network model, specifically trained on the ImageNet dataset. The ImageNet dataset is a large-scale dataset containing millions of labeled images belonging to thousands of different categories.


### Genres list

There are approximation of 19 genres associated in the movie data base 
The following are the list of the genres 

1.Action, 
2.Adventure,
3.Animation,
4.Comedy,
5.Crime,
6.Documentary,
7.Drama,
8.Family,
9.Fantasy,
10.History,
11.Horror,
12.Music,
13.Mystery,
14.Romance,
15.Science Fiction,
16.Thriller,
17.TV Movie,
18.War,
19.Western,

### Genre wise feature extraction from the poster images


In [None]:
import tensorflow_hub as hub
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
import numpy as np
from numpy.linalg import norm
import pickle
import os
import time
from tqdm import tqdm
from PIL import Image

from smart_open import open


from azure.storage.blob import BlobServiceClient

connect_str = 'DefaultEndpointsProtocol=https;AccountName=visrecstorage;AccountKey=q3Wvmg9bF4oPqZYdXV6PJ2+XPDfD3z4FckngdyHGMyCGE5zHMgqKPVNVk3AxGdjERc28EHGBVEE2+AStDSPpVw==;EndpointSuffix=core.windows.net'
transport_params = {
    'client': BlobServiceClient.from_connection_string(connect_str),
}

# Create a function to write the image file in local machine 
def write_file(data, filename):
    with open(filename, 'wb', transport_params=transport_params) as f:
        f.write(data)

# Create a function to extract the feature of the image using model
def extract_features(img_path,model):
    #img = image.load_img(img_path,target_size=(224,224))
    img1 = open(img_path, 'rb', transport_params=transport_params)
    img = Image.open(img1).resize((224,224))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    result = model.predict(preprocessed_img).flatten()
    normalized_result = result / norm(result)

    return normalized_result

#Create a ResNet Model
model = ResNet50(weights='imagenet',include_top=False,input_shape=(224,224,3))
model.trainable = False

model = tensorflow.keras.Sequential([
    model,
    GlobalMaxPooling2D()
])

genres=["Action", "Adventure", "Animation", "Comedy", "Crime", "Documentary", "Drama", "Family", "Fantasy", "History", "Horror", "Music", "Mystery", "Romance", "Science Fiction", "Thriller", "TV Movie", "War", "Western"]
try:
    ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws',role="ACCOUNTADMIN",warehouse="COMPUTE_WH",database="MOVIES_DB",schema="MOVIES_TABLES")
    cursor = ctx.cursor()
    for genre in genres:
        feature_list =[]
        filenumber=[]
        sql="SELECT movie_id,movie_title,poster_image FROM MOVIE_DATA where movie_id > 0 and movie_id < 50000 and poster_image Is Not Null and genres like (%s)"
        cursor.execute(sql,("%"+genre+"%",))
        #cursor.execute("SELECT movie_id FROM movies.movie_data where movie_id < 10000 && poster_image Is Not Null")
        myresult = cursor.fetchall()
        for x in tqdm(myresult):
            filenumber.append(x[0])
            #Provide the poster image path for the feature extraction:-
            poster_image_path="azure://posters/"+str(x[0])+".jpg"
            write_file(x[2], poster_image_path)
            feature_list.append(extract_features(poster_image_path,model))
        print(np.array(feature_list).shape)
        # provide the path for the feature extraction file 
        feature_extraction_file='azure://extraction/'+genre+'_imageFeaturesEmbeddings.pkl'
        pickle.dump(feature_list,open(feature_extraction_file,'wb', transport_params=transport_params))
        time.sleep(30)
        print(len(filenumber))
        # provide the path for the feature extraction file 
        feature_filenumber_file='azure://extraction/'+genre+'_imageFeaturesFileNumber.pkl'
        pickle.dump(filenumber,open(feature_filenumber_file,'wb', transport_params=transport_params))
        print("file has been loaded")
        time.sleep(30)
finally:
    cursor.close()
ctx.close()
print("program has terminated")

In [None]:
session.sql('CREATE OR REPLACE STAGE MRS_MODEL').show()

In [None]:
session.sql('LIST @ML_MODELS').show()

In [4]:
def mrs(session: Session) -> T.Variant:
    #import tensorflow_hub as hub
    import tensorflow
    from tensorflow.keras.preprocessing import image
    from tensorflow.keras.layers import GlobalMaxPooling2D
    from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
    import numpy as np
    from numpy.linalg import norm
    import pickle
    import os
    import time
    #from tqdm import tqdm
    from PIL import Image

    from smart_open import open


    from azure.storage.blob import BlobServiceClient

    connect_str = 'DefaultEndpointsProtocol=https;AccountName=visrecstorage;AccountKey=q3Wvmg9bF4oPqZYdXV6PJ2+XPDfD3z4FckngdyHGMyCGE5zHMgqKPVNVk3AxGdjERc28EHGBVEE2+AStDSPpVw==;EndpointSuffix=core.windows.net'
    transport_params = {
        'client': BlobServiceClient.from_connection_string(connect_str),
    }

    # Create a function to write the image file in local machine 
    def write_file(data, filename):
        with open(filename, 'wb', transport_params=transport_params) as f:
            f.write(data)

    # Create a function to extract the feature of the image using model
    def extract_features(img_path,model):
        #img = image.load_img(img_path,target_size=(224,224))
        img1 = open(img_path, 'rb', transport_params=transport_params)
        img = Image.open(img1).resize((224,224))
        img_array = image.img_to_array(img)
        expanded_img_array = np.expand_dims(img_array, axis=0)
        preprocessed_img = preprocess_input(expanded_img_array)
        result = model.predict(preprocessed_img).flatten()
        normalized_result = result / norm(result)

        return normalized_result

    #Create a ResNet Model
    model = ResNet50(weights='imagenet',include_top=False,input_shape=(224,224,3))
    model.trainable = False

    model = tensorflow.keras.Sequential([
        model,
        GlobalMaxPooling2D()
    ])

    genres=["Action", "Adventure", "Animation", "Comedy", "Crime", "Documentary", "Drama", "Family", "Fantasy", "History", "Horror", "Music", "Mystery", "Romance", "Science Fiction", "Thriller", "TV Movie", "War", "Western"]
    try:
        ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws',role="ACCOUNTADMIN",warehouse="COMPUTE_WH",database="MOVIES_DB",schema="MOVIES_TABLES")
        cursor = ctx.cursor()
        for genre in genres:
            feature_list =[]
            filenumber=[]
            sql="SELECT movie_id,movie_title,poster_image FROM MOVIE_DATA where movie_id > 0 and movie_id < 50000 and poster_image Is Not Null and genres like (%s)"
            cursor.execute(sql,("%"+genre+"%",))
            #cursor.execute("SELECT movie_id FROM movies.movie_data where movie_id < 10000 && poster_image Is Not Null")
            myresult = cursor.fetchall()
            for x in myresult:
                filenumber.append(x[0])
                #Provide the poster image path for the feature extraction:-
                poster_image_path="azure://posters/"+str(x[0])+".jpg"
                write_file(x[2], poster_image_path)
                feature_list.append(extract_features(poster_image_path,model))
            print(np.array(feature_list).shape)
            # provide the path for the feature extraction file 
            feature_extraction_file='azure://extraction/'+genre+'_imageFeaturesEmbeddings.pkl'
            pickle.dump(feature_list,open(feature_extraction_file,'wb', transport_params=transport_params))
            time.sleep(30)
            print(len(filenumber))
            # provide the path for the feature extraction file 
            feature_filenumber_file='azure://extraction/'+genre+'_imageFeaturesFileNumber.pkl'
            pickle.dump(filenumber,open(feature_filenumber_file,'wb', transport_params=transport_params))
            print("file has been loaded")
            time.sleep(30)
    finally:
        cursor.close()
    ctx.close()
    #print("program has terminated")
    result = "program has terminated"
    return result

In [18]:
sproc_train_dt_model = session.sproc.register(
                    func=mrs, 
                    name='sproc_mrs', 
                    is_permanent=True, 
                    replace=True, 
                    stage_location='@MRS_MODEL', 
                    packages=[
                        'snowflake-snowpark-python',
                        'scikit-learn',
                        'joblib',
                        'tensorflow',
                        'keras-applications',
                        'pillow',
                        'smart_open',
                        'azure-storage-blob']
)

package tensorflow-datasets is not installed in the local environmentYour UDF might not work when the package is installed on the server but not on your local environment.
package keras-applications is not installed in the local environmentYour UDF might not work when the package is installed on the server but not on your local environment.


In [12]:
session.sql('show procedures').show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"created_on"                      |"name"                            |"schema_name"  |"is_builtin"  |"is_aggregate"  |"is_ansi"  |"min_num_arguments"  |"max_num_arguments"  |"arguments"                                         |"description"           |"catalog_name"  |"is_table_function"  |"valid_for_clustering"  |"is_secure"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|201

In [19]:
model_response = sproc_train_dt_model(session=session)

Failed to execute query [queryID: 01ad4898-3200-cc62-0003-5d520002a10e] CALL sproc_mrs()
100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 1354, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1256, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1302, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1251, in endheaders
    self._send_output(message_body, encode_chunke

SnowparkSQLException: (1304): 100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 1354, in do_open
    h.request(req.get_method(), req.selector, req.data, headers,
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1256, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1302, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1251, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1011, in _send_output
    self.send(msg)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 951, in send
    self.connect()
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 1418, in connect
    super().connect()
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/http/client.py", line 922, in connect
    self.sock = self._create_connection(
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/socket.py", line 787, in create_connection
    for res in getaddrinfo(host, port, 0, SOCK_STREAM):
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/socket.py", line 918, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -3] Temporary failure in name resolution

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/site-packages/keras/utils/data_utils.py", line 300, in get_file
    urlretrieve(origin, fpath, DLProgbar())
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/site-packages/keras/utils/data_utils.py", line 84, in urlretrieve
    response = urlopen(url, data)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 222, in urlopen
    return opener.open(url, data, timeout)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 525, in open
    response = self._open(req, data)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 542, in _open
    result = self._call_chain(self.handle_open, protocol, protocol +
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 1397, in https_open
    return self.do_open(http.client.HTTPSConnection, req,
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/urllib/request.py", line 1357, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno -3] Temporary failure in name resolution>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "_udf_code.py", line 7, in compute
  File "C:\Users\ds.perneti\AppData\Local\Temp\ipykernel_15008\1508263901.py", line 44, in mrs
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/site-packages/keras/applications/resnet.py", line 521, in ResNet50
    return ResNet(
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/site-packages/keras/applications/resnet.py", line 232, in ResNet
    weights_path = data_utils.get_file(
  File "/usr/lib/python_udf/8ecf914fb80eb1bda2e5044e222a41ad44603ddefe8937cd35de3d2eb0c20334/lib/python3.8/site-packages/keras/utils/data_utils.py", line 304, in get_file
    raise Exception(error_msg.format(origin, e.errno, e.reason))
Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5: None -- [Errno -3] Temporary failure in name resolution
 in function SPROC_MRS with handler compute

Note:- The above code is a once time execution

##4.Similarity calculation:
The 'brute' algorithm has been used to determine the nearest neighbor images for the reference, using the Euclidean distance metric to measure visual similarity. 




###`Euclidean distance`

also known as Euclidean metric, is a measure of the straight-line distance between two points in Euclidean space.

###`Brute-Force Algorithm`

also known as an exhaustive search algorithm, is a straightforward approach to problem-solving that systematically tries every possible solution

## 5.Ranking & recommendation:
The best 5 visually similar images from each following genre of the reference film are suggested using the scores from the Euclidean distance metric computation.

# Movie Recommendation Front end application

The front end application has been developed using the Streamlit platform

To view the Streamlit app on a browser, run it with the following
  command:

    streamlit run pythonfile

Example:-

    streamlit run c:\users\vivek.kakumanu\desktop\python_learnings\python_script\project\poster_recomendation_system\movie_recomendation_based_on_genres.py


In [None]:
import streamlit as st
from PIL import Image
import numpy as np
import pickle
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
from sklearn.neighbors import NearestNeighbors
from numpy.linalg import norm
import snowflake.connector
import random

from smart_open import open


from azure.storage.blob import BlobServiceClient

connect_str = 'DefaultEndpointsProtocol=https;AccountName=visrecstorage;AccountKey=q3Wvmg9bF4oPqZYdXV6PJ2+XPDfD3z4FckngdyHGMyCGE5zHMgqKPVNVk3AxGdjERc28EHGBVEE2+AStDSPpVw==;EndpointSuffix=core.windows.net'
transport_params = {
    'client': BlobServiceClient.from_connection_string(connect_str),
}

st.title('Movie Poster Recommender System')


#Create a file method 
def file_name(uploaded_file):
    return "azure://uploads/"+ str(uploaded_file)+".jpg"


#Create a save file method 
def save_uploaded_file(data, uploaded_file):
    try:
        with open(file_name(uploaded_file),'wb', transport_params=transport_params) as f:
            f.write(data)
        return 1
    except:
        return 0

# Create a function to extract the feature of the image using model
def feature_extraction(img_path,model):
    img1 = open(img_path, 'rb', transport_params=transport_params)
    img = Image.open(img1).resize((224,224))
    img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    result = model.predict(preprocessed_img).flatten()
    normalized_result = result / norm(result)

    return normalized_result

# Create a function to recommend the images based on the features extract by the model.
def recommend(features,genre):
  # provide the path for the feature extraction file 
    feature_extraction_file='azure://extraction/'+genre+'_imageFeaturesEmbeddings.pkl'        
    feature_list = np.array(pickle.load(open(feature_extraction_file,'rb', transport_params=transport_params)))
    neighbors = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='euclidean')
    neighbors.fit(feature_list)

    distances, indices = neighbors.kneighbors([features])

    return indices

model = ResNet50(weights='imagenet',include_top=False,input_shape=(224,224,3))
model.trainable = False

model = tensorflow.keras.Sequential([
    model,
    GlobalMaxPooling2D()
])


def predict_movies(movie_id):
    # feature extract
        features = feature_extraction(file_name(movie_id),model)
        sql = "SELECT movie_title, genres FROM MOVIE_DATA where movie_id = "+str(movie_id)
        print(sql)
        cursor.execute(sql)
        txt=cursor.fetchall()
        print(txt[0][1])
        genres = txt[0][1].split('-')
        st.write(txt[0][0])
        st.write(txt[0][1])
        for genre in genres:
            # recommendention
            indices = recommend(features,genre)
            where_in = ','.join(['%s'] * len(indices[0]))
            # provide the path for the feature extraction file 
            feature_filenumber_file='azure://extraction/'+genre+'_imageFeaturesFileNumber.pkl'
            filenames1 = pickle.load(open(feature_filenumber_file,'rb', transport_params=transport_params))
            sql = "SELECT movie_id,movie_title,poster_image FROM movie_data where movie_id in (%s)" % (where_in)
            sql = sql+ "and genres like (%s) and movie_id not in (%s)"
            indices_list=[]
            length = len(indices[0])
            for i in range(0,length):
                indices_list.append(filenames1[indices[0][i]])
            tuple_list = tuple(indices_list) + ("%"+genre+"%", movie_id,)
            cursor.execute(sql,tuple_list)
            recomended_results = cursor.fetchall()
            recomended_result=[]
            for i in range (0,len(recomended_results)):
                for j in range(length):
                    if indices_list[j]==recomended_results[i][0]:
                        recomended_result.append(recomended_results[i])
                if len(recomended_result)==6:
                    break
            st.header(genre)
            col1,col2,col3,col4,col5 = st.columns(5)
            
            with col1:
                if save_uploaded_file(recomended_result[0][2],recomended_result[0][0]):
                # display the file
                    display_image0 = open(file_name(recomended_result[0][0]), 'rb', transport_params=transport_params)
                    display_image = Image.open(display_image0)
                    st.image(display_image)
                    st.write(recomended_result[0][1])
            with col2:
                if save_uploaded_file(recomended_result[1][2],recomended_result[1][0]):
                # display the file
                    display_image1 = open(file_name(recomended_result[1][0]), 'rb', transport_params=transport_params)
                    display_image = Image.open(display_image1)
                    st.image(display_image)
                    st.write(recomended_result[1][1])
            with col3:
                if save_uploaded_file(recomended_result[2][2],recomended_result[2][0]):
                # display the file
                    display_image2 = open(file_name(recomended_result[2][0]), 'rb', transport_params=transport_params)
                    display_image = Image.open(display_image2)
                    st.image(display_image)
                    st.write(recomended_result[2][1])
            with col4:
                if save_uploaded_file(recomended_result[3][2],recomended_result[3][0]):
                # display the file
                    display_image3 = open(file_name(recomended_result[3][0]), 'rb', transport_params=transport_params)
                    display_image = Image.open(display_image3)
                    st.image(display_image)
                    st.write(recomended_result[3][1]) 
            with col5:
                if save_uploaded_file(recomended_result[4][2],recomended_result[4][0]):
                # display the file
                    display_image4 = open(file_name(recomended_result[4][0]), 'rb', transport_params=transport_params)
                    display_image = Image.open(display_image4)
                    st.image(display_image)
                    st.write(recomended_result[4][1])

if "refreshclick" not in st.session_state:
    st.session_state.refreshclick=False
    if "movie_id" not in st.session_state:
        randomlist=[]
        for i in range(0,30):
            n = random.randint(1,122)
            randomlist.append(n)
        st.session_state.movie_id=randomlist

        
try:
    ctx = snowflake.connector.connect(user='SARAWANPERNETI193',password='@Temp2023',account='hb81971.ca-central-1.aws',role="ACCOUNTADMIN",warehouse="COMPUTE_WH",database="MOVIES_DB",schema="MOVIES_TABLES")
    cursor = ctx.cursor()
    print ("randon number",tuple(st.session_state.movie_id))
    where_in = ','.join(['%s'] * len(st.session_state['movie_id']))
    sql = "SELECT movie_id,movie_title,poster_image FROM movie_data where movie_id in (%s) " % (where_in)
    sql = sql+ "and genres not like (%s) and genres not like (%s) and genres not like (%s)"
    print(sql)
    tuple_list = tuple(st.session_state.movie_id) + ("%TV Movie%","%Romance%","%Drama%")
    print(len(tuple_list))
    cursor.execute(sql, tuple_list)
    myresult = cursor.fetchall()
    col1,col2,col3,col4,col5 = st.columns(5)
    st.session_state.refreshclick = False
    if(len(myresult)!=0):
        with col1:
            if save_uploaded_file(myresult[0][2],myresult[0][0]):
            # display the file
                display_image0 = open(file_name(myresult[0][0]), 'rb', transport_params=transport_params)
                display_image = Image.open(display_image0)
                clicked_0 = st.button(myresult[0][1] , st.image(display_image))
        with col2:
            if save_uploaded_file(myresult[1][2],myresult[1][0]):
            # display the file
                display_image1 = open(file_name(myresult[1][0]), 'rb', transport_params=transport_params)
                display_image = Image.open(display_image1)
                clicked_1 = st.button(myresult[1][1] , st.image(display_image))

        with col3:
            if save_uploaded_file(myresult[2][2],myresult[2][0]):
            # display the file
                display_image2 = open(file_name(myresult[2][0]), 'rb', transport_params=transport_params)
                display_image = Image.open(display_image2)
                clicked_2 = st.button(myresult[2][1] , st.image(display_image))
        with col4:
            if save_uploaded_file(myresult[3][2],myresult[3][0]):
            # display the file
                display_image3 = open(file_name(myresult[3][0]), 'rb', transport_params=transport_params)
                display_image = Image.open(display_image3)
                clicked_3 = st.button(myresult[3][1] , st.image(display_image))

        with col5:
            if save_uploaded_file(myresult[4][2],myresult[4][0]):
            # display the file
                display_image4 = open(file_name(myresult[4][0]), 'rb', transport_params=transport_params)
                display_image = Image.open(display_image4)
                clicked_4 = st.button(myresult[4][1] , st.image(display_image))

        if(clicked_0):
            predict_movies(myresult[0][0])
        if(clicked_1):
            predict_movies(myresult[1][0])
        if(clicked_2):
            predict_movies(myresult[2][0])
        if(clicked_3):
            predict_movies(myresult[3][0])
        if(clicked_4):
            predict_movies(myresult[4][0])
finally:
    cursor.close()
ctx.close()

clicked = st.button("Refresh")

In [None]:
# !streamlit run mrs.py

In [None]:
!streamlit run mrssf.py

In [None]:
# pip install azure-storage-blob azure-identity

In [None]:
# pip install smart_open

In [None]:
# pip install tensorflow-hub

In [None]:
# pip install tensorflow

In [None]:
# pip install streamlit

In [None]:
pip list