In [None]:
!pip -q install "milvus[client]"
!pip -q install gradio

# Installations, Restart the runtime and before proceeding further


In [None]:
# Imports

from milvus import default_server
from pymilvus import connections, utility, CollectionSchema, FieldSchema, DataType, Collection
import io
import pickle
import gradio as gr
import numpy as np
import pandas as pd
from PIL import Image


In [None]:
# Necessary Shell Commands

# %%shell
# mkdir -p /content/milvus_data
# cp -r /content/drive/MyDrive/Recommender_project_2/milvus_data/* /content/milvus_data/
# chmod -R 777 /content/milvus_data


In [None]:
## Setting the base directory of Milvus for logs, data etc. Change the path to match your data location.

default_server.set_base_dir('/content/drive/MyDrive/Recommender_project_2/milvus_data')

# Alternative base directory: a local copy under /content (see the commented shell commands above)
# default_server.set_base_dir('/content/milvus_data')


In [None]:
# Start your Milvus server (run this after the base directory has been set)

default_server.start()


In [None]:
# Connect to Milvus: register the 'default' connection against localhost:19530

connections.connect(alias='default', host='localhost', port='19530')

# Collection creation

In [None]:
# Primary key: INT64 with auto_id enabled, so it is not supplied on insert
unq_id = FieldSchema(name="unq_id", dtype=DataType.INT64, is_primary=True, auto_id=True)

# String identifiers (VARCHAR, max_length 200)
game_id = FieldSchema(name="game_id", dtype=DataType.VARCHAR, max_length=200)
image_sub_id = FieldSchema(name="image_sub_id", dtype=DataType.VARCHAR, max_length=200)

# Image embedding: float vector of dimension 2048
vector = FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=2048)

schema = CollectionSchema(
    fields=[unq_id, game_id, image_sub_id, vector],
    description="Recommendation project images search",
    enable_dynamic_field=True,
)

collection_name = "rs_project2_img"

collection = Collection(name=collection_name, schema=schema, using="default")


### Insert Image Vectors loaded from PKL file to collection

In [None]:
# Load the DataFrame of image vectors from the PKL file.
# NOTE(review): read_pickle unpickles the file, so only load PKL files from a trusted source.
df = pd.read_pickle('/content/drive/MyDrive/Recommender_project_2/image_results/top_10k_results_vec.pkl')

# Preview the first rows
df.head()


In [None]:
# Insert the DataFrame into the collection in slices of 1000 rows, printing each insert result
for start in range(0, len(df), 1000):
    stop = start + 1000
    mr = collection.insert(df.iloc[start:stop])
    print(mr)
    

In [None]:
# The inserted data must be flushed; only then is it actually persisted to the collection

collection.flush()


### List collections and load vectors

In [None]:
# Print the collections reported by utility.list_collections() for the current connection
print(utility.list_collections())


In [None]:
# Re-bind `collection` by name only (no schema is passed here)
collection = Collection('rs_project2_img')


### Index creation

In [None]:
# IVF_FLAT index on the "vector" field using the L2 metric and nlist = 20
index_params = dict(
    metric_type="L2",
    index_type="IVF_FLAT",
    params=dict(nlist=20),
)

collection.create_index(field_name="vector", index_params=index_params)


In [None]:
# Display the index building progress reported for the collection
utility.index_building_progress("rs_project2_img")


### Search the image vectors

In [None]:
# Load the collection to memory (done here before the searches below)

collection.load()


In [None]:
# Search parameters: L2 metric (same as the index) and nprobe = 20 (equal to the index's nlist of 20)
search_params = {"metric_type": "L2", "params": {"nprobe": 20}}


In [None]:
# Sample query: the vector of the first row of df, wrapped in a list (it is passed as `data` to the search below)

vector_to_search = [df.iloc[0].vector]


In [None]:
# Searching for top 30 similar vectors, returning image_sub_id and game_id with each hit
results = collection.search(
    data=vector_to_search,
    anns_field="vector",
    param=search_params,
    limit=30,
    expr=None,
    output_fields=["image_sub_id", "game_id"],
    consistency_level="Strong",
)


In [None]:
# Print the image_sub_id and distance of every hit returned for the (single) query vector
for val in results[0]:
    distance, image_sub_id = val.distance, val.entity.get("image_sub_id")
    print(image_sub_id, " ", distance)
    

### Gradio

In [None]:
# Load PKL: the pickled image store, and the DataFrame of image vectors

with open("/content/drive/MyDrive/Recommender_project_2/image_results/all_results.pkl", "rb") as pkl_handle:
    all_images = pickle.load(pkl_handle)

ft_df = pd.read_pickle(
    '/content/drive/MyDrive/Recommender_project_2/image_results/top_10k_results_vec.pkl'
)

ft_df.head()

In [None]:
# Load the collection and define the search parameters used by milvus_results below
collection.load()

search_params = {"metric_type": "L2", "params": {"nprobe": 20}}

# def milvus_results(vector_to_search):
    
#     results = collection.search(
        
#         data = vector_to_search,
#         anns_field = "vector",
#         param = search_params,
#         limit = 30,
#         expr = None,
#         output_fields = ["image_sub_id", "game_id"],
#         consistency_level = "Strong"
    
#     )
    
#     sim_imgs = []
    
#     for val in results[0]:
        
#         #distance = val.distance
        
#         image_sub_id = val.entity.get("image_sub_id")
        
#         print(image_sub_id, " ", distance)
        
#         image_sub_id_md = [int(i) for i in image_sub_id.split('_')]
        
#         sim_imgs.append(image_sub_id_md)
        
#     return sim_imgs

def milvus_results(vectors_to_search, limit=5, target_collection=None, params=None):
    '''
    Search Milvus for the nearest neighbours of every image vector of a game.

    Parameters
    ----------
    vectors_to_search : iterable of vectors
        The image vectors of a particular game. May be empty.
    limit : int, default 5
        Number of hits requested per query vector. The first hit of every
        query is dropped (see below), so at most ``limit - 1`` hits per
        vector are kept.
    target_collection : optional
        Object whose ``search`` method is called. Defaults to the
        module-level ``collection``.
    params : dict, optional
        Search parameters. Defaults to the module-level ``search_params``.

    Returns
    -------
    list of [image_id, sub_id, distance]
        Hits of all query vectors combined, sorted by ascending distance.
        Ex: [[730, 0, 230.00], [4320, 1, 4567]]
    '''

    if target_collection is None:
        target_collection = collection

    if params is None:
        params = search_params

    final_res = []

    for each_vec in vectors_to_search:

        results = target_collection.search(
            data=[each_vec],
            anns_field="vector",
            param=params,
            limit=limit,
            expr=None,
            # game_id is returned as well, although only image_sub_id is used below
            output_fields=["image_sub_id", "game_id"],
            consistency_level="Strong",
        )

        # The hits must be collected inside the per-vector loop; otherwise only
        # the hits of the last query vector would be kept.
        for rank, val in enumerate(results[0]):

            # Skip the first hit.
            # NOTE(review): this presumably drops the query image itself, which
            # assumes the query vector is stored in the collection - confirm.
            if rank == 0:
                continue

            # Splitting the image_id and sub_id. Ex: "1234_1" => [1234, 1]
            image_id_subid_dis = [int(part) for part in val.entity.get("image_sub_id").split('_')]

            image_id_subid_dis.append(val.distance)

            final_res.append(image_id_subid_dis)

    # Sorting the hits of all query vectors by distance (closest first)
    final_res.sort(key=lambda x: x[2])

    return final_res


In [None]:
# def get_inputs(game_id,img_sub_id):
    
#     game_id = int(game_id)
    
#     img_sub_id = int(img_sub_id)
    
#     res_vec_id = str(game_id) + '_' + str(img_sub_id)
    
#     res_vec = ft_df[ft_df['image_sub_id'] == res_vec_id].vector.values
    
#     milvus_imgs = milvus_results(res_vec)
    
#     # send res_vec to Milvus
    
#     print(game_id, img_sub_id)
    
#     input_img = all_images[game_id][img_sub_id]
    
#     input_image = Image.open(io.BytesIO(input_img))
    
#     similar_1 =  Image.open(io.BytesIO(all_images[milvus_imgs[1][0]][milvus_imgs[1][1]]))
#     similar_2 =  Image.open(io.BytesIO(all_images[milvus_imgs[2][0]][milvus_imgs[2][1]]))
#     similar_3 =  Image.open(io.BytesIO(all_images[milvus_imgs[3][0]][milvus_imgs[3][1]]))
#     similar_4 =  Image.open(io.BytesIO(all_images[milvus_imgs[4][0]][milvus_imgs[4][1]]))
#     similar_5 =  Image.open(io.BytesIO(all_images[milvus_imgs[5][0]][milvus_imgs[5][1]]))
    
#     return input_image,similar_1,similar_2,similar_3,similar_4,similar_5

#     # return game_id,img_sub_id

def get_inputs(game_id):
    '''
    Gradio callback: look up the images most similar to those of a game.

    Parameters
    ----------
    game_id : str
        Game id typed into the text input. Surrounding whitespace is ignored.

    Returns
    -------
    tuple of 6 items
        The first stored image of the game, followed by five slots for the
        most similar images (closest first). Slots are filled with ``None``
        when Milvus returns fewer than five hits.

    Raises
    ------
    gr.Error
        If ``ft_df`` holds no vectors for ``game_id``.
    '''

    n_similar = 5  # must match the number of 'Output' components of the interface

    game_id = str(game_id).strip()

    all_vec = ft_df[ft_df['game_id'] == game_id].vector.values

    if len(all_vec) == 0:
        raise gr.Error(f"No image vectors found for game_id '{game_id}'")

    # Send the vectors to Milvus and keep only the closest hits
    milvus_imgs = milvus_results(all_vec)[:n_similar]

    # Print the top image ids and their distances, closest first
    distances = [[x[0], x[2]] for x in milvus_imgs]

    print(distances)

    input_image = Image.open(io.BytesIO(all_images[int(game_id)][0]))

    similar_images = [
        Image.open(io.BytesIO(all_images[hit[0]][hit[1]]))
        for hit in milvus_imgs
    ]

    # Pad with None so that exactly n_similar outputs are always returned
    similar_images += [None] * (n_similar - len(similar_images))

    return (input_image, *similar_images)


In [None]:
# Sample: the vectors stored for one game_id

ft_df.loc[ft_df['game_id'] == '2281410', 'vector'].values


In [None]:
# First 10 keys of the image store
list(all_images)[:10]


In [None]:
# Text box for the game id; one 'Input' image followed by five 'Output' images
demo = gr.Interface(
    fn=get_inputs,
    inputs="text",
    outputs=[
        gr.Image(type="pil", label='Input'),
        *(gr.Image(type="pil", label='Output') for _ in range(5)),
    ],
)

demo.launch(debug=True)


### Generate Similar Recommendations

In [None]:
# Get all game_ids (unique values of the game_id column)

all_game_ids = ft_df["game_id"].unique()

# all_game_ids[:10]

collection.load()

search_params = dict(metric_type="L2", params=dict(nprobe=20))

def milvus_results(vectors_to_search, limit=5, target_collection=None, params=None):
    '''
    Search Milvus for the nearest neighbours of every image vector of a game.

    Parameters
    ----------
    vectors_to_search : iterable of vectors
        The image vectors of a particular game. May be empty.
    limit : int, default 5
        Number of hits requested per query vector. The first hit of every
        query is dropped (see below), so at most ``limit - 1`` hits per
        vector are kept.
    target_collection : optional
        Object whose ``search`` method is called. Defaults to the
        module-level ``collection``.
    params : dict, optional
        Search parameters. Defaults to the module-level ``search_params``.

    Returns
    -------
    list of [image_id, sub_id, distance]
        Hits of all query vectors combined, sorted by ascending distance.
        Ex: [[730, 0, 230.00], [4320, 1, 4567]]
    '''

    if target_collection is None:
        target_collection = collection

    if params is None:
        params = search_params

    final_res = []

    for each_vec in vectors_to_search:

        results = target_collection.search(
            data=[each_vec],
            anns_field="vector",
            param=params,
            limit=limit,
            expr=None,
            # game_id is returned as well, although only image_sub_id is used below
            output_fields=["image_sub_id", "game_id"],
            consistency_level="Strong",
        )

        # The hits must be collected inside the per-vector loop; otherwise only
        # the hits of the last query vector would be kept.
        for rank, val in enumerate(results[0]):

            # Skip the first hit.
            # NOTE(review): this presumably drops the query image itself, which
            # assumes the query vector is stored in the collection - confirm.
            if rank == 0:
                continue

            # Splitting the image_id and sub_id. Ex: "1234_1" => [1234, 1]
            image_id_subid_dis = [int(part) for part in val.entity.get("image_sub_id").split('_')]

            image_id_subid_dis.append(val.distance)

            final_res.append(image_id_subid_dis)

    # Sorting the hits of all query vectors by distance (closest first)
    final_res.sort(key=lambda x: x[2])

    return final_res

def search_milvus(game_id):
    """
    Return the closest Milvus hits for one game as [image_id, distance] pairs.

    The vectors of all rows of ``ft_df`` with this game_id are sent to
    ``milvus_results``; only the first 5 entries of its result are kept.
    """

    print('working on ', game_id)

    game_rows = ft_df.loc[ft_df["game_id"] == game_id]

    top_hits = milvus_results(game_rows.vector.values)[:5]

    return [[hit[0], hit[2]] for hit in top_hits]

final_results = {}

# Search Milvus once per game_id, reporting progress every 100 games

for position, each_game in enumerate(all_game_ids):

    if position % 100 == 0:
        print("Done ", position)

    final_results[each_game] = search_milvus(each_game)

# Saving the recommendations to PKL

with open('/content/drive/MyDrive/Recommender_project_2/recommendations.pkl', 'wb') as handle:
    pickle.dump(final_results, handle, protocol=pickle.HIGHEST_PROTOCOL)


### Close the Milvus Server

In [None]:
# Close the Gradio demo launched earlier
demo.close()


In [None]:
# Release the collection, then close the 'default' connection
collection.release()
connections.disconnect("default")


In [None]:
# Stop the Milvus server that was started with default_server.start()
default_server.stop()


In [None]:
# Optional, if you want to cleanup previous data

# default_server.cleanup()
