## TRIPLET LOSS CNN IMPLEMENTATION

In [1]:
from Model import triplet_model, embed_model
from utils import genre_count_dataset, img_from_ID, images_from_ids
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
import operator
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


### Loading data.csv
Contains
* Id
* Genre
* Song Name
* Spectrogram(path)

In [2]:
data = pd.read_csv("data.csv")

In [3]:
print("DATASET SHAPE",data.shape)
data.head()

DATASET SHAPE (3208, 4)


Unnamed: 0,Id,Genre,Song Name,Spectrogram
0,1,Classical,Beethoven-Symphony 9,Spectrograms/Classical/Beethoven-Symphony 9/Be...
1,2,Classical,Beethoven-Symphony 9,Spectrograms/Classical/Beethoven-Symphony 9/Be...
2,3,Classical,Beethoven-Symphony 9,Spectrograms/Classical/Beethoven-Symphony 9/Be...
3,4,Classical,Beethoven-Symphony 9,Spectrograms/Classical/Beethoven-Symphony 9/Be...
4,5,Classical,Beethoven-Symphony 9,Spectrograms/Classical/Beethoven-Symphony 9/Be...


In [4]:
print("DATASET GENRE COMPOSITION\n")
cl, h ,m ,r ,p ,co = genre_count_dataset(data, data.shape[0])

DATASET GENRE COMPOSITION

Classical 595
Hip-Hop 573
Metal 643
Rock 469
Pop 462
Country 466


### CALLING EMBEDDING MODEL
* embed_model gives 128 embeddings corresponding to a spectrogram image (128,1402,1)

In [5]:
submodel = embed_model()

In [6]:
submodel.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 1402, 1)      0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 134, 1408, 1)      0         
_________________________________________________________________
conv1 (Conv2D)               (None, 64, 701, 64)       3200      
_________________________________________________________________
activation_1 (Activation)    (None, 64, 701, 64)       0         
_________________________________________________________________
zero_padding2d_2 (ZeroPaddin (None, 66, 703, 64)       0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 351, 64)       0         
_________________________________________________________________
lambda_1 (Lambda)            (None, 32, 351, 64)       0         
__________

### CALL TRIPLET MODEL
* Gets you triplet model
* implements triplet loss
* metric accuracy for custom accuracy check

In [7]:
triplet_model = triplet_model()

  name='loss', output_shape=(1, ))
  name=name)
  model = Model(input=[anchor_input, positive_input, negative_input], output=loss)


In [8]:
triplet_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
anchor_input (InputLayer)       (None, 128, 1402, 1) 0                                            
__________________________________________________________________________________________________
positive_input (InputLayer)     (None, 128, 1402, 1) 0                                            
__________________________________________________________________________________________________
negative_input (InputLayer)     (None, 128, 1402, 1) 0                                            
__________________________________________________________________________________________________
model_2 (Model)                 (None, 128)          496976      anchor_input[0][0]               
                                                                 positive_input[0][0]             
          

### GENERATING TRIPLETS
Concept:
* Pick n (32) random anchor points (ids from database)
* Make a remaining list containing ids not included by anchors
* For each anchor find most optimal semi-hard positives and negatives.


Concept for chosing triplets:
* Choose a positive from same genre but farthest in distance (embedding distance np.linalg.norm) to the anchor.
* Choose a negative from differest genre but closest in distance to the anchor.

### GET IMAGES FROM ID (SPECTROGRAM PATH)

In [9]:
data = images_from_ids(data)

Images added to dataframe


In [10]:
data.Images[1].shape

(1, 128, 1402, 1)

In [11]:
# data.head()

In [12]:
# get_img_from_ID(data, 9)

In [13]:
anchor_batch = 32

In [14]:
def generate_triplets():  
            import time
            trip_ids = []
            triplets  = []
            
            # id's 
            anchors = random.sample(list(range(1,data.shape[0]+1)), anchor_batch)
            remaining = [i for i in range(1, data.shape[0]+1) if i not in anchors]
            start_time = time.time()
            
            for a in tqdm(anchors): 
                pos_embeds = []
                neg_embeds = []

                pos_dict = {}
                neg_dict = {}

                
                # got embeddings, genre of an id in anchor
                a_img = img_from_ID(data, a)
                a_embed = submodel.predict_on_batch(a_img)                
                a_genre = data.loc[a-1]['Genre'] 
                
                
                for rem in remaining:
                    # iterating over remaining and checking for its genre, embed
                    rem_img = img_from_ID(data,rem)
                    rem_embed = submodel.predict_on_batch(rem_img)
                    rem_genre = data.loc[rem-1]['Genre']


                    # id's genre embeddings
                    if (a_genre == rem_genre):
                                  
                        #dictionary of ids : distance
                        pos_dict[rem] = np.linalg.norm(rem_embed - a_embed)
                        

                    else:
                        neg_dict[rem] = np.linalg.norm(rem_embed - a_embed)
            
#             return neg_ids, pos_ids


                # indices of min distance neg and max distance pos
                pos_index = max(pos_dict.items(), key=operator.itemgetter(1))[0]
                neg_index = min(neg_dict.items(), key=operator.itemgetter(1))[0]
            
#                 print(pos_index)

                # add images corresponding to indices 
                anchor_img = img_from_ID(data, a)
                positive_img =img_from_ID(data, pos_index)
                negative_img = img_from_ID(data, neg_index)
                
                trip_ids.append([a, pos_index, neg_index])
                triplets.append([anchor_img, positive_img, negative_img])
           
            trip_ids = np.array(trip_ids)
            triplets = np.array(triplets)
            triplets = triplets.reshape(anchor_batch,3, 128, 1402,1)
            
            print("Execution Time: ", time.time()-start_time) 
            return trip_ids, triplets

In [15]:
# tripvec = np.vectorize(generate_triplets, otypes=[np.ndarray])
# treip_ids, trip = tripvec()

# # n,p = generate_triplets()

In [16]:
# trip = np.array(trip)
# treip_ids = np.array(treip_ids)

In [17]:
# treip_ids.shape

In [18]:
# trip.shape

In [19]:
# trip = trip.reshape(anchor_batch,3, 128, 1402,1)

#### Model fit and evaluate
* Model.fit - fits the triplets into the model using accurcay (custom metric) and triplet loss

* Model.evaluate - evaluates on the metrics the model was compiled in

In [20]:
epochs = 5
split = 2

In [None]:
for epoch in tqdm(range(epochs)):
    print('Epoch %s' % epoch)
    
    
    tripvec = np.vectorize(generate_triplets, otypes=[np.ndarray])
    trip_ids, trip = tripvec()
    
    anchors = trip[:,0]
    positives = trip[:,1]
    negatives = trip[:,2]
    
#     print(trip.shape)
    
    triplet_model.fit([anchors, positives, negatives], y = np.zeros(32), batch_size= 32, verbose = 1)
    
    if(epoch% split==0):
        triplet_model.evaluate([anchors, positives, negatives], y = np.zeros(32), verbose=1)
        


  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

Epoch 0



  0%|                                                                                           | 0/32 [00:00<?, ?it/s]
  3%|██▌                                                                                | 1/32 [00:21<11:15, 21.80s/it]
  6%|█████▏                                                                             | 2/32 [00:37<09:16, 18.55s/it]
  9%|███████▊                                                                           | 3/32 [00:52<08:23, 17.34s/it]
 12%|██████████▍                                                                        | 4/32 [01:06<07:48, 16.73s/it]
 16%|████████████▉                                                                      | 5/32 [01:21<07:21, 16.36s/it]

# RUN TILL HERE FIRST!

## CLUSTERING USING KNN  -- SUPERVISED (On Genre)

### DATA PREPARATION
* Copy data into new dataframe
* Get me embeddings of each song from embed_model (Embedding column)
* Save to csv (embed_data.csv)

In [None]:
embed_data = data.copy(deep= True)
embeddings = []

for i in range(data.shape[0]):
    curr_id = data.Id [i] 
    test_point = img_from_ID(data,curr_id)
    curr_embedding = submodel.predict_on_batch(test_point)
    curr_embedding = np.ndarray.tolist(curr_embedding)
    embeddings.append(np.array(curr_embedding[0]))
#     embeddings =  np.array(embeddings)
    
    
embed_data['Embeddings'] =embeddings
embed_data.Embeddings[0].shape
# np.savetxt('test.csv', embed_data) 
embed_data.to_csv('embed_dataa.csv', na_rep = None )

In [None]:
embeddings = np.array(embeddings)

In [None]:
embeddings[0].dtype

In [None]:
embed_data.Embeddings[0].dtype

### USER INPUT CASE (Imagining its not in database rn)
CONTEXT-
User inputs a value or new song

FOR NOW-
Picking random point from database

In [None]:
curr_id = data.Id [0] 
test_point = img_from_ID(data,curr_id)

In [None]:
test_point.shape

#### Get me embeddings!

In [None]:
e = submodel.predict_on_batch(test_point)

### TESTING KNN APPROACH: SHOW ME WHAT YOU'VE GOT KNN?

In [None]:
edata = pd.read_csv("embed_dataa.csv")
X = embed_data.drop(['Id','Genre',' Spectrogram','Song Name'],axis=1)
y = embed_data['Genre']

In [None]:
X.Embeddings[9].shape

In [None]:
X.head()

In [None]:
X.Embeddings[0].dtype

In [None]:
X.dtypes

In [None]:
X = X.values.T.tolist()

In [None]:
X = np.array(X)
X[0].shape

In [None]:
y[0]

# ERROR - Cant convert string to float_32
* Embeddings (python list) saving as string in csv
* Cant fit string in KNN!!

In [None]:
# X.Embeddings = X.Embeddings.convert_object(convert_numeric=True)

In [None]:
X_train, X_test, y_train, y_test = train_test_split( X[0], y, test_size=0.20, random_state=42)

In [None]:
X[0][0].shape

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
knn = KNeighborsClassifier(n_neighbors=8)


In [None]:
# for i in range(embed_data.shape[0]):
#     print(embed_data.Embeddings[i].shape)

In [None]:
### X_train.shape
knn.fit(np.array(X_train), np.array(y_train))
pred = knn.predict(np.array(X_test))
print (accuracy_score(y_test, pred))

#### OUTPUT
* Gets me class of genre for test data point (pred)
* Now I know the accuracy too.

### KNN NEXT STEP 
#### AIM - Suggest me top k (5) similar songs please!

* I've predicted the class of genre my input song belongs to. Now I need to pick top 5 Song Name from this Genre with lease distant spectrogram embeddings.

In [None]:
## new random point 
## call predict on it
pred = knn.predict([X_test[0]])
## class of genre
# print(pred)

In [None]:
pred[0]

In [None]:
# Suggestions of top 5 songs
curr_embed = (X_test[0])
predicted_Genre = pred
dist = []
dict = {}
for i in range(edata.shape[0]):
    if edata.Genre[i] == predicted_Genre:
        dist = np.linalg.norm(np.array(edata.Embeddings[i]),curr_embed)
#         dict.append([i])
        dist = np.sort(dist, axis = None)
        
print(dist[5:])
        
        
        
    

## CLUSTERING USING K Means  -- UNSUPERVISED 

In [None]:
from sklearn.cluster import KMeans

In [None]:
kmeans = KMeans(n_clusters=6 , random_state=32).fit(X_train)

In [None]:
kmeans.labels_

In [None]:
kmeans.predict(X_test)
# kmeans.cluster_centers_

In [None]:
kmeans.cluster_centers_

In [None]:
import pylab as pl
for i in range(X.shape[0]):
    if kmeans.labels_[i] == 0:
        c1 = pl.scatter(X,y,c='r',  marker='+')
    
# elif kmeans.labels_[i] == 0:
#     c2 = pl.scatter(pca_2d[i,0],pca_2d[i,1],c='g',
#    marker='o')
# elif kmeans.labels_[i] == 2:
#     c3 = pl.scatter(pca_2d[i,0],pca_2d[i,1],c='b',
#     marker='*')
# pl.legend([c1, c2, c3],['Cluster 1', 'Cluster 0',
#     'Cluster 2'])

pl.show()