In [None]:
import numpy as np
import pandas as pd
import psycopg2
import json

In [None]:
# Fetch every ClassificationAnalysis row from the analyses table.
# NOTE(review): the connection credentials are hard-coded in this cell; consider
# loading them from the environment or a secrets store before sharing the notebook.
cs = "dbname=%s user=%s password=%s host=%s port=%s" % ("FilmColors_v2_Production","ERCAdmin","admin","localhost","5432")
conn = psycopg2.connect(cs)
try:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cur:
        cur.execute("SELECT id, video_id, classification_object, analysis_class_name, uuid, dtype, shape, data FROM public.db_concert_analyses where analysis_class_name='ClassificationAnalysis';")
        # Records follow the SELECT column order: row[1] is video_id, row[7] is data.
        recs = cur.fetchall()
finally:
    # Always release the connection, including on failure.
    conn.close()

In [None]:
# Build the classification feature matrix: one row per fetched record, labelled by
# video_id (row[1]); the raw bytes in row[7] are reinterpreted as float64 values.
indexes = [row[1] for row in recs]
feature_vectors = [np.frombuffer(row[7], dtype=np.float64) for row in recs]

# Fail with a clear message instead of a NameError when the query returned nothing.
if not feature_vectors:
    raise ValueError("No ClassificationAnalysis rows were fetched; cannot build the feature matrix.")

# The column count is taken from the last record; np.vstack raises if the records
# do not all have the same length.
column_num = len(feature_vectors[-1])

# Construct the frame in one shot so it is float64 rather than an object-dtype
# frame that is filled row by row.
df = pd.DataFrame(np.vstack(feature_vectors), columns = list(range(column_num)), index = indexes)

In [None]:
# Covariance of the classification features (columns are the variables, normalised
# by N because bias=True), followed by its Moore-Penrose pseudo-inverse.
classification_features = df.to_numpy(dtype=np.float64)
cov_mat_classification = np.cov(classification_features, bias=True, rowvar=False)
inv_covmat_classification = np.linalg.pinv(cov_mat_classification)

In [None]:
# Pairwise distance matrix for the classification analysis.
# Each entry is the quadratic form diff . inv_cov . diff of the row difference;
# no square root is taken in this cell.
distance_matrix_classification = pd.DataFrame(index=indexes, columns=indexes)

for index1 in indexes:
    first_row = df.loc[index1]
    for index2 in indexes:
        difference = first_row - df.loc[index2]
        projected = np.dot(difference, inv_covmat_classification)
        distance_matrix_classification.loc[index1, index2] = np.dot(projected, difference.T)

In [None]:
# Display the pairwise classification distance matrix (rows and columns are video ids).
distance_matrix_classification

In [None]:
# Fetch every MusicAnalysis row from the analyses table (the original note
# describes these as the visual features).
# NOTE(review): the connection credentials are hard-coded in this cell; consider
# loading them from the environment or a secrets store before sharing the notebook.
cs = "dbname=%s user=%s password=%s host=%s port=%s" % ("FilmColors_v2_Production","ERCAdmin","admin","localhost","5432")
conn = psycopg2.connect(cs)
try:
    # The cursor context manager closes the cursor even when the query raises.
    with conn.cursor() as cur:
        cur.execute("SELECT id, video_id, classification_object, analysis_class_name, uuid, dtype, shape, data FROM public.db_concert_analyses where analysis_class_name='MusicAnalysis';")
        # Records follow the SELECT column order: row[1] is video_id, row[7] is data.
        recs = cur.fetchall()
finally:
    # Always release the connection, including on failure.
    conn.close()

In [None]:
# Decode the stored payload of every record whose video_id lies in 569..589.
# The payload is a whitespace-separated list of binary numbers, one per character.
data_dict = {}
for row in recs:
    video_id = row[1]
    if 569 <= video_id <= 589:
        bit_tokens = row[7].tobytes().split()
        data_dict[video_id] = ''.join(chr(int(token, 2)) for token in bit_tokens)

In [None]:
# Gather the clutter scalars of every video, the longest clutter series length,
# and the list of distinct instrument names in first-seen order.
indexes = []
unique_instruments = []
clutter_scalars_dict = {}
clutter_scalars_max = 0
for video_id, data in data_dict.items():
    indexes.append(video_id)
    payload = json.loads(data)
    scalars = payload["clutter_scalars"]
    clutter_scalars_dict[video_id] = scalars
    clutter_scalars_max = max(clutter_scalars_max, len(scalars))
    for instrument in payload["insty"]:
        if instrument not in unique_instruments:
            unique_instruments.append(instrument)

In [None]:
# One row per video; shorter clutter series are right-padded with zeros so that
# every row has clutter_scalars_max columns.
df_clutter_scalars = pd.DataFrame(index=indexes, columns=list(range(clutter_scalars_max)))

for video_id in indexes:
    scalars = clutter_scalars_dict[video_id]
    missing = clutter_scalars_max - len(scalars)
    df_clutter_scalars.loc[video_id] = np.pad(scalars, (0, missing), 'constant')
    
    

In [None]:
# Covariance of the padded clutter series (columns are the variables, normalised
# by N because bias=True), followed by its Moore-Penrose pseudo-inverse.
clutter_features = df_clutter_scalars.to_numpy(dtype=np.float64)
cov_mat_clutter = np.cov(clutter_features, bias=True, rowvar=False)
inv_covmat_clutter = np.linalg.pinv(cov_mat_clutter)

In [None]:
# Pairwise distance matrix for the clutter series.
# Each entry is the quadratic form diff . inv_cov . diff of the row difference;
# no square root is taken in this cell.
distance_matrix_clutter = pd.DataFrame(index=indexes, columns=indexes)

for index1 in indexes:
    first_row = df_clutter_scalars.loc[index1]
    for index2 in indexes:
        difference = first_row - df_clutter_scalars.loc[index2]
        projected = np.dot(difference, inv_covmat_clutter)
        distance_matrix_clutter.loc[index1, index2] = np.dot(projected, difference.T)

In [None]:
# Display the pairwise clutter distance matrix (rows and columns are video ids).
distance_matrix_clutter

In [None]:
# Collect camera data: for each motion type ("Pan" / "Tilt") and each video,
# gather the "camx" values whose matching "camy" entry names that motion.
indexes = []

temp_dict = {"Pan": {}, "Tilt": {}}
for video_id, data in data_dict.items():
    indexes.append(video_id)
    payload = json.loads(data)
    for position, motion in enumerate(payload["camy"]):
        per_video = temp_dict[motion]
        value = payload["camx"][position]
        per_video.setdefault(video_id, []).append(value)

In [None]:
# Turn each video's list of positions into an indicator vector (1 at every listed
# position, 0 elsewhere) and remember the longest vector per motion type.
motion_dict = {"Pan": {}, "Tilt": {}}
max_value_dict = {"Pan": 0, "Tilt": 0}
for motion, positions_by_video in temp_dict.items():
    for video, positions in positions_by_video.items():
        indicator = np.zeros(max(positions) + 1)
        indicator[positions] = 1
        motion_dict[motion][video] = indicator
        max_value_dict[motion] = max(max_value_dict[motion], len(indicator))

In [None]:
# One frame per motion type with a row for every video in `indexes`; indicator
# vectors are zero-padded to a common width and videos without that motion
# end up as all-zero rows via fillna(0).
df_motions_dict = {"Pan": None, "Tilt": None}
for motion, vectors in motion_dict.items():
    width = max_value_dict[motion]
    frame = pd.DataFrame(index=indexes, columns=list(range(width)))
    for video, vector in vectors.items():
        frame.loc[video] = np.pad(vector, (0, width - len(vector)), 'constant')
    df_motions_dict[motion] = frame.fillna(0)
    

In [None]:
# Pairwise distance matrix per camera motion type.
# Each entry is the quadratic form mu . inv_cov . mu of the row difference;
# no square root is taken in this cell.
distance_matrix_motions_dict = {"Pan": None, "Tilt": None}
for motion in df_motions_dict:
    motion_features = df_motions_dict[motion].to_numpy(dtype=np.float64)
    # np.cov squeezes its result, so a single-column frame yields a 0-d array that
    # np.linalg.pinv rejects; np.atleast_2d keeps the 1x1 case working.
    cov_mat_motion = np.atleast_2d(np.cov(motion_features, rowvar=False, bias= True))
    inv_covmat_motion = np.linalg.pinv(cov_mat_motion)
    distance_matrix_motions_dict[motion] = pd.DataFrame(columns = indexes, index = indexes)
    for index1 in indexes:
        for index2 in indexes:
            mu = df_motions_dict[motion].loc[index1]-df_motions_dict[motion].loc[index2]
            left = np.dot(mu, inv_covmat_motion)
            distance_matrix_motions_dict[motion].loc[index1, index2] = np.dot(left, mu.T)

In [None]:
# Only the last expression of a cell is rendered, so two bare expressions would
# show the "Tilt" matrix alone. Stack both matrices into one frame keyed by
# motion type so that "Pan" is displayed as well.
pd.concat({motion: distance_matrix_motions_dict[motion] for motion in ("Pan", "Tilt")})

In [None]:
# Fresh per-instrument containers: collected positions, indicator vectors, and
# the longest indicator vector length seen so far (starting at 0).
temp_dict = {instrument: {} for instrument in unique_instruments}
instruments_dict = {instrument: {} for instrument in unique_instruments}
max_value_dict = {instrument: 0 for instrument in unique_instruments}

In [None]:
# For each instrument and each video, gather the "instx" values whose matching
# "insty" entry names that instrument.
for video_id, data in data_dict.items():
    payload = json.loads(data)
    for position, instrument in enumerate(payload["insty"]):
        per_video = temp_dict[instrument]
        value = payload["instx"][position]
        per_video.setdefault(video_id, []).append(value)

In [None]:
# Turn each video's list of positions into an indicator vector per instrument
# (1 at every listed position, 0 elsewhere).
for instrument, positions_by_video in temp_dict.items():
    for video, positions in positions_by_video.items():
        indicator = np.zeros(max(positions) + 1)
        indicator[positions] = 1
        instruments_dict[instrument][video] = indicator

In [None]:
# Record, per instrument, the length of its longest indicator vector.
for instrument, vectors in instruments_dict.items():
    for vector in vectors.values():
        max_value_dict[instrument] = max(max_value_dict[instrument], len(vector))

In [None]:
# One frame per instrument with a row for every video in `indexes`; indicator
# vectors are zero-padded to a common width and videos without that instrument
# end up as all-zero rows via fillna(0).
df_instruments_dict = dict.fromkeys(unique_instruments)
for instrument, vectors in instruments_dict.items():
    width = max_value_dict[instrument]
    frame = pd.DataFrame(index=indexes, columns=list(range(width)))
    for video, vector in vectors.items():
        frame.loc[video] = np.pad(vector, (0, width - len(vector)), 'constant')
    df_instruments_dict[instrument] = frame.fillna(0)

In [None]:
# Pairwise distance matrix per instrument.
# Each entry is the quadratic form mu . inv_cov . mu of the row difference;
# no square root is taken in this cell.
distance_matrix_instruments_dict = dict.fromkeys(unique_instruments)
for instrument in df_instruments_dict:
    instrument_features = df_instruments_dict[instrument].to_numpy(dtype=np.float64)
    # np.cov squeezes its result, so an instrument whose frame has a single column
    # yields a 0-d array that np.linalg.pinv rejects; np.atleast_2d keeps the
    # 1x1 case working.
    cov_mat_instrument = np.atleast_2d(np.cov(instrument_features, rowvar=False, bias= True))
    # Diagnostic output: the instrument name and its covariance matrix.
    print(instrument)
    print(cov_mat_instrument)
    inv_covmat_instrument = np.linalg.pinv(cov_mat_instrument)
    distance_matrix_instruments_dict[instrument] = pd.DataFrame(columns = indexes, index = indexes)
    for index1 in indexes:
        for index2 in indexes:
            mu = df_instruments_dict[instrument].loc[index1]-df_instruments_dict[instrument].loc[index2]
            left = np.dot(mu, inv_covmat_instrument)
            distance_matrix_instruments_dict[instrument].loc[index1, index2] = np.dot(left, mu.T)

In [None]:
# Rebuild the video id list, the clutter scalars, the longest clutter series
# length, and the distinct instrument names from the decoded payloads.
indexes =[]
unique_instruments =[]
# NOTE(review): this reset discards any per-instrument vectors held in
# instruments_dict and nothing in this cell refills it; confirm it is intended.
instruments_dict = {}
# Initialise the dict this loop fills so the cell does not depend on a
# previously run cell having created it.
clutter_scalars_dict = {}
clutter_scalars_max = 0
for (video_id, data) in data_dict.items():
    indexes.append(video_id)
    dictionary = json.loads(data)
    clutter_scalars_dict[video_id] =dictionary["clutter_scalars"]
    if len(dictionary["clutter_scalars"] ) > clutter_scalars_max:
        clutter_scalars_max = len(dictionary["clutter_scalars"] )
    for instrument in dictionary["insty"]:
        if not (instrument in unique_instruments):
            unique_instruments.append(instrument)

In [None]:
import math

# The per-feature matrices hold quadratic forms (squared distances); the final
# dissimilarity is the sum of their square roots, in this order.
squared_distance_matrices = [
    distance_matrix_clutter,
    distance_matrix_classification,
    distance_matrix_motions_dict["Pan"],
    distance_matrix_motions_dict["Tilt"],
]

final_distance_matrix = pd.DataFrame(columns = indexes, index = indexes)

for index1 in indexes:
    for index2 in indexes:
        # Floating-point rounding can leave a quadratic form slightly below zero,
        # which would make math.sqrt raise ValueError; clamp at 0.0 first.
        # The value is passed first to max() so that a NaN still propagates.
        final_distance_matrix.loc[index1, index2] = sum(
            math.sqrt(max(squared.loc[index1, index2], 0.0))
            for squared in squared_distance_matrices
        )

# Store the result as a numeric frame instead of object dtype.
final_distance_matrix = final_distance_matrix.astype(np.float64)

final_distance_matrix

In [None]:
from sklearn.manifold import MDS
# Project the precomputed dissimilarities onto two dimensions.
# MDS starts from a random configuration; fixing random_state makes the embedding
# (and everything derived from it) reproducible across runs, matching the seeded
# KMeans step in this notebook.
embedding = MDS(n_components=2, dissimilarity="precomputed", random_state=0)
X_transformed = embedding.fit_transform(final_distance_matrix)

In [None]:
# 2-D coordinates per video: row n of X_transformed belongs to indexes[n].
# Building the frame directly from the array keeps "x" and "y" as float columns
# instead of filling an empty object-dtype frame one cell at a time.
location_df = pd.DataFrame(X_transformed, columns = ["x", "y"], index = indexes)

In [None]:
# Map every video id to its [x, y] coordinates; row n of X_transformed belongs
# to indexes[n].
location_dict = {
    video_id: list(X_transformed[position].astype(float))
    for position, video_id in enumerate(indexes)
}
location_dict

In [None]:
import numpy as np
from sklearn.cluster import KMeans

# Cluster the 2-D coordinates into three groups (seeded) and store each video's
# cluster label in a new "cluster" column.
kmeans = KMeans(random_state=0, n_clusters=3)
xy_coordinates = location_df[['x', 'y']]
cluster_labels = kmeans.fit_predict(xy_coordinates)
location_df['cluster'] = cluster_labels


In [None]:
# Attach to every video the coordinates of the centroid of its cluster.
centroids = kmeans.cluster_centers_
cen_x = [center[0] for center in centroids]
cen_y = [center[1] for center in centroids]
## add to df
# Build the label -> coordinate mapping from the centroids themselves so it
# follows the number of clusters instead of hard-coding labels 0, 1 and 2.
location_df['cen_x'] = location_df.cluster.map(dict(enumerate(cen_x)))
location_df['cen_y'] = location_df.cluster.map(dict(enumerate(cen_y)))


In [None]:
# Display the per-video coordinates together with the cluster label and centroid columns.
location_df

In [None]:
# Serialise the frame to JSON, then encode every character as its code point in
# binary digits, separated by single spaces.
dumped_json_string = location_df.to_json()

bit_tokens = [format(ord(letter), 'b') for letter in dumped_json_string]
binary_data = ' '.join(bit_tokens)

In [None]:
import uuid

# Store the encoded similarity result as a new "SimilarityAnalysis" row.
# NOTE(review): the connection credentials are hard-coded in this cell; consider
# loading them from the environment or a secrets store before sharing the notebook.
cs = "dbname=%s user=%s password=%s host=%s port=%s" % ("FilmColors_v2_Production","ERCAdmin","admin","localhost","5432")
conn = psycopg2.connect(cs)
try:
    # The cursor context manager closes the cursor even when the insert raises.
    with conn.cursor() as cur:
        # Values are passed as query parameters rather than formatted into the SQL.
        cur.execute("INSERT INTO public.db_concert_analyses(video_id, classification_object, analysis_class_name, uuid, dtype, shape, data) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                    ,(1, 'Global', "SimilarityAnalysis",str(uuid.uuid1()), "dict", "",binary_data))
    conn.commit()
finally:
    # Always release the connection; without a commit the insert is not persisted.
    conn.close()

