In [12]:
import os
import math
import re
import statistics
import pandas as pd
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import kendalltau

In [13]:
# Field names addressed by position in the cells below: index 2 is the
# column every correlation is computed against, and indices 4-15 are the
# single-feature columns it is compared with. Keep the order stable.
corr_fieldnames = [
    # 0-1: identifying fields
    'Country', 'Song Name',
    # 2: target field
    'Popularity',
    # 3-15: single-feature fields
    'Time Signature', 'Duration MS', 'Tempo', 'Valence', 'Liveness',
    'Instrumentalness', 'Acousticness', 'Speechiness', 'Loudness',
    'Key', 'Mode', 'Energy', 'Danceability',
    # 16-51: hyphenated feature-pair names, grouped by their first feature
    # (presumably derived columns of the correlated CSVs -- TODO confirm)
    'Energy-Loudness', 'Energy-Speechiness', 'Energy-Valence',
    'Energy-Liveness', 'Energy-Tempo',
    'Acousticness-Mode', 'Acousticness-Instrumentalness',
    'Acousticness-Danceability',
    'Danceability-Valence', 'Danceability-Key', 'Danceability-Mode',
    'Danceability-Loudness', 'Danceability-Time Signature',
    'Loudness-Speechiness', 'Loudness-Valence', 'Loudness-Liveness',
    'Loudness-Key', 'Loudness-Mode', 'Loudness-Tempo',
    'Loudness-Time Signature', 'Loudness-Duration MS',
    'Valence-Speechiness', 'Valence-Key', 'Valence-Liveness',
    'Valence-Tempo', 'Valence-Time Signature',
    'Speechiness-Liveness', 'Speechiness-Tempo',
    'Liveness-Tempo', 'Liveness-Key',
    'Instrumentalness-Mode',
    'Tempo-Duration MS',
    'Key-Mode', 'Key-Duration MS', 'Key-Time Signature',
    'Duration MS-Time Signature',
]

In [2]:
# Locations of the three scaled song-feature backup CSVs (all songs,
# Bangladesh, Kolkata), relative to the notebook's working directory.
(
    scaled_all_backup_csv_path,
    scaled_bangladesh_backup_csv_path,
    scaled_kolkata_backup_csv_path,
) = (
    '../backup_csv/scaled_backup_all_song_features_csv.csv',
    '../backup_csv/scaled_backup_bangladesh_song_features_csv.csv',
    '../backup_csv/scaled_backup_kolkata_song_features_csv.csv',
)

# Read each file into its own DataFrame, in the same order as the paths.
alls, bd, ind = map(
    pd.read_csv,
    (
        scaled_all_backup_csv_path,
        scaled_bangladesh_backup_csv_path,
        scaled_kolkata_backup_csv_path,
    ),
)

In [3]:
# Locations of the three scaled "correlated features" CSVs (all songs,
# Bangladesh, Kolkata), relative to the notebook's working directory.
(
    scaled_all_song_correlated_features_csv_path,
    scaled_bangladesh_song_correlated_features_csv_path,
    scaled_kolkata_song_correlated_features_csv_path,
) = (
    '../correlated_csv/scaled_all_song_correlated_features_csv.csv',
    '../correlated_csv/scaled_bangladesh_song_correlated_features_csv.csv',
    '../correlated_csv/scaled_kolkata_song_correlated_features_csv.csv',
)

# Read each file into its own DataFrame, in the same order as the paths.
corr_alls, corr_bd, corr_ind = map(
    pd.read_csv,
    (
        scaled_all_song_correlated_features_csv_path,
        scaled_bangladesh_song_correlated_features_csv_path,
        scaled_kolkata_song_correlated_features_csv_path,
    ),
)

In [7]:
def pearcon_correlarion(df,key_name1,key_name2):
    """Return the Pearson correlation coefficient between two columns.

    The function name is misspelled ("pearcon_correlarion") but is kept
    as-is because other cells call it by this name.

    Parameters
    ----------
    df : object indexable by column name (e.g. a pandas DataFrame)
    key_name1 : name of the first column
    key_name2 : name of the second column

    Returns
    -------
    The first element of the ``scipy.stats.pearsonr`` result (the
    correlation coefficient); the accompanying p-value is discarded.
    """
    first_column = df[key_name1]
    second_column = df[key_name2]
    # pearsonr yields (coefficient, p-value); only the coefficient is wanted.
    return pearsonr(first_column, second_column)[0]

In [8]:
def kendalltau_correlation(df,key_name1,key_name2):
    """Return Kendall's tau between two columns.

    Parameters
    ----------
    df : object indexable by column name (e.g. a pandas DataFrame)
    key_name1 : name of the first column
    key_name2 : name of the second column

    Returns
    -------
    The first element of the ``scipy.stats.kendalltau`` result (the tau
    statistic, computed with SciPy's default settings); the accompanying
    p-value is discarded.
    """
    first_column = df[key_name1]
    second_column = df[key_name2]
    # kendalltau yields (statistic, p-value); only the statistic is wanted.
    return kendalltau(first_column, second_column)[0]

In [9]:
# Correlated-feature frames in a fixed order; the report cells below pick
# one by position, so the order must not change.
corr_arr_of_dataframes = [
    corr_alls,  # index 0
    corr_bd,    # index 1
    corr_ind,   # index 2
]

In [17]:
# Pearson coefficient of field 2 ('Popularity') against each field in
# positions 4-15 ('Duration MS' through 'Danceability'), computed on the
# frame at index 0. Field 3 ('Time Signature') is not included.
print("All Songs Pearcon")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[0]
for feature_field in corr_fieldnames[4:16]:
    coefficient = pearcon_correlarion(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

All Songs Pearcon
Popularity X Duration MS : 0.11167250312376391
Popularity X Tempo : 0.0017356501251653254
Popularity X Valence : -0.12962150974682113
Popularity X Liveness : 0.014026266746400305
Popularity X Instrumentalness : 0.046306053662060614
Popularity X Acousticness : -0.020712191183934178
Popularity X Speechiness : -0.032852974600730146
Popularity X Loudness : 0.07689787158153406
Popularity X Key : 0.07843200073415785
Popularity X Mode : -0.001208273148365844
Popularity X Energy : 0.06936849925032981
Popularity X Danceability : 0.04707612507413757


In [15]:
# Kendall's tau of field 2 ('Popularity') against each field in positions
# 4-15 ('Duration MS' through 'Danceability'), computed on the frame at
# index 0. Field 3 ('Time Signature') is not included.
print("All Songs Kendalltau")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[0]
for feature_field in corr_fieldnames[4:16]:
    coefficient = kendalltau_correlation(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

All Songs Kendalltau
Popularity X Duration MS : 0.11360667446903251
Popularity X Tempo : 0.004597909253384116
Popularity X Valence : -0.09718594788645554
Popularity X Liveness : 0.004027930958312773
Popularity X Instrumentalness : -0.042811920093536474
Popularity X Acousticness : 0.023665665851023195
Popularity X Speechiness : -0.015062497188199365
Popularity X Loudness : 0.0622633544729099
Popularity X Key : 0.04777652064952468
Popularity X Mode : 0.0033226520646052293
Popularity X Energy : 0.02951301317357545
Popularity X Danceability : 0.0396211244235789


In [18]:
# Pearson coefficient of field 2 ('Popularity') against each field in
# positions 4-15 ('Duration MS' through 'Danceability'), computed on the
# frame at index 1. Field 3 ('Time Signature') is not included.
print("BD Songs Pearcon")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[1]
for feature_field in corr_fieldnames[4:16]:
    coefficient = pearcon_correlarion(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

BD Songs Pearcon
Popularity X Duration MS : 0.14068596515924028
Popularity X Tempo : -0.050374992603946823
Popularity X Valence : -0.05753671790734723
Popularity X Liveness : -0.09884148766254672
Popularity X Instrumentalness : 0.004282608859382911
Popularity X Acousticness : 0.2930961518487824
Popularity X Speechiness : -0.19271156790564195
Popularity X Loudness : -0.2059167895809701
Popularity X Key : 0.05277038129960612
Popularity X Mode : 0.12446720403576411
Popularity X Energy : -0.174094109393799
Popularity X Danceability : 0.1663831857116844


In [19]:
# Kendall's tau of field 2 ('Popularity') against each field in positions
# 4-15 ('Duration MS' through 'Danceability'), computed on the frame at
# index 1. Field 3 ('Time Signature') is not included.
print("BD Songs Kendalltau")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[1]
for feature_field in corr_fieldnames[4:16]:
    coefficient = kendalltau_correlation(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

BD Songs Kendalltau
Popularity X Duration MS : 0.10957842368331298
Popularity X Tempo : -0.05846763949180304
Popularity X Valence : -0.051517601647459936
Popularity X Liveness : -0.046147466344033834
Popularity X Instrumentalness : -0.06917179402816766
Popularity X Acousticness : 0.21489761535066682
Popularity X Speechiness : -0.059332456728043506
Popularity X Loudness : -0.0894438723351424
Popularity X Key : 0.042922796893927806
Popularity X Mode : 0.09953903718246176
Popularity X Energy : -0.09335144358675072
Popularity X Danceability : 0.12831077639823335


In [20]:
# Pearson coefficient of field 2 ('Popularity') against each field in
# positions 4-15 ('Duration MS' through 'Danceability'), computed on the
# frame at index 2. Field 3 ('Time Signature') is not included.
print("IND Songs Pearcon")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[2]
for feature_field in corr_fieldnames[4:16]:
    coefficient = pearcon_correlarion(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

IND Songs Pearcon
Popularity X Duration MS : 0.12429557674696753
Popularity X Tempo : -0.023724731817645832
Popularity X Valence : -0.15023963314100805
Popularity X Liveness : 0.07271664898302882
Popularity X Instrumentalness : -0.016867428753039922
Popularity X Acousticness : -0.14323746412340715
Popularity X Speechiness : 0.07507164847528021
Popularity X Loudness : 0.18376225071002886
Popularity X Key : 0.15101918206828113
Popularity X Mode : -0.0882248382183587
Popularity X Energy : 0.1744767240408974
Popularity X Danceability : 0.07582278030102829


In [21]:
# Kendall's tau of field 2 ('Popularity') against each field in positions
# 4-15 ('Duration MS' through 'Danceability'), computed on the frame at
# index 2. Field 3 ('Time Signature') is not included.
print("IND Songs Kendalltau")
target_field = corr_fieldnames[2]
selected_frame = corr_arr_of_dataframes[2]
for feature_field in corr_fieldnames[4:16]:
    coefficient = kendalltau_correlation(selected_frame, target_field, feature_field)
    print(target_field, 'X', feature_field, ':', coefficient)

IND Songs Kendalltau
Popularity X Duration MS : 0.15080666937960246
Popularity X Tempo : -0.009233061390587905
Popularity X Valence : -0.11699665564948
Popularity X Liveness : 0.030421124641418294
Popularity X Instrumentalness : -0.05113030345666611
Popularity X Acousticness : -0.08657643029903121
Popularity X Speechiness : 0.03196143474984454
Popularity X Loudness : 0.12541575055548573
Popularity X Key : 0.12187868996775622
Popularity X Mode : -0.05465670559868272
Popularity X Energy : 0.10085185572083742
Popularity X Danceability : 0.05396232984387949
