In [12]:
import os
import math
import re
import statistics
import pandas as pd
import seaborn as sns
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import kendalltau

In [29]:
# Names used as column labels when querying the correlated-feature frames.
# Later cells index into this list by position, so the order is significant.
corr_fieldnames = [
    # Track identity and the popularity score.
    'Country', 'Song Name', 'Popularity',

    # Single features.
    'Time Signature', 'Duration MS', 'Tempo', 'Valence', 'Liveness',
    'Instrumentalness', 'Acousticness', 'Speechiness', 'Loudness',
    'Key', 'Mode', 'Energy', 'Danceability',

    # Paired-feature names of the form 'A-B', grouped by the first feature.
    # How a paired value is computed is not shown in this notebook.
    # NOTE(review): 'Valence-Instrumentalness', 'Instrumentalness-Speechiness'
    # and 'Speechiness-Danceability' are not listed -- confirm this is intended.
    'Tempo-Valence', 'Tempo-Liveness', 'Tempo-Instrumentalness',
    'Tempo-Acousticness', 'Tempo-Speechiness', 'Tempo-Loudness',
    'Tempo-Energy', 'Tempo-Danceability',

    'Valence-Liveness', 'Valence-Acousticness', 'Valence-Speechiness',
    'Valence-Loudness', 'Valence-Energy', 'Valence-Danceability',

    'Liveness-Instrumentalness', 'Liveness-Acousticness',
    'Liveness-Speechiness', 'Liveness-Loudness', 'Liveness-Energy',
    'Liveness-Danceability',

    'Instrumentalness-Acousticness', 'Instrumentalness-Loudness',
    'Instrumentalness-Energy', 'Instrumentalness-Danceability',

    'Acousticness-Speechiness', 'Acousticness-Loudness',
    'Acousticness-Energy', 'Acousticness-Danceability',

    'Speechiness-Loudness', 'Speechiness-Energy',

    'Loudness-Energy', 'Loudness-Danceability',

    'Energy-Danceability',
]

In [30]:
# Backup song-feature CSVs, addressed relative to the notebook's working directory.
# Judging by the file names these are the combined, Bangladesh and Kolkata sets;
# note that `ind` is the frame read from the Kolkata file.
scaled_all_backup_csv_path = '../backup_csv/scaled_backup_all_song_features_csv.csv'
scaled_bangladesh_backup_csv_path = '../backup_csv/scaled_backup_bangladesh_song_features_csv.csv'
scaled_kolkata_backup_csv_path = '../backup_csv/scaled_backup_kolkata_song_features_csv.csv'

# Read the three files in the same order as before: all, Bangladesh, Kolkata.
alls, bd, ind = (
    pd.read_csv(backup_csv_path)
    for backup_csv_path in (
        scaled_all_backup_csv_path,
        scaled_bangladesh_backup_csv_path,
        scaled_kolkata_backup_csv_path,
    )
)

In [31]:
# Correlated-feature CSVs, addressed relative to the notebook's working directory.
# Judging by the file names these are the combined, Bangladesh and Kolkata sets;
# note that `corr_ind` is the frame read from the Kolkata file.
scaled_all_song_correlated_features_csv_path = '../correlated_csv/scaled_all_song_correlated_features_csv.csv'
scaled_bangladesh_song_correlated_features_csv_path = '../correlated_csv/scaled_bangladesh_song_correlated_features_csv.csv'
scaled_kolkata_song_correlated_features_csv_path = '../correlated_csv/scaled_kolkata_song_correlated_features_csv.csv'

# Read the three files in the same order as before: all, Bangladesh, Kolkata.
corr_alls, corr_bd, corr_ind = (
    pd.read_csv(correlated_csv_path)
    for correlated_csv_path in (
        scaled_all_song_correlated_features_csv_path,
        scaled_bangladesh_song_correlated_features_csv_path,
        scaled_kolkata_song_correlated_features_csv_path,
    )
)

In [32]:
def pearson_correlarion(df,key_name1,key_name2):
    """Return the Pearson correlation coefficient between two columns of ``df``.

    Parameters
    ----------
    df : object indexable by column label (a pandas DataFrame in this notebook)
    key_name1, key_name2 : labels of the two columns to correlate

    Returns
    -------
    The first element of ``scipy.stats.pearsonr``'s result (the coefficient);
    the accompanying p-value is discarded.
    """
    first_column = df[key_name1]
    second_column = df[key_name2]
    # pearsonr yields (coefficient, p-value); only the coefficient is kept.
    return pearsonr(first_column, second_column)[0]

In [33]:
def kendalltau_correlation(df,key_name1,key_name2):
    """Return Kendall's tau between two columns of ``df``.

    Parameters
    ----------
    df : object indexable by column label (a pandas DataFrame in this notebook)
    key_name1, key_name2 : labels of the two columns to correlate

    Returns
    -------
    The first element of ``scipy.stats.kendalltau``'s result (the tau
    statistic); the accompanying p-value is discarded.
    """
    first_column = df[key_name1]
    second_column = df[key_name2]
    # kendalltau yields (tau, p-value); only tau is kept.
    return kendalltau(first_column, second_column)[0]

In [34]:
# Positional lookup for the correlated frames:
# index 0 -> corr_alls, index 1 -> corr_bd, index 2 -> corr_ind.
corr_arr_of_dataframes = [
    corr_alls,
    corr_bd,
    corr_ind,
]

In [36]:
# Print the Pearson coefficient of Popularity against every field from
# 'Duration MS' to the end of corr_fieldnames, using the first frame in
# corr_arr_of_dataframes (corr_alls).
#
# The fields are located by name and the slice is open-ended, so the report
# keeps covering the whole tail of corr_fieldnames if entries are added or
# removed, instead of relying on the literal positions 2, 4 and 49.
# NOTE(review): 'Time Signature' sits before 'Duration MS' and is therefore
# not reported -- confirm that skipping it is intentional.
print("All Songs Pearson Correlation")
target_field = 'Popularity'
first_feature_pos = corr_fieldnames.index('Duration MS')
for feature_field in corr_fieldnames[first_feature_pos:]:
    corr = pearson_correlarion(corr_arr_of_dataframes[0], target_field, feature_field)
    print(target_field,'X',feature_field,':',corr)

All Songs Pearson Correlation
Popularity X Duration MS : 0.11167250312376391
Popularity X Tempo : 0.0017356501251653254
Popularity X Valence : -0.12962150974682113
Popularity X Liveness : 0.014026266746400305
Popularity X Instrumentalness : 0.046306053662060614
Popularity X Acousticness : -0.020712191183934178
Popularity X Speechiness : -0.032852974600730146
Popularity X Loudness : 0.07689787158153406
Popularity X Key : 0.07843200073415785
Popularity X Mode : -0.001208273148365844
Popularity X Energy : 0.06936849925032981
Popularity X Danceability : 0.04707612507413757
Popularity X Tempo-Valence : -0.10675909020127788
Popularity X Tempo-Liveness : 0.014568059590246433
Popularity X Tempo-Instrumentalness : 0.0380472130615681
Popularity X Tempo-Acousticness : -0.016659551351635316
Popularity X Tempo-Speechiness : -0.04140851619813284
Popularity X Tempo-Loudness : 0.012198670794615457
Popularity X Tempo-Energy : 0.024698524000911472
Popularity X Tempo-Danceability : 0.03306429391259429
Po

In [37]:
# Print Kendall's tau of Popularity against every field from 'Duration MS'
# to the end of corr_fieldnames, using the first frame in
# corr_arr_of_dataframes (corr_alls).
#
# The fields are located by name and the slice is open-ended, so the report
# keeps covering the whole tail of corr_fieldnames if entries are added or
# removed, instead of relying on the literal positions 2, 4 and 49.
# NOTE(review): 'Time Signature' sits before 'Duration MS' and is therefore
# not reported -- confirm that skipping it is intentional.
print("All Songs Kendalltau")
target_field = 'Popularity'
first_feature_pos = corr_fieldnames.index('Duration MS')
for feature_field in corr_fieldnames[first_feature_pos:]:
    corr = kendalltau_correlation(corr_arr_of_dataframes[0], target_field, feature_field)
    print(target_field,'X',feature_field,':',corr)

All Songs Kendalltau
Popularity X Duration MS : 0.11360667446903251
Popularity X Tempo : 0.004597909253384116
Popularity X Valence : -0.09718594788645554
Popularity X Liveness : 0.004027930958312773
Popularity X Instrumentalness : -0.042811920093536474
Popularity X Acousticness : 0.023665665851023195
Popularity X Speechiness : -0.015062497188199365
Popularity X Loudness : 0.0622633544729099
Popularity X Key : 0.04777652064952468
Popularity X Mode : 0.0033226520646052293
Popularity X Energy : 0.02951301317357545
Popularity X Danceability : 0.0396211244235789
Popularity X Tempo-Valence : -0.02557707865447702
Popularity X Tempo-Liveness : 0.03161211968530868
Popularity X Tempo-Instrumentalness : -0.042912023202797404
Popularity X Tempo-Acousticness : 0.041287344195054664
Popularity X Tempo-Speechiness : 0.018679888904955127
Popularity X Tempo-Loudness : 0.02136212936310253
Popularity X Tempo-Energy : 0.03975463536182758
Popularity X Tempo-Danceability : 0.041287344195054664
Popularity X V

In [22]:
# print("BD Songs Pearson")
# for i in range(4,16):
#     corr = pearson_correlarion(corr_arr_of_dataframes[1],corr_fieldnames[2],corr_fieldnames[i])
#     print(corr_fieldnames[2],'X',corr_fieldnames[i],':',corr)

In [23]:
# print("BD Songs Kendalltau")
# for i in range(4,16):
#     corr = kendalltau_correlation(corr_arr_of_dataframes[1],corr_fieldnames[2],corr_fieldnames[i])
#     print(corr_fieldnames[2],'X',corr_fieldnames[i],':',corr)

In [24]:
# print("IND Songs Pearson")
# for i in range(4,16):
#     corr = pearson_correlarion(corr_arr_of_dataframes[2],corr_fieldnames[2],corr_fieldnames[i])
#     print(corr_fieldnames[2],'X',corr_fieldnames[i],':',corr)

In [25]:
# print("IND Songs Kendalltau")
# for i in range(4,16):
#     corr = kendalltau_correlation(corr_arr_of_dataframes[2],corr_fieldnames[2],corr_fieldnames[i])
#     print(corr_fieldnames[2],'X',corr_fieldnames[i],':',corr)