In [37]:
import pandas as pd
import numpy as np
import matplotlib

In [38]:
# Read the per-song feature table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='sentiment_data.csv')
print(df.head(n=5))

             Title                        Artist  Year Position     Ave BPM  \
0  Goodnight Irene  Gordon Jenkins & The Weavers  1950        1   94.000000   
1        Mona Lisa                 Nat King Cole  1950        2   65.000000   
2  Third Man Theme                   Anton Karas  1950        3  133.714286   
3       Sam's Song            Gary & Bing Crosby  1950        4  118.000000   
4    Simple Melody            Gary & Bing Crosby  1950        5  151.000000   

   Ave Pitch STD  Ave Pitch Value  Pitch Variation  Pitch Range  \
0       3.110000        63.748000               20           33   
1       4.115000        54.972678               16           19   
2       5.047143        65.208633               21           28   
3       3.045000        63.687679               18           21   
4       3.065000        55.836299               14           17   

   Mode Polarity  Note Onset Density  
0            1.0            1.540000  
1            1.0            1.430000  
2    

In [39]:
# Normalise each feature column onto the range -1 to 1, with 0 representing the
# "neutral" value at which no strong sentiment signal is present.
# e.g. for tempo, 90 BPM is neutral so 90 maps to 0; -1 is the slowest BPM in
# the dataset, and 1 is the fastest BPM in the dataset.

# Neutral (zero-point) value for each feature key.
normal_values = {
    'Pitch': 60,
    'Pitch Variation': 7,
    'Pitch Range': 12,
    'Mode': 0,
    'BPM': 90,
    'Note Density': 2.5,
}

# Observed extremes per feature key.
# Series.max()/min() skip NaN, whereas builtin max()/min() over a list return
# an order-dependent result when NaN is present. `.item()` converts the numpy
# scalar back to a plain Python int/float so the dictionaries print as before.
max_values = {
    'Pitch': df['Ave Pitch Value'].max().item(),
    'Pitch Variation': df['Pitch Variation'].max().item(),
    'Pitch Range': df['Pitch Range'].max().item(),
    'Mode': df['Mode Polarity'].max().item(),
    'BPM': df['Ave BPM'].max().item(),
    'Note Density': df['Note Onset Density'].max().item(),
}

min_values = {
    'Pitch': df['Ave Pitch Value'].min().item(),
    'Pitch Variation': df['Pitch Variation'].min().item(),
    'Pitch Range': df['Pitch Range'].min().item(),
    'Mode': df['Mode Polarity'].min().item(),
    'BPM': df['Ave BPM'].min().item(),
    'Note Density': df['Note Onset Density'].min().item(),
}

print(normal_values)
print(max_values)
print(min_values)

{'Pitch': 60, 'Pitch Variation': 7, 'Pitch Range': 12, 'Mode': 0, 'BPM': 90, 'Note Density': 2.5}
{'Pitch': 81.6456953642384, 'Pitch Variation': 33, 'Pitch Range': 41, 'Mode': 1.0, 'BPM': 174.0, 'Note Density': 4.47}
{'Pitch': 49.87456445993031, 'Pitch Variation': 3, 'Pitch Range': 6, 'Mode': -1.0, 'BPM': 57.0, 'Note Density': 0.71}


In [40]:
# Feature keys, in the same order as the matching DataFrame columns in `cols`.
keys = [
    'Pitch',
    'Pitch Variation',
    'Pitch Range',
    'Mode',
    'BPM',
    'Note Density',
]

# DataFrame column backing each entry of `keys`, position for position.
cols = [
    'Ave Pitch Value',
    'Pitch Variation',
    'Pitch Range',
    'Mode Polarity',
    'Ave BPM',
    'Note Onset Density',
]

In [41]:
def normalise_column(col_name, key_name, frame=None, normal=None, lows=None, highs=None):
    """Rescale one column in place, piecewise-linearly, around its neutral value.

    The neutral value maps to 0. Values below it are divided by
    ``neutral - low`` and values at or above it by ``high - neutral``, so the
    supplied low bound maps to -1 and the high bound maps to 1.

    Parameters
    ----------
    col_name : str
        Column of ``frame`` to overwrite with the normalised values.
    key_name : str
        Key used to look up the neutral / low / high values for this column.
    frame : pandas.DataFrame, optional
        Frame to modify. Defaults to the notebook-level ``df``.
    normal, lows, highs : mapping, optional
        Lookup tables keyed by ``key_name``. Default to the notebook-level
        ``normal_values``, ``min_values`` and ``max_values``.

    Returns
    -------
    None
        The column is replaced on ``frame``; calling this twice on the same
        column normalises the already-normalised values again.
    """
    if frame is None:
        frame = df
    if normal is None:
        normal = normal_values
    if lows is None:
        lows = min_values
    if highs is None:
        highs = max_values

    neutral = normal[key_name]
    below_span = neutral - lows[key_name]
    above_span = highs[key_name] - neutral

    # np.where evaluates both branches eagerly, so a zero span would raise
    # ZeroDivisionError even when no row uses that branch. When the bounds are
    # the column's observed extremes, a zero span means no value lies strictly
    # on that side of neutral, so the scale factor is irrelevant; use 0.0.
    below_scale = 1 / below_span if below_span != 0 else 0.0
    above_scale = 1 / above_span if above_span != 0 else 0.0

    # Shift so the neutral value sits at 0, then scale each side independently.
    centred = frame[col_name] - neutral
    frame[col_name] = np.where(
        centred < 0,
        centred * below_scale,
        centred * above_scale,
    )

In [42]:
# Normalise every feature column, pairing each column with its lookup key.
# NOTE: normalise_column overwrites df in place, so re-running this cell
# normalises the already-normalised values a second time.
for col, key in zip(cols, keys):
    normalise_column(col_name=col, key_name=key)

In [43]:
# Print the frame after normalisation to inspect the rescaled feature columns
# (pandas abbreviates long frames in the printed form).
print(df)

               Title                        Artist  Year Position   Ave BPM  \
0    Goodnight Irene  Gordon Jenkins & The Weavers  1950        1  0.047619   
1          Mona Lisa                 Nat King Cole  1950        2 -0.757576   
2    Third Man Theme                   Anton Karas  1950        3  0.520408   
3         Sam's Song            Gary & Bing Crosby  1950        4  0.333333   
4      Simple Melody            Gary & Bing Crosby  1950        5  0.726190   
..               ...                           ...   ...      ...       ...   
360       Heat Waves                 Glass Animals  2022        1 -0.272727   
361        As It Was                  Harry Styles  2022        2  1.000000   
362             Stay   Kid Laroi and Justin Bieber  2022        3  0.952381   
363       Easy On Me                         Adele  2022        4 -0.575758   
364          Shivers                    Ed Sheeran  2022        5  0.607143   

     Ave Pitch STD  Ave Pitch Value  Pitch Variatio

In [45]:
# Persist the normalised frame, without writing the row index as a column.
df.to_csv(path_or_buf='normalised_sentiment_data.csv', index=False)