# <center> SPACY ON MUSIC </center>

<p> In this project we carried out a series of studies using the spaCy library for Python to perform sentiment analysis, entity analysis, grammatical parsing and keyword extraction on different artists and their original compositions. </p>

##### Reset the namespace and install the dependencies

In [None]:

%reset -f
!pip install -U spacy==3.2.0
!python -m spacy download 'pt_core_news_lg'
!pip install matplotlib.pyplot

# <center> SENTIMENT ANALYSIS </center>

<p> In the code below, we parse the datasets contained in the 'singer' directory. These datasets are song lyrics by various singers. Each composition is analyzed individually to determine whether its lyrics are romantic, non-romantic, or neutral. After the individual analysis of each composition, a general analysis of the singer or band is produced, stating whether the artist as a whole is romantic. </p>

In [None]:
# imports
import json
import os
import spacy
import matplotlib.pyplot as plt

# main variables
# Resolve the project root relative to the notebook's working directory:
# the parent of the cwd, then two more levels up.
pathFull = os.getcwd()
dirFull = os.path.dirname(pathFull)
dirBase = os.path.abspath(os.path.join(dirFull, os.pardir, os.pardir))

# Read the parameter file inside a context manager so the handle is always
# closed, even if the JSON is malformed.
with open(os.path.join(dirBase, "ia", "nlp", "param", "data.json"), encoding="utf-8") as data:
    var = json.load(data)

# 'dataset_dir' is the dataset location relative to the project root.
dataset = f"{dirBase}/{var['dataset_dir']}"
nlp = spacy.load('pt_core_news_lg')

# One absolute path per sub-directory of the dataset (one per artist).
# Sorted so that the processing order and the charts are reproducible, since
# os.listdir returns entries in arbitrary order.
dir_path = [
    os.path.abspath(os.path.join(dataset, entry))
    for entry in sorted(os.listdir(dataset))
    if os.path.isdir(os.path.join(dataset, entry))
]

# Running totals across every analysed song.
total_songs = 0
total_positive = 0
total_negative = 0

# variables for artist-level data
# Parallel lists: index i of each list describes the same artist.
artists = []
positive_counts = []
negative_counts = []

# code
# Word forms counted as evidence of a romantic song. Built once, outside the
# loops, as a set so each token needs only a constant-time membership test.
# NOTE(review): the positive check below only runs for tokens tagged NOUN, so
# the verb forms listed here ("amar", "amarei", "amou", "amando") can only
# match when the tagger labels them as nouns -- confirm whether verbs should
# be counted as well.
conj = frozenset({"amor", "amar", "amarei", "amou", "amando"})

for file_name in dir_path:
    # Each entry of dir_path is one artist directory; its base name is the artist.
    artist_name = os.path.basename(file_name)
    artist_positive = 0
    artist_negative = 0

    # Sorted so songs are reported in a reproducible order.
    for song_file in sorted(os.listdir(file_name)):
        file_path = os.path.join(file_name, song_file)

        # Skip sub-directories and other non-regular entries: open() would
        # raise on them and abort the whole analysis.
        if not os.path.isfile(file_path):
            continue

        with open(file_path, "r", encoding="utf-8") as handle:
            txt = handle.read()

        rows = txt.splitlines()
        if not rows:
            # An empty file has no title line and no lyrics to analyse.
            print(f"Skipping empty file: '{file_path}'.")
            continue

        # Line 1 is used as the title, line 2 is ignored, and the lyrics are
        # everything from line 3 onwards, lower-cased to match `conj`.
        titles = rows[0]
        content = '\n'.join(rows[2:]).lower()

        document = nlp(content)

        num_positive = 0
        num_negative = 0

        for token in document:
            if token.pos_ == 'NOUN':
                if token.text in conj:
                    num_positive += 1
                # NOTE(review): Token.sentiment defaults to 0.0 unless a
                # component populates it; presumably the stock pipeline does
                # not, in which case this branch never fires -- confirm.
                elif token.sentiment <= -0.5:
                    num_negative += 1

        # Classify the song by which kind of hit dominates.
        if num_positive > num_negative:
            connotation = 'romantic'
        elif num_positive < num_negative:
            connotation = 'non-romantic'
        else:
            connotation = 'neutral'

        print(f"Analyzed Song: '{titles}':")
        print(f"Connotation: {connotation.upper()}.")
        print("------------------------")

        total_songs += 1
        total_positive += num_positive
        total_negative += num_negative
        artist_positive += num_positive
        artist_negative += num_negative

    # Record this artist's totals for the charts.
    artists.append(artist_name)
    positive_counts.append(artist_positive)
    negative_counts.append(artist_negative)

# Label the report with every analysed artist instead of the loop's leftover
# `artist_name`: that variable only names the last directory visited and does
# not exist at all when the dataset contains no artist directories, which made
# the "no files" branch fail with a NameError.
report_label = ", ".join(artists).upper() if artists else str(dataset)

if total_songs > 0:
    # Average number of romantic / negative hits per analysed song.
    avg_positive = total_positive / total_songs
    avg_negative = total_negative / total_songs

    if avg_positive > avg_negative:
        connotation = 'romantic'
    elif avg_positive < avg_negative:
        connotation = 'non-romantic'
    else:
        connotation = 'neutral'

    print("General Report:")
    print(f"Average connotation of songs from {report_label}: {connotation.upper()}.")
    print(f"Total analyzed songs: {total_songs}.")

    # Two stacked bar charts, one bar per artist.
    # NOTE(review): the plotted values are the accumulated word hits per
    # artist, not a number of songs, although the titles say "Songs".
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8, 10))

    ax1.bar(artists, positive_counts, label='Romantic')
    ax1.set_ylabel('Count')
    ax1.set_title('Romantic Songs by Artist')
    ax1.legend()

    ax2.bar(artists, negative_counts, label='Non-Romantic')
    ax2.set_xlabel('Artists')
    ax2.set_ylabel('Count')
    ax2.set_title('Non-Romantic Songs by Artist')
    ax2.legend()

    plt.tight_layout()

    plt.show()

else:
    print(f"No files found in the {report_label} folder.")
