### USEFUL FUNCTIONS

Helper functions shared by `main_TheExplainerNotebook.ipynb` and the other notebooks.

In [None]:
# Basic statistics for a numerical LIST
def basic_stats(LIST, ratio=False, r=0):
    """Print summary statistics for a sequence of numbers.

    Prints the element count (optionally followed by that count as a
    percentage of ``r``), then the variance, mean, median, maximum and
    minimum. Nothing is returned.

    Args:
        LIST: Non-empty sequence of numbers; it must support ``len``.
        ratio: When true, also print ``len(LIST)`` as a percentage of ``r``.
        r: Reference total used for the percentage. When it is zero (the
            default) the percentage is left out.
    """
    count = len(LIST)

    # Only show the percentage when there is a non-zero total to divide by:
    # ratio=True combined with the default r=0 used to raise ZeroDivisionError.
    if ratio and r:
        print("  N: ", count, f"({round(100*count/r,2)}%)")
    else:
        print("  N: ", count)

    # np.var is the population variance (NumPy's default, ddof=0).
    print('  Variance', round(np.var(LIST),2))
    print('  Average:', round(np.mean(LIST),2))
    print('  Median:', np.median(LIST))
    print('  Max:', max(LIST))
    print('  Min:', min(LIST))

In [None]:
# Degree summary helper, re-used when evaluating the different artist groups
# (genres, popular, sentiment, etc.)
def degree_stats(G):
    """Print the average, median, mode, maximum and minimum degree of ``G``.

    ``G`` must expose ``degree()`` yielding pairs whose second item is the
    degree; only that second item is used.
    """
    degrees = [pair[1] for pair in G.degree()]

    # Each metric is computed right before it is printed, so a failure in a
    # later metric still leaves the earlier lines on screen.
    metrics = (
        ('Average:', lambda: round(np.mean(degrees), 3)),
        ('Median:', lambda: np.median(degrees)),
        ('Mode:', lambda: statistics.mode(degrees)),
        ('Max:', lambda: max(degrees)),
        ('Min:', lambda: min(degrees)),
    )
    for label, compute in metrics:
        print(label, compute())

In [2]:
def degree_dist(G, xlim=(-10, 200), ylim=(0, 700)):
    """Plot the degree distribution of ``G`` as a bar chart and a log-log plot.

    The left panel is a bar chart of how many nodes have each degree; the
    right panel shows the same counts as points on log-log axes. The figure
    is displayed with ``plt.show()`` and nothing is returned.

    Args:
        G: Object exposing ``degree()`` that yields ``(node, degree)`` pairs.
        xlim: ``(low, high)`` x-axis limits of the bar chart, or ``None`` to
            let matplotlib choose them. Defaults to the limits that used to
            be hard-coded.
        ylim: ``(low, high)`` y-axis limits of the bar chart, or ``None`` to
            let matplotlib choose them. Defaults to the limits that used to
            be hard-coded.

    Raises:
        IndexError: If ``G`` has no nodes.
    """
    # A single descending degree sequence feeds both panels (it used to be
    # computed twice, next to a separate sort of the (node, degree) pairs).
    degree_sequence = sorted((d for _, d in G.degree()), reverse=True)
    min_degree, max_degree = degree_sequence[-1], degree_sequence[0]

    # Log-log panel: unit-width bins, one per integer degree from min to max.
    hist, bin_edges = np.histogram(
        np.array(degree_sequence),
        bins=np.arange(min_degree, max_degree + 2),
    )

    # Bar panel: deg holds the distinct degrees, cnt how often each occurs.
    degree_count = collections.Counter(degree_sequence)
    deg, cnt = zip(*degree_count.items())

    # Plot:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

    ax1.bar(deg, cnt, color="mediumseagreen", width=2)
    ax1.set_xlabel('Degree')
    ax1.set_ylabel("Frequency")
    ax1.set_title("Degree distribution")

    if xlim is not None:
        ax1.set_xlim(xlim)
    if ylim is not None:
        ax1.set_ylim(ylim)

    # Each point sits at the left edge of its bin, i.e. at the degree itself.
    ax2.loglog(bin_edges[:-1], hist, '.', color='dodgerblue')

    ax2.set_xlabel('Degree')
    ax2.set_ylabel("Frequency")
    ax2.set_title("Degree distribution (Log-Log)")

    fig.tight_layout()
    fig.subplots_adjust(top=0.9)

    sns.despine()
    plt.show()

    # fig.savefig("src/webpage/pictures/deg_dist.png")

In [None]:
## Function built to show wordclouds generated from text
# Takes the text, a title and the max number of words in the cloud as input
def show_wordcloud(text,title,nr_words):
    """Build a word cloud from ``text`` and display it under ``title``.

    ``text`` is handed to ``WordCloud.generate``; at most ``nr_words`` words
    are drawn, on a white background and with collocations disabled.
    """
    cloud_builder = wordcloud.WordCloud(
        background_color="white",
        collocations=False,
        max_words=nr_words,
        max_font_size=100,
    )
    cloud_image = cloud_builder.generate(text)

    plt.figure(figsize=(10, 7), frameon=True)
    plt.imshow(cloud_image, interpolation="bilinear")
    plt.axis('off')
    plt.title(title, fontsize=20)
    plt.show()

In [None]:
## Functions built to show green wordclouds for word counts
# green_color_func picks the (green) colour of each word; show_wordcloud_gen takes a
# word -> count mapping, a title and the max number of words in the cloud as input
def green_color_func(word, font_size, position, orientation, random_state=None,
                    **kwargs):
    """Colour callback for word clouds: a green with random lightness.

    Returns an HSL colour string with hue 150, saturation 60% and a lightness
    drawn uniformly from the integers 30..70. ``word``, ``font_size``,
    ``position``, ``orientation`` and any extra keyword arguments are accepted
    for compatibility with the callback signature but are not used.

    Args:
        random_state: ``random.Random`` instance (or an int seed) to draw the
            lightness from. When ``None``, the module-level ``random``
            generator is used.

    Returns:
        A string of the form ``"hsl(150, 60%, <lightness>%)"``.
    """
    # Draw from the generator handed in by the caller, so that a seeded
    # recolor(random_state=...) gives the same colours on every run. The
    # argument used to be ignored in favour of the global generator.
    if random_state is None:
        rng = random
    elif isinstance(random_state, int):
        rng = random.Random(random_state)
    else:
        rng = random_state
    return "hsl(150, 60%%, %d%%)" % rng.randint(30, 70)

def show_wordcloud_gen(text,title,nr_words):
    """Display a green word cloud built from word frequencies.

    ``text`` is passed to ``WordCloud.generate_from_frequencies``; at most
    ``nr_words`` words are drawn. The cloud is recoloured with
    ``green_color_func`` and shown under ``title``.
    """
    cloud_builder = wordcloud.WordCloud(
        background_color="white",
        collocations=False,
        max_words=nr_words,
        max_font_size=100,
    )
    cloud = cloud_builder.generate_from_frequencies(text)

    plt.figure(figsize=(10, 7), frameon=True)

    # Swap the default palette for shades of green before drawing.
    green_cloud = cloud.recolor(color_func=green_color_func, random_state=3)
    plt.imshow(green_cloud, interpolation="bilinear")

    plt.axis('off')
    plt.title(title, fontsize=14)

    plt.show()

In [None]:
## Function built to show wordclouds for TF-IDF scores
# Takes the TF-IDF scores, a title and the max number of words in the cloud as input
def show_wordcloud_TF_IDF(TF_IDF_words,title,nr_words):
    """Display a green word cloud sized by TF-IDF score.

    ``TF_IDF_words`` is passed to ``WordCloud.generate_from_frequencies``; at
    most ``nr_words`` words are drawn. The cloud is recoloured with
    ``green_color_func`` and shown under ``title``.
    """
    cloud_builder = wordcloud.WordCloud(
        background_color="white",
        collocations=False,
        max_words=nr_words,
        max_font_size=100,
    )
    cloud = cloud_builder.generate_from_frequencies(TF_IDF_words)

    plt.figure(figsize=(10, 7), frameon=True)
    green_cloud = cloud.recolor(color_func=green_color_func, random_state=3)
    plt.imshow(green_cloud, interpolation="bilinear")
    plt.axis('off')
    plt.title(title, fontsize=20)
    plt.show()

In [None]:
def SF_show_wordcloud_gen(text,title,nr_words,name):
    """Display a green word cloud of word frequencies and save it to ``name``.

    ``text`` is passed to ``WordCloud.generate_from_frequencies``; at most
    ``nr_words`` words are drawn. The figure is written with ``fig.savefig``
    before it is shown.

    Note: this sets ``plt.rcParams['figure.dpi']`` to 360 and does not
    restore the previous value afterwards.
    """
    cloud_builder = wordcloud.WordCloud(
        background_color="white",
        collocations=False,
        max_words=nr_words,
        max_font_size=100,
    )
    cloud = cloud_builder.generate_from_frequencies(text)

    # The DPI has to be raised before the figure is created.
    plt.rcParams['figure.dpi'] = 360
    fig, _axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 6), frameon=True)

    green_cloud = cloud.recolor(color_func=green_color_func, random_state=3)
    plt.imshow(green_cloud, interpolation="bilinear")

    plt.axis('off')
    plt.title(title, fontsize=14)
    # Save first, then show.
    fig.savefig(name)
    plt.show()

In [None]:
def SF_show_wordcloud_TF_IDF(TF_IDF_words,title,nr_words,name):
    """Display a green TF-IDF word cloud and save it to ``name``.

    ``TF_IDF_words`` is passed to ``WordCloud.generate_from_frequencies``; at
    most ``nr_words`` words are drawn. The figure is written with
    ``fig.savefig`` before it is shown.

    Note: this sets ``plt.rcParams['figure.dpi']`` to 360 and does not
    restore the previous value afterwards.
    """
    cloud_builder = wordcloud.WordCloud(
        background_color="white",
        collocations=False,
        max_words=nr_words,
        max_font_size=100,
    )
    cloud = cloud_builder.generate_from_frequencies(TF_IDF_words)

    # The DPI has to be raised before the figure is created.
    plt.rcParams['figure.dpi'] = 360
    fig, _axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 6), frameon=True)
    green_cloud = cloud.recolor(color_func=green_color_func, random_state=3)
    plt.imshow(green_cloud, interpolation="bilinear")
    plt.axis('off')
    plt.title(title, fontsize=20)
    # Save first, then show.
    fig.savefig(name)
    plt.show()

In [None]:
## Adapted from:
# https://thispointer.com/python-how-to-add-append-key-value-pairs-in-dictionary-using-dict-update/

def append_value(dict_obj, key, value):
    """Add ``value`` under ``key`` in ``dict_obj``, collecting repeats in a list.

    A new key stores ``value`` as is. For an existing key, the stored value is
    wrapped in a list first if it is not one already, and ``value`` is then
    appended to that list. ``dict_obj`` is modified in place.
    """
    # First occurrence: store the bare value and stop.
    if key not in dict_obj:
        dict_obj[key] = value
        return

    stored = dict_obj[key]
    if not isinstance(stored, list):
        # Promote the single stored value to a one-element list.
        stored = [stored]
        dict_obj[key] = stored
    stored.append(value)

In [None]:
# Match each node of the network with its corresponding list of genres
def get_artist_genres(Graph, nw_file):
    """Load the ``id -> genres`` mapping of the artists flagged as nodes.

    Args:
        Graph: Not used by this function; kept so existing calls keep working.
        nw_file: Name of a JSON file inside the ``data/`` directory, resolved
            relative to the current working directory. The file must hold a
            list of objects with the keys ``isNode``, ``id`` and ``genres``.

    Returns:
        dict mapping each ``id`` to its ``genres`` value, for the entries
        whose ``isNode`` value is truthy.
    """
    # Read as UTF-8 explicitly: without it open() falls back to the platform's
    # locale encoding, which breaks on non-ASCII artist names on some systems.
    with open("data/" + nw_file, encoding="utf-8") as f:
        artists = json.load(f)

    return {
        artist['id']: artist['genres']
        for artist in artists
        if artist['isNode']
    }