In [22]:
import pandas as pd

# Relative path to the reviews CSV; passed to Reviews(filename=...) when the data is loaded.
REVIEWS_FILENAME = "./fantano-fetch/bin/reviews.csv"

In [313]:
class Reviews:
    ''' Class:      Reviews
        Purpose:    wraps a pandas DataFrame of reviews and offers column accessors
                    plus filters that each return a new Reviews object
        Columns:    reads "artist", "title", "rating", "genres" and "publishedAt";
                    a "genres" cell holds genre tags joined by "; "
    '''

    # Separator between genre tags inside a single "genres" cell.
    GENRE_SEPARATOR = "; "

    def __init__(self, filename=None, df=None):
        ''' Function:   __init__
            Parameters: filename, path of a CSV file to load (takes precedence over df)
                        df, an already-built dataframe to wrap as-is
            Raises:     ValueError, if neither filename nor df is supplied
        '''
        if filename is not None:
            self.df = pd.read_csv(filename)
            # Only the CSV path parses the timestamps; a supplied df is trusted as given.
            self.df["publishedAt"] = pd.to_datetime(self.published_at())
        elif df is not None:
            self.df = df
        else:
            raise ValueError("Need to supply a source filename or dataframe")

    @classmethod
    def _split_genres(cls, genres):
        ''' Function:   _split_genres
            Parameters: genres, the raw value of one "genres" cell
            Returns:    list, the individual genre tags; empty when the cell is missing
        '''
        if isinstance(genres, str):
            return genres.split(cls.GENRE_SEPARATOR)
        # A missing cell (NaN / None / NA) has no genres. Stringifying it would
        # invent a bogus "nan" genre that then matches searches such as "na".
        if pd.api.types.is_scalar(genres) and pd.isna(genres):
            return []
        return str(genres).split(cls.GENRE_SEPARATOR)

    def artist(self):
        ''' Returns the "artist" column. '''
        return self.df["artist"]

    def title(self):
        ''' Returns the "title" column. '''
        return self.df["title"]

    def rating(self):
        ''' Returns the "rating" column. '''
        return self.df["rating"]

    def genres(self):
        ''' Returns the raw "genres" column (tags still joined by "; "). '''
        return self.df["genres"]

    def published_at(self):
        ''' Returns the "publishedAt" column. '''
        return self.df["publishedAt"]

    def on_genres(self, fn):
        ''' Function:   on_genres
            Parameters: fn, function to execute on a list of genres for a single review
            Returns:    list, one result of fn per review, in row order
        '''
        return [fn(self._split_genres(genres)) for genres in self.genres()]

    def get_by_artist(self, artist):
        ''' Function:   get_by_artist
            Parameters: artist, the exact artist to search for
            Returns:    Reviews, reviews of the given artist
        '''
        return Reviews(df=self.df.loc[self.artist() == artist])

    def get_by_rating(self, rating):
        ''' Function:   get_by_rating
            Parameters: rating, the exact rating to search for
            Returns:    Reviews, reviews with the given rating
            Raises:     ValueError, if rating is below 0 or above 10
        '''
        if rating > 10 or rating < 0:
            raise ValueError(f"Invalid rating: {rating}")

        return Reviews(df=self.df.loc[self.rating() == rating])

    def get_all_genres(self):
        ''' Function:   get_all_genres
            Parameters: none
            Returns:    set, all distinct genres
        '''
        genres_set = set()
        for genres in self.genres():
            genres_set.update(self._split_genres(genres))
        return genres_set

    def is_in_genres(self, genre):
        ''' Function:   is_in_genres
            Parameters: genre
            Returns:    bool, whether the given genre has been reviewed
        '''
        return genre in self.get_all_genres()

    def get_by_genre(self, genre):
        ''' Function:   get_by_genre
            Parameters: genre, the exact genre to filter reviews by
            Returns:    Reviews, all reviews of works of the given genre
        '''
        # on_genres yields one bool per row, used by .loc as a boolean mask.
        return Reviews(df=self.df.loc[self.on_genres(lambda genres: genre in genres)])

    def get_by_genre_word(self, genre_word):
        ''' Function:   get_by_genre_word
            Parameters: genre_word, the genre word to filter reviews by
            Returns:    Reviews, all reviews of works with a genre containing the given
                        word (genres are split into words on single spaces)
        '''
        def has_word(genres):
            return any(genre_word in genre.split(" ") for genre in genres)

        return Reviews(df=self.df.loc[self.on_genres(has_word)])

    def get_by_genre_substr(self, genre_substr):
        ''' Function:   get_by_genre_substr
            Parameters: genre_substr, the genre substring to filter reviews by
            Returns:    Reviews, all reviews of works with a genre containing the given
                        substring
        '''
        def has_substr(genres):
            return any(genre_substr in genre for genre in genres)

        return Reviews(df=self.df.loc[self.on_genres(has_substr)])

    def mean_rating(self):
        ''' Function:   mean_rating
            Parameters: none
            Returns:    tuple, (mean of the "rating" column, number of reviews)
        '''
        return (self.df["rating"].mean(), len(self.df))


In [314]:
# Load every review from the CSV file into a Reviews wrapper.
reviews = Reviews(filename=REVIEWS_FILENAME)
# Last expression of the cell: displays the set of distinct genre tags.
reviews.get_all_genres()

{'industrial hip hop instrumentals',
 'satanic-space-jams',
 'no no no no no',
 'anti-folk',
 'chamber music',
 'lo-fi refinement',
 'post-industrial',
 'piano cuteness',
 'blues rock',
 'k-pop',
 'gentrified reggae jams',
 'irregular techno magic',
 'sampledelica',
 'coldwave',
 'progressive electronic',
 'darkwave',
 'prog metal',
 'experimental music',
 'minimal techno',
 'dub techno',
 'strange plinky plonky crazy instrumentals that are somewhat boss',
 'ghost raps',
 'grimy storytelling',
 'funk',
 'doom electronics',
 'absolutely twisted shit',
 'overly atmospheric synth pop',
 'west coast hip hop',
 'electroacoustic',
 'psychedelic beat magic',
 'tape music',
 'no trap beats',
 'dance pop',
 'uk bass',
 '"brostep"',
 'blackened post-punk',
 'grimy soul',
 'classic dirty mystic stylezzz magic',
 'zombie rockabilly',
 'latin trap',
 'good jazzy times',
 'too.',
 'instrumental pop',
 'harcore hip hop',
 'whisper raps',
 'acid techno',
 'eccentric electro trap rap',
 'indie folk',
 

In [315]:
# (mean rating, review count) over reviews with any genre tag containing "country".
# This is a substring match, so compound tags that include the text also count.
reviews.get_by_genre_substr("country").mean_rating()

(5.717948717948718, 39)

In [344]:
# Broad genre keywords to compare. Each is matched as a substring of a review's
# genre tags, so e.g. "pop" also picks up tags that merely contain "pop".
POPULAR_GENRES = [
    "pop",
    "hip hop",
    "electronic",
    "rock",
    "r&b",
    "country",
    "metal",
    "singer-songwriter",
    "folk",
    "indie",
    "ambient",
    "soul",
]

# One (genre, mean rating, review count) row per keyword; mean_rating() returns
# the (mean, count) pair that is unpacked into the row.
data = [
    (genre, *reviews.get_by_genre_substr(genre).mean_rating())
    for genre in POPULAR_GENRES
]

# Tabulate the rows and order them from best to worst mean rating.
df = (
    pd.DataFrame(data, columns=["genre", "rating", "num_reviews"])
    .sort_values("rating", ascending=False)
)

In [348]:
# Display the per-genre summary table, sorted by mean rating in descending order.
df

Unnamed: 0,genre,rating,num_reviews
8,folk,6.695652,138
7,singer-songwriter,6.68,125
11,soul,6.576271,118
6,metal,6.37619,210
10,ambient,6.35,100
1,hip hop,6.332636,478
3,rock,6.276636,535
2,electronic,6.25,72
9,indie,6.126866,268
0,pop,5.924147,791
