### Step 4: Semantic similarity classification of reviews

In [2]:
import pandas as pd

In [3]:
# Load the reviews; the CSV already carries a `summary` column next to `review_body`.
summaries_csv = 'data_w_summaries.csv'
combined_reviews = pd.read_csv(summaries_csv)

In [4]:
# Preview the loaded reviews (columns: product_title, review_body, summary).
combined_reviews

Unnamed: 0,product_title,review_body,summary
0,"""2""","If you're a fighter pilot, are with a fighter ...","If you're a fighter pilot, are with a fighter ..."
1,"""DLG (Dark, Latin Groove) - Greatest Hits""",I liked this cd. Its a good compilation of son...,"Its a good compilation of songs, however I wis..."
2,"""If I Go Away"" / ""Man Like Me""",I love his voice!!!!! My boyfriend knows that ...,"Trust me when you hear JP sing his OWN music, ..."
3,"""John Skinner's Second Complete Ballroom Dance""",Good music for ballroom dancing -- both for th...,Good music for ballroom dancing -- both for th...
4,"""Metal Box 3x 12"""" 45 Rpm Lp in Metal Box""",Indie label 4 men with Beards gets permission ...,many will want to get the turn table out of mo...
...,...,...,...
17100,this is the fire,"its like Tom Petty, remixed by David Lynch. <b...","<br /> <br />1) \\""la news\\"" is a great start..."
17101,tin cans & car tires,"Very good studio moe. Plane crash, Happy Hour...","Plane crash, Happy Hour Hero, Nebraska are som..."
17102,travelling,"Sorry for the mixed metaphor of a title, but M...",Spear is one of reggae's all-time most importa...
17103,¿La Calle Es Tuya?,Compare to the others Estopa CD's this is not ...,Compare to the others Estopa CD's this is not ...


In [5]:
from gensim.models import KeyedVectors

In [6]:
import os

# Location of the downloaded GoogleNews word2vec binary.
# The default is a machine-specific absolute path, so it only resolves on the
# original author's machine; set the WORD2VEC_PATH environment variable to
# point at the file anywhere else without editing the notebook.
filepath = os.environ.get(
    'WORD2VEC_PATH',
    '/Users/johannebear/Library/Mobile Documents/com~apple~CloudDocs/APPLIED ANALYTICS/APAN 5430/Assignments/assignment5/GoogleNews-vectors-negative300.bin.gz',
)

In [7]:
# Read the pre-trained word vectors from the binary word2vec file at `filepath`.
model_word2vec = KeyedVectors.load_word2vec_format(fname=filepath, binary=True)

In [8]:
# Two-level taxonomy used for classification:
#   topic -> {subtopic label -> one-sentence description of that subtopic}.
# TaxonomyClassifier joins each subtopic label with its description and
# compares the resulting words against the words of a review.
# NOTE: "Harmony" defines three subtopics while every other topic defines five.
topic_taxonomy = {
    "Sound Quality": {
        "Clear Audio": "The sound quality is clear and crisp, allowing every instrument and detail to be heard distinctly.",
        "High Fidelity": "The sound fidelity is high, with a realistic and natural reproduction of the music.",
        "Excellent Engineering": "The sound engineering in the recording and mixing process is well-executed.",
        "Exceptional Acoustics": "The acoustic performance is exceptional, showcasing the natural resonance and tone of instruments.",
        "Balanced Sound": "The audio balance is well-maintained, ensuring that no instrument overpowers others.",
    },
    "Lyrics": {
        "Lyricism": "The lyrics exhibit a high level of lyricism and poetic expression.",
        "Compelling Storytelling": "The lyrics tell a compelling story that engages the listeners.",
        "Emotionally Resonant Delivery": "The delivery of the lyrics is skillful and emotionally resonant.",
        "Deep Emotional Expression": "The lyrics convey deep emotions and heartfelt sentiments.",
        "Meaningful Messages": "The lyrics carry meaningful messages and thought-provoking themes.",
    },
    "Vocals": {
        "Powerful Vocal Performance": "The vocal performance is powerful, expressive, and skillfully executed.",
        "Impressive Vocal Range": "The vocalist demonstrates an impressive vocal range.",
        "Excellent Vocal Technique": "The vocalist showcases excellent vocal techniques and control.",
        "Commanding Vocal Power": "The vocals have a strong and commanding presence.",
        "Clear Vocal Articulation": "The vocals are clear and intelligible, enhancing the understanding of the lyrics.",
    },
    "Instrumentation": {
        "Well-Crafted Instrumental Arrangement": "The instrumental arrangement is well-crafted and complements the overall sound.",
        "Impressive Instrumental Performance": "The instrumental performance is top-notch and showcases musicians' skills.",
        "Exemplary Instrumental Technique": "The instrumentalists demonstrate excellent playing techniques.",
        "Balanced Instrumental Mix": "The instrumental balance ensures that each instrument has its place in the mix.",
        "Diverse Instrumental Variety": "The instrumentation offers a diverse range of sounds and textures.",
    },
    "Production": {
        "High Production Quality": "The production quality is high, resulting in a polished and professional sound.",
        "Well-Balanced Mixing": "The mixing is well-balanced, with each element fitting harmoniously in the mix.",
        "Enhanced Mastering": "The mastering process enhances the overall sound and loudness without distortion.",
        "Innovative Production Techniques": "The production showcases innovative and creative techniques.",
        "Unique Production Flair": "The production has a unique flair that sets it apart from standard recordings.",
    },
    "Originality": {
        "Distinctive Sound": "The music possesses a distinctive and recognizable sound.",
        "Exploratory and Experimental": "The music explores experimental and boundary-pushing elements.",
        "Innovative Composition": "The music showcases innovative and novel approaches to composition.",
        "Creative Genre Fusion": "The music takes a creative and fresh approach by fusing different genres.",
        "Unique Artistic Style": "The music exhibits a unique and distinguishable artistic style.",
    },
    "Emotion": {
        "Emotional Impact": "The music leaves a profound emotional impact on the listener.",
        "Emotionally Evocative": "The music evokes a range of emotions, from joy to melancholy.",
        "Sentimental and Nostalgic": "The music carries sentimental and nostalgic qualities.",
        "Passionate Performance": "The music is passionately performed and conveys intense emotions.",
        "Soulful Expression": "The music has a soulful and heartfelt quality.",
    },
    "Energy": {
        "Energetic Music": "The music is energetic and full of lively and vibrant elements.",
        "High-Energy Performance": "The music exudes high-energy and a sense of excitement.",
        "Powerful Delivery": "The music has a powerful and impactful delivery.",
        "Dynamic Shifts": "The music showcases dynamic shifts and contrasts.",
        "Upbeat and Uplifting": "The music has an upbeat and uplifting vibe.",
    },
    "Melody": {
        "Memorable Melodic Structure": "The melodic structure is well-crafted and memorable.",
        "Catchy Melodies": "The music contains catchy and infectious melodies.",
        "Easily Recallable Melodies": "The melodies are memorable and easy to recall.",
        "Harmonious Melodies": "The melodies blend harmoniously with the overall composition.",
        "Engaging Melodic Progression": "The melodic progression is engaging and captivating.",
    },
    "Harmony": {
        "Harmonization": "The harmonization of instruments and vocals is skillfully executed.",
        "Harmonic Balance": "The harmonic balance ensures a pleasing and coherent sound.",
        "Harmonic Richness": "The music exhibits harmonic richness and complexity"
    }
}

In [9]:
# Top-level topic names of the taxonomy.
topic_taxonomy.keys()

dict_keys(['Sound Quality', 'Lyrics', 'Vocals', 'Instrumentation', 'Production', 'Originality', 'Emotion', 'Energy', 'Melody', 'Harmony'])

In [10]:
# Subtopic labels defined under the 'Sound Quality' topic.
topic_taxonomy['Sound Quality'].keys()

dict_keys(['Clear Audio', 'High Fidelity', 'Excellent Engineering', 'Exceptional Acoustics', 'Balanced Sound'])

In [11]:
class TaxonomyClassifier:
    '''
    Rank taxonomy subtopics by word-vector similarity to a piece of text.

    Within each top-level topic only the single best-matching subtopic is
    kept, so a result never contains two subtopics of the same topic.
    '''

    def __init__(self, model, taxonomy, topN=3):
        '''
        Constructor for TaxonomyClassifier
        model: word-vector model exposing n_similarity(tokens_a, tokens_b),
               e.g. a gensim KeyedVectors word2vec model
        taxonomy: 2-level taxonomy defined as dictionary-in-dictionary,
                  {topic: {subtopic: description}}
        topN: default number of subtopics returned by classify()
        '''
        self.model = model
        self.taxonomy = taxonomy
        self.topN = topN

    def classify(self, text, topN=None):
        '''
        Classify text to subtopics based on the similarity between the input
        text and each subtopic's label plus description.

        text: the input text; it is converted with str(), lower-cased and
              split on whitespace
        topN: number of subtopics to be returned; None (the default) uses the
              value given to the constructor

        Returns a list of (subtopic, score) tuples ordered from highest to
        lowest score, holding at most topN entries. The list is empty when
        the text contains no tokens or the taxonomy yields no candidates.
        '''
        # Only fall back to the default when the caller passed nothing:
        # an explicit topN=0 must yield an empty result, not the default.
        if topN is None:
            topN = self.topN

        # Tokenise once; the same token list is compared against every subtopic.
        tokens = str(text).lower().split()
        if not tokens:
            # n_similarity cannot average an empty token list.
            return []

        scores = dict()
        for topic, subtopics in self.taxonomy.items():

            subtopic_scores = dict()

            for subtopic, keywords in subtopics.items():
                reference_tokens = (subtopic + ' ' + keywords).lower().split()
                if not reference_tokens:
                    continue
                subtopic_scores[subtopic] = self.model.n_similarity(tokens, reference_tokens)

            # A topic without any usable subtopic contributes no candidate.
            if not subtopic_scores:
                continue

            # max() returns the first of equally scored subtopics, matching a
            # stable descending sort followed by taking element 0.
            best_subtopic, best_score = max(subtopic_scores.items(), key=lambda item: item[1])
            scores[best_subtopic] = best_score

        return sorted(scores.items(), key=lambda item: item[1])[::-1][:topN]

In [12]:
# Build the classifier from the loaded word vectors and the taxonomy; topN keeps its default.
tc = TaxonomyClassifier(model_word2vec, topic_taxonomy)

In [13]:
# Hand-written sample reviews, each paired with the subtopic label it is expected to receive.
labelled_examples = (
    (
        "The sound quality of this album is amazing! Every instrument and detail can be heard distinctly.",
        'Clear Audio',
    ),
    (
        "The lyrics tell a compelling story that engages the listeners. The emotional delivery is skillful.",
        'Compelling Storytelling',
    ),
    (
        "The production quality is top-notch, resulting in a polished and professional sound.",
        'High Production Quality',
    ),
    (
        "The melodic progression is engaging and captivating. The melodies blend harmoniously with the overall composition.",
        'Engaging Melodic Progression',
    ),
)

# Split the pairs into the two parallel lists used by the evaluation cells.
test_reviews = [review_text for review_text, _ in labelled_examples]
test_labels = [expected_label for _, expected_label in labelled_examples]


In [14]:
# For every sample review keep only the label of its single best-scoring subtopic.
predicted_topics = [tc.classify(sample, topN=1)[0][0] for sample in test_reviews]

print(predicted_topics)

['Clear Audio', 'Compelling Storytelling', 'High Production Quality', 'Harmonious Melodies']


In [15]:
# Percentage of sample reviews whose predicted subtopic equals the expected label.
correct_predictions = sum(
    predicted == truth for predicted, truth in zip(predicted_topics, test_labels)
)
accuracy = correct_predictions / len(test_labels) * 100

# Show expected labels first, then predictions, so mismatches line up visually.
for label_row in (test_labels, predicted_topics):
    print(label_row)
print(f"Accuracy of this model  is: {accuracy:.2f}%")


['Clear Audio', 'Compelling Storytelling', 'High Production Quality', 'Engaging Melodic Progression']
['Clear Audio', 'Compelling Storytelling', 'High Production Quality', 'Harmonious Melodies']
Accuracy of this model  is: 75.00%


In [16]:
def _best_subtopic(summary):
    """Return the classifier's one-element ranking for a single summary."""
    return tc.classify(summary, topN=1)


# Classify every summary; each cell holds a list with one (subtopic, score) tuple.
combined_reviews['review_topic'] = combined_reviews['summary'].apply(_best_subtopic)

In [17]:
def _first_label(ranked):
    """Reduce a classifier ranking to the label of its best entry.

    ranked: a list of (subtopic, score) tuples, or a value that has already
            been reduced by an earlier run of this cell.

    Returns the subtopic label of the first tuple, None for an empty list,
    and any non-list value unchanged. Leaving non-list values untouched makes
    the cell safe to re-run: indexing an already-extracted label with
    [0][0] would otherwise collapse it to its first character.
    """
    if isinstance(ranked, list):
        return ranked[0][0] if ranked else None
    return ranked


# Replace each ranking with just the subtopic label.
combined_reviews['review_topic'] = combined_reviews['review_topic'].apply(_first_label)

In [18]:
# Preview the frame with the new review_topic column.
combined_reviews

Unnamed: 0,product_title,review_body,summary,review_topic
0,"""2""","If you're a fighter pilot, are with a fighter ...","If you're a fighter pilot, are with a fighter ...",Clear Audio
1,"""DLG (Dark, Latin Groove) - Greatest Hits""",I liked this cd. Its a good compilation of son...,"Its a good compilation of songs, however I wis...",Clear Audio
2,"""If I Go Away"" / ""Man Like Me""",I love his voice!!!!! My boyfriend knows that ...,"Trust me when you hear JP sing his OWN music, ...",Clear Audio
3,"""John Skinner's Second Complete Ballroom Dance""",Good music for ballroom dancing -- both for th...,Good music for ballroom dancing -- both for th...,Energetic Music
4,"""Metal Box 3x 12"""" 45 Rpm Lp in Metal Box""",Indie label 4 men with Beards gets permission ...,many will want to get the turn table out of mo...,Clear Audio
...,...,...,...,...
17100,this is the fire,"its like Tom Petty, remixed by David Lynch. <b...","<br /> <br />1) \\""la news\\"" is a great start...",Balanced Sound
17101,tin cans & car tires,"Very good studio moe. Plane crash, Happy Hour...","Plane crash, Happy Hour Hero, Nebraska are som...",Clear Audio
17102,travelling,"Sorry for the mixed metaphor of a title, but M...",Spear is one of reggae's all-time most importa...,Well-Balanced Mixing
17103,¿La Calle Es Tuya?,Compare to the others Estopa CD's this is not ...,Compare to the others Estopa CD's this is not ...,Clear Audio


In [22]:
# Distinct subtopic labels that were assigned to at least one review.
review_topic=combined_reviews['review_topic'].unique()
review_topic

array(['Clear Audio', 'Energetic Music', 'Balanced Sound',
       'Balanced Instrumental Mix', 'Well-Balanced Mixing',
       'High-Energy Performance', 'Dynamic Shifts',
       'Passionate Performance', 'Creative Genre Fusion',
       'Easily Recallable Melodies', 'Harmonic Richness',
       'Emotionally Evocative', 'Emotional Impact', 'Soulful Expression',
       'Harmonious Melodies', 'Unique Production Flair',
       'Catchy Melodies', 'Harmonization', 'Compelling Storytelling',
       'Excellent Engineering', 'Lyricism', 'High Production Quality',
       'Sentimental and Nostalgic', 'High Fidelity',
       'Clear Vocal Articulation', 'Memorable Melodic Structure',
       'Upbeat and Uplifting', 'Exemplary Instrumental Technique',
       'Impressive Vocal Range', 'Enhanced Mastering',
       'Powerful Delivery', 'Commanding Vocal Power',
       'Diverse Instrumental Variety', 'Exceptional Acoustics',
       'Excellent Vocal Technique', 'Powerful Vocal Performance',
       'Distinct

In [21]:
# Persist the labelled reviews; index=False keeps the row index out of the CSV.
combined_reviews.to_csv('data_w_review_topic.csv',index=False)