In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.colors import LogNorm

from textblob.sentiments import NaiveBayesAnalyzer

import pandas as pd
import sqlite3
from textblob import TextBlob

import seaborn as sns
sns.set(color_codes=True)


import spacy
nlp = spacy.load('en')

import re

In [105]:
df = pd.read_csv('../pitchfork4.csv')

df['word_count'] = df['content'].str.count('\w+')
df['desc_count'] = df['content_desc'].str.count('\w+')

df['avg_word_length'] = df['content'].str.count('[^ ]') / df['word_count']
df['desc_word_length'] = df['content_desc'].str.count('[^ ]') / df['content_desc'].str.count('\w+')
df['desc_freq'] = df['desc_count'] / df['word_count']

In [114]:
df.columns

Index([u'Unnamed: 0', u'index', u'reviewid', u'title', u'artist', u'url',
       u'score', u'best_new_music', u'best_new_reissue', u'author',
       u'pub_date', u'pub_weekday', u'pub_day', u'pub_month', u'pub_year',
       u'year', u'genre_electronic', u'genre_experimental',
       u'genre_folk/country', u'genre_global', u'genre_jazz', u'genre_metal',
       u'genre_pop/r&b', u'genre_rap', u'genre_rock', u'content', u'abstract',
       u'reissue', u'num_years_since_release', u'new_album', u'abstract_desc',
       u'content_desc', u'cont_polarity', u'cont_subjectivity',
       u'abs_polarity', u'abs_subjectivity', u'word_count', u'desc_count',
       u'desc_freq', u'score_bin', u'avg_word_length', u'desc_word_length'],
      dtype='object')

In [117]:
abs_desc = df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'abstract_desc', 'abs_subjectivity']].sort_values('abs_subjectivity').reset_index()['abstract_desc'][5]
    

In [41]:
abstract = """Sufjan Stevens has always written personally, weaving his life story into larger narratives, but here his autobiography is front and center. Carrie & Lowell is a return to the stripped-back folk of Seven Swans but with a decade's worth of refinement and exploration packed into it."""

In [49]:
def parse_sentiment(abstract):
    for word in abstract.split():
        if TextBlob(word).sentiment[1] > 0:
            print word, "\nSubjectivity:", \
              TextBlob(word).sentiment[1], "\n"

In [71]:
abstract = """Sufjan Stevens has always written
              personally, weaving his life story
              into larger narratives, but here his
              autobiography is front and center.
              Carrie & Lowell is a return to the
              stripped-back folk of Seven Swans but
              with a decade's worth of refinement
              and exploration packed into it."""
from textblob import TextBlob
TextBlob(abstract).sentiment

Sentiment(polarity=0.049999999999999996, subjectivity=0.25)

In [72]:
parse_sentiment(abstract)

personally, 
Subjectivity: 0.3 

larger 
Subjectivity: 0.5 

center. 
Subjectivity: 0.1 

worth 
Subjectivity: 0.1 



In [33]:
blob = nlp(unicode(abstract))

In [36]:
blob.

0.0

In [30]:
import spacy
nlp = spacy.load('en')

In [52]:
pd.DataFrame([['word', 'POS', 'tag']], )


Unnamed: 0,0,1,2
0,word,POS,tag


In [69]:
abstract = """Sufjan Stevens has always written
              personally, weaving his life story
              into larger narratives, but here his
              autobiography is front and center.
              Carrie & Lowell is a return to the
              stripped-back folk of Seven Swans but
              with a decade's worth of refinement
              and exploration packed into it."""
import spacy
nlp = spacy.load('en')

In [119]:
make_POS_df(abs_desc, nlp)

Unnamed: 0,word,POS,tag
0,has,VERB,VBZ
1,always,ADV,RB
2,personally,ADV,RB
3,weaving,VERB,VBG
4,larger,ADJ,JJR
5,here,ADV,RB
6,is,VERB,VBZ
7,front,ADJ,JJ
8,is,VERB,VBZ
9,s,NOUN,NN


In [81]:
def make_POS_df(abstract, nlp):
    df_POS = []
    for each in nlp(unicode(abstract)):
        df_POS.append([each, each.pos_, each.tag_])
    return pd.DataFrame(df_POS, columns=['word', 'POS', 'tag'])

In [56]:
pd.DataFrame(df_POS, columns=['word', 'POS', 'tag']).head(10)

Unnamed: 0,word,POS,tag
0,Sufjan,PROPN,NNP
1,Stevens,PROPN,NNP
2,has,VERB,VBZ
3,always,ADV,RB
4,written,VERB,VBN
5,\n,SPACE,_SP
6,personally,ADV,RB
7,",",PUNCT,","
8,weaving,VERB,VBG
9,his,ADJ,PRP$


In [45]:
def parse_for_adj(df, column, new_column_name):
    prop_noun_parse = []
    for i in range(len(df[column])):
        non_PN = []
        blob = nlp(unicode(df[column][i]))
        for each in blob:
            if each.tag_ in [u'ADJ', u'JJ', u'JJR', u'JJS' u'VBN', u'VBD', u'RB', u'RBR', u'RBS']:
                non_PN.append(str(each))
        prop_noun_parse.append((df['reviewid'][i], " ".join(non_PN)))

    df_new = pd.DataFrame(prop_noun_parse, columns =['reviewid', new_column_name])
    return df.merge(df_new)

In [75]:
df = parse_for_adj(df.head(200), 'content', 'content_desc')

In [77]:
df['desc_count'] = df['content_desc'].str.count('\w+')


In [78]:
df['desc_freq'] = df['desc_count'] / df['word_count']

In [79]:
df['desc_freq']

0      0.134629
1      0.118436
2      0.115811
3      0.105666
4      0.141813
5      0.107280
6      0.138531
7      0.133705
8      0.115423
9      0.119887
10     0.149633
11     0.122156
12     0.111235
13     0.125259
14     0.124845
15     0.125174
16     0.139842
17     0.101667
18     0.110879
19     0.135099
20     0.121387
21     0.120647
22     0.112202
23     0.110169
24     0.133333
25     0.102313
26     0.139175
27     0.142653
28     0.118768
29     0.134752
         ...   
170    0.119326
171    0.125477
172    0.130896
173    0.146023
174    0.116900
175    0.105512
176    0.101828
177    0.103416
178    0.155059
179    0.126354
180    0.110372
181    0.128540
182    0.123094
183    0.125152
184    0.137129
185    0.148148
186    0.118684
187    0.186207
188    0.120758
189    0.101322
190    0.109073
191    0.126424
192    0.111969
193    0.138211
194    0.129781
195    0.124150
196    0.120294
197    0.131272
198    0.131117
199    0.127551
Name: desc_freq, Length:

In [None]:
def parse_for_less(df, column, new_column_name):
    prop_noun_parse = []
    for i in range(len(df[column])):
        non_PN = []
        blob = nlp(unicode(df[column][i]))
        for each in blob:
            if each.tag_ in [u'ADJ', u'JJ', u'JJR', u'VBN', u'VBD']:
                non_PN.append(str(each))
        prop_noun_parse.append((df['reviewid'][i], " ".join(non_PN)))

    df_new = pd.DataFrame(prop_noun_parse, columns =['reviewid', new_column_name])
    return df.merge(df_new)

In [None]:
from wordcloud import WordCloud, STOPWORDS

In [None]:
stopwords = set(STOPWORDS)
stopwords.add("final")


In [None]:
word_dict = {}
word_dict[2]

In [None]:
def make_agg_cloud(series, subj):
    word_dict = {}
    for each in range(len(series)):
        print each
        for word in series[each].split():
            word_dict[word] = TextBlob(word).sentiment[1] + .00001
        

    wordcloud = WordCloud(stopwords=stopwords)
    wordcloud.generate_from_frequencies(frequencies=word_dict, max_font_size=40)
    plt.figure(figsize=(8, 6))
    plt.title('Subjective Words with From Abstract With Score of {}'.format(subj))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

In [None]:
df = df.dropna()

In [None]:
df[(df['abs_subjectivity'] > .6) & (df['abs_subjectivity'] < .61) & \
    (df['new_album'] == 1)]           \
    [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').sample(10).reset_index()

In [None]:
series0 = df[(df['cont_subjectivity'] > .3) & (df['cont_subjectivity'] < .32) & (df['score'] > .8) & \
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').sample(2).reset_index() \
    ['content']
    
series1 = df[(df['cont_subjectivity'] > .4) & (df['cont_subjectivity'] < .42) & (df['score'] > .8) &\
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').sample(2).reset_index() \
    ['content']
    
series2 = df[(df['cont_subjectivity'] > .5) & (df['cont_subjectivity'] < .52) & (df['score'] > .8) &\
    (df['new_album'] == 1)]           \
    [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').sample(2).reset_index() \
    ['content']
    
series3 = df[(df['cont_subjectivity'] > .6) & (df['cont_subjectivity'] < .62) & (df['score'] > .8) &\
    (df['new_album'] == 1)]           \
    [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').sample(2).reset_index() \
    ['content']

In [None]:
make_agg_cloud(series3, .6)

In [None]:
make_agg_cloud(series2, .5)

In [None]:
make_agg_cloud(series1, .4)

In [None]:
make_agg_cloud(series0, .3)

In [None]:
def make_cloud(sent, subj):
    word_dict = {}
    for word in sent.split():
        word_dict[word] = TextBlob(word).sentiment[1] + .00001

    wordcloud = WordCloud(stopwords=stopwords)
    wordcloud.generate_from_frequencies(frequencies=word_dict)
    plt.figure(figsize=(8, 6))
    plt.title('Subjective Words with From Abstract With Score of {}'.format(subj))
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

In [None]:
# .4 subjectivity
a = df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').reset_index() \
    ['content'][1]

In [None]:
make_cloud(a, .4)

In [None]:
# .45 subjectivity
b = df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').reset_index() \
    ['content'][4]

In [None]:
make_cloud(b, .45)

In [None]:
# .5 subjectivity
c = df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').reset_index() \
    ['content'][8]

In [None]:
make_cloud(c, .5)

In [None]:
# .55 subjectivity
d = df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'content', 'abs_subjectivity']].sort_values('abs_subjectivity').reset_index() \
    ['content'][11]

In [None]:
make_cloud(d, .55)

In [None]:
df[(df['artist'] == 'sufjan stevens') & 
   (df['new_album'] == 1)]           \
   [['abstract', 'title', 'score', 'cont_subjectivity']].sort_values('cont_subjectivity').reset_index() \
    

In [None]:
df[(df['abs_subjectivity'] > .4) & \
   (df['abs_subjectivity'] < .41) & \
   (df['best_new_music'] == 1) &    \
   (df['new_album'] == 1)]           \
    ['abstract']

In [None]:
d = df[(df['abs_subjectivity'] > .45) & \
   (df['abs_subjectivity'] < .46) & \
   (df['best_new_music'] == 1) &    \
   (df['new_album'] == 1)]           \
    ['abstract'][8568]

In [None]:
df[(df['abs_subjectivity'] > .5) & \
   (df['abs_subjectivity'] < .51) & \
   (df['best_new_music'] == 1) &    \
   (df['new_album'] == 1)]           \
    ['abstract'][670]

In [80]:
kida = """had never even seen a shooting star before. 25 years of rotations, passes through comets' paths, and travel, and to my memory I had never witnessed burning debris scratch across the night sky. Radiohead were hunched over their instruments. Thom Yorke slowly beat on a grand piano, singing, eyes closed, into his microphone like he was trying to kiss around a big nose. Colin Greenwood tapped patiently on a double bass, waiting for his cue. White pearls of arena light swam over their faces. A lazy disco light spilled artificial constellations inside the aluminum cove of the makeshift stage. The metal skeleton of the stage ate one end of Florence's Piazza Santa Croce, on the steps of the Santa Croce Cathedral. Michelangelo's bones and cobblestone laid beneath. I stared entranced, soaking in Radiohead's new material, chiseling each sound into the best functioning parts of my brain which would be the only sound system for the material for months.

The butterscotch lamps along the walls of the tight city square bled upward into the cobalt sky, which seemed as strikingly artificial and perfect as a wizard's cap. The staccato piano chords ascended repeatedly. "Black eyed angels swam at me," Yorke sang like his dying words. "There was nothing to fear, nothing to hide." The trained critical part of me marked the similarity to Coltrane's "Ole." The human part of me wept in awe.

The Italians surrounding me held their breath in communion (save for the drunken few shouting "Criep!"). Suddenly, a rise of whistles and orgasmic cries swept unfittingly through the crowd. The song, "Egyptian Song," was certainly momentous, but wasn't the response more apt for, well, "Creep?" I looked up. I thought it was fireworks. A teardrop of fire shot from space and disappeared behind the church where the syrupy River Arno crawled. Radiohead had the heavens on their side.

For further testament, Chip Chanko and I both suffered auto-debilitating accidents in the same week, in different parts of the country, while blasting "Airbag" in our respective Japanese imports. For months, I feared playing the song about car crashes in my car, just as I'd feared passing 18- wheelers after nearly being crushed by one in 1990. With good reason, I suspect Radiohead to possess incomprehensible powers. The evidence is only compounded with Kid A-- the rubber match in the band's legacy-- an album which completely obliterates how albums, and Radiohead themselves, will be considered.

Even the heralded OK Computer has been nudged down one spot in Valhalla. Kid A makes rock and roll childish. Considerations on its merits as "rock" (i.e. its radio fodder potential, its guitar riffs, and its hooks) are pointless. Comparing this to other albums is like comparing an aquarium to blue construction paper. And not because it's jazz or fusion or ambient or electronic. Classifications don't come to mind once deep inside this expansive, hypnotic world. Ransom, the philologist hero of C.S. Lewis' Out of the Silent Planet who is kidnapped and taken to another planet, initially finds his scholarship useless in his new surroundings, and just tries to survive the beautiful new world.

This is an emotional, psychological experience. Kid A sounds like a clouded brain trying to recall an alien abduction. It's the sound of a band, and its leader, losing faith in themselves, destroying themselves, and subsequently rebuilding a perfect entity. In other words, Radiohead hated being Radiohead, but ended up with the most ideal, natural Radiohead record yet.

"Everything in Its Right Place" opens like Close Encounters spaceships communicating with pipe organs. As your ears decide whether the tones are coming or going, Thom Yorke's Cuisinarted voice struggles for its tongue. "Everything," Yorke belts in uplifting sighs. The first-person mantra of "There are two colors in my head" is repeated until the line between Yorke's mind and the listener's mind is erased.

Skittering toy boxes open the album's title song, which, like the track "Idioteque," shows a heavy Warp Records influence. The vocoder lullaby lulls you deceivingly before the riotous "National Anthem." Mean, fuzzy bass shapes the spine as unnerving theremin choirs limn. Brash brass bursts from above like Terry Gilliam's animated foot. The horns swarm as Yorke screams, begs, "Turn it off!" It's the album's shrill peak, but just one of the incessant goosebumps raisers.

After the rockets exhaust, Radiohead float in their lone orbit. "How to Disappear Completely" boils down "Let Down" and "Karma Police" to their spectral essence. The string-laden ballad comes closest to bridging Yorke's lyrical sentiment to the instrumental effect. "I float down the Liffey/ I'm not here/ This isn't happening," he sings in his trademark falsetto. The strings melt and weep as the album shifts into its underwater mode. "Treefingers," an ambient soundscape similar in sound and intent to Side B of Bowie and Eno's Low, calms after the record's emotionally strenuous first half.

The primal, brooding guitar attack of "Optimistic" stomps like mating Tyrannosaurs. The lyrics seemingly taunt, "Try the best you can/ Try the best you can," before revealing the more resigned sentiment, "The best you can is good enough." For an album reportedly "lacking" in traditional Radiohead moments, this is the best summation of their former strengths. The track erodes into a light jam before morphing into "In Limbo." "I'm lost at sea," Yorke cries over clean, uneasy arpeggios. The ending flares with tractor beams as Yorke is vacuumed into nothingness. The aforementioned "Idioteque" clicks and thuds like Aphex Twin and Bjork's Homogenic, revealing brilliant new frontiers for the "band." For all the noise to this point, it's uncertain entirely who or what has created the music. There are rarely traditional arrangements in the ambiguous origin. This is part of the unique thrill of experiencing Kid A.

Pulsing organs and a stuttering snare delicately propel "Morning Bell." Yorke's breath can be heard frosting over the rainy, gray jam. Words accumulate and stick in his mouth like eye crust. "Walking walking walking walking," he mumbles while Jonny Greenwood squirts whale-chant feedback from his guitar. The closing "Motion Picture Soundtrack" brings to mind The White Album, as it somehow combines the sentiment of Lennon's LP1 closer-- the ode to his dead mother, "Julia"-- with Ringo and Paul's maudlin, yet sincere LP2 finale, "Goodnight." Pump organ and harp flutter as Yorke condones with affection, "I think you're crazy." To further emphasize your feeling at that moment and the album's overall theme, Yorke bows out with "I will see you in the next life." If you're not already there with him.

The experience and emotions tied to listening to Kid A are like witnessing the stillborn birth of a child while simultaneously having the opportunity to see her play in the afterlife on Imax. It's an album of sparking paradox. It's cacophonous yet tranquil, experimental yet familiar, foreign yet womb-like, spacious yet visceral, textured yet vaporous, awakening yet dreamlike, infinite yet 48 minutes. It will cleanse your brain of those little crustaceans of worries and inferior albums clinging inside the fold of your gray matter. The harrowing sounds hit from unseen angles and emanate with inhuman genesis. When the headphones peel off, and it occurs that six men (Nigel Godrich included) created this, it's clear that Radiohead must be the greatest band alive, if not the best since you know who. Breathing people made this record! And you can't wait to dive back in and try to prove that wrong over and over."""

In [82]:
df_kida = make_POS_df(kida, nlp)

In [101]:
df_kida['tag'].value_counts() / len(kida.split()) - df_wiki['tag'].value_counts() / len(wiki.split())

''            NaN
,       -0.015747
-LRB-         NaN
-RRB-         NaN
.        0.019218
:             NaN
CC      -0.021445
CD      -0.030377
DT      -0.002227
EX            NaN
FW            NaN
HYPH    -0.002951
IN       0.016386
JJ       0.031647
JJR           NaN
JJS     -0.004831
MD            NaN
NN      -0.003725
NNP     -0.079736
NNPS          NaN
NNS     -0.011442
PDT           NaN
POS      0.007295
PRP      0.011903
PRP$     0.013500
RB       0.035590
RBR           NaN
RBS           NaN
RP            NaN
TO       0.000020
UH      -0.008873
VB       0.016996
VBD     -0.034643
VBG      0.029122
VBN     -0.029549
VBP           NaN
VBZ      0.031265
WDT           NaN
WP            NaN
WRB     -0.000263
_SP      0.001899
``            NaN
Name: tag, dtype: float64

In [94]:
wiki = """Kid A is the fourth studio album by the English rock band Radiohead, released on 2 October 2000 by Parlophone. After having suffered a breakdown promoting Radiohead's acclaimed 1997 album OK Computer, songwriter Thom Yorke envisioned a radical change in direction. The band replaced their rock sound with synthesisers, drum machines, the ondes Martenot, string orchestras and brass instruments, and incorporated influences from electronic music, krautrock, jazz, and 20th-century classical music. They recorded Kid A with OK Computer producer Nigel Godrich in Paris, Copenhagen, Gloucestershire and their hometown Oxford, England. The sessions produced over 20 tracks, and Radiohead split the work into two albums: Kid A, and Amnesiac, released the following year.

Radiohead released no singles or music videos to promote Kid A and conducted few interviews and photoshoots. They became instead one of the first major acts to use the internet as a promotional tool; the album was made available to stream and was promoted with short animated films featuring music and artwork. Bootlegs of early performances were shared on file sharing services, and the album was leaked before release.

Kid A debuted at the top of the charts in Britain, where it went platinum in the first week, and it became Radiohead's first number-one album in the United States. Like OK Computer, it won a Grammy for Best Alternative Album and was nominated for Album of the Year. Its departure from Radiohead's earlier sound divided fans and critics, but it later attracted widespread acclaim. At the turn of the decade, Rolling Stone, Pitchfork and the Times ranked Kid A the greatest album of the 2000s. In 2012, Rolling Stone ranked it number 67 on its list of the 500 greatest albums of all time."""

In [95]:
df_wiki = make_POS_df(wiki, nlp)

In [100]:
df_wiki['tag'].value_counts() / len(wiki.split())

NN      0.167832
NNP     0.157343
IN      0.115385
DT      0.104895
,       0.080420
NNS     0.076923
VBD     0.062937
JJ      0.062937
CC      0.059441
VBN     0.048951
.       0.045455
CD      0.038462
PRP     0.024476
PRP$    0.013986
VB      0.010490
JJS     0.010490
TO      0.010490
POS     0.010490
UH      0.010490
RB      0.010490
VBG     0.010490
_SP     0.006993
HYPH    0.006993
:       0.006993
WRB     0.003497
VBZ     0.003497
JJR     0.003497
Name: tag, dtype: float64