In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Spotify Data, Pitchfork Reviews

First, we are going to upload to the acoustic_features.csv, albums.csv, and reviews.csv file.

In [68]:
acoustics = pd.read_csv("acoustic_features.csv")
billboard = pd.read_csv("albums.csv")
pitchfork = pd.read_csv("reviews.csv")

In [62]:
billboard

Unnamed: 0,index,id,date,artist,album,rank,length,track_length
0,0,1,,,,,,
1,1,2,2019-01-19,A Boogie Wit da Hoodie,Hoodie SZN,1.0,20.0,185233.800000
2,2,3,2019-01-19,21 Savage,I Am > I Was,2.0,15.0,211050.733333
3,3,4,2019-01-19,Soundtrack,Spider-Man: Into The Spider-Verse,3.0,13.0,190866.384615
4,4,5,2019-01-19,Meek Mill,Championships,4.0,19.0,219173.894737
5,5,6,2019-01-19,Post Malone,beerbongs & bentleys,5.0,18.0,214113.611111
6,6,7,2019-01-19,Travis Scott,ASTROWORLD,6.0,17.0,207191.823529
7,7,8,2019-01-19,Lady Gaga & Bradley Cooper,A Star Is Born (Soundtrack),7.0,19.0,220006.000000
8,8,9,2019-01-19,Drake,Scorpion,8.0,25.0,217055.640000
9,9,10,2019-01-19,Kodak Black,Dying To Live,9.0,16.0,179588.750000


## Size of each data set:

In [3]:
print("The size of the acoustics dataset is:","{:,}".format(acoustics.shape[0]), "rows.")
print("The size of the billboard dataset is:","{:,}".format(billboard.shape[0]), "rows.")
print("The size of the pitchfork dataset is:","{:,}".format(pitchfork.shape[0]), "rows.")

The size of the acoustics dataset is: 339,855 rows.
The size of the billboard dataset is: 573,947 rows.
The size of the pitchfork dataset is: 20,873 rows.


## Data Cleaning: Create Subsets of Data

In [4]:
mini_acoustics = acoustics.iloc[0:48,:]
mini_billboard = billboard.iloc[0:4,:].drop([0]).reset_index().drop(["level_0","index", "id"], axis = 1)
mini_pitchfork = pitchfork.iloc[0:100,:]
mini_pitchfork

Unnamed: 0,index,id,artist,album,genre,score,date,author,role,review,bnm,link
0,0,0,David Byrne,“…The Best Live Show of All Time” — NME EP,Rock,5.5,January 11 2019,Andy Beta,Contributor,"Viva Brother, Terris, Mansun, the Twang, Joe L...",0,https://pitchfork.com/reviews/albums/david-byr...
1,1,1,DJ Healer,Lost Lovesongs / Lostsongs Vol. 2,Electronic,6.2,January 11 2019,Chal Ravens,Contributor,"The Prince of Denmark—that is, the proper prin...",0,https://pitchfork.com/reviews/albums/dj-healer...
2,2,2,Jorge Velez,Roman Birds,Electronic,7.9,January 10 2019,Philip Sherburne,Contributing Editor,"Jorge Velez has long been prolific, but that’s...",0,https://pitchfork.com/reviews/albums/jorge-vel...
3,3,3,Chandra,Transportation EPs,Rock,7.8,January 10 2019,Andy Beta,Contributor,When the Avalanches returned in 2016 after an ...,0,https://pitchfork.com/reviews/albums/chandra-t...
4,4,4,The Chainsmokers,Sick Boy,Electronic,3.1,January 9 2019,Larry Fitzmaurice,Contributor,We’re going to be stuck with the Chainsmokers ...,0,https://pitchfork.com/reviews/albums/the-chain...
5,5,5,Silent Servant,Shadows of Death and Desire,Electronic,7.8,January 9 2019,Harley Brown,Contributor,Ever since Mudd Club DJ Johnny Dynell played I...,0,https://pitchfork.com/reviews/albums/silent-se...
6,6,6,A Boogie Wit Da Hoodie,Hoodie SZN,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
7,7,7,Çaykh,V I S C 0 9,Experimental,7.3,January 8 2019,Tom Hawking,Contributor,"The Berlin-based producer and DJ Çaykh, born N...",0,https://pitchfork.com/reviews/albums/caykh-v-i...
8,8,8,The-Dream,"Ménage à Trois: Sextape Vol. 1, 2, 3",Pop/R&B,7.4,January 7 2019,Sheldon Pearce,Contributing Writer,Much of The-Dream’s underappreciated body of w...,0,https://pitchfork.com/reviews/albums/the-dream...
9,9,9,Doug Paisley,Starter Home,Folk/Country,7.7,January 7 2019,Amanda Wicks,Associate Staff Writer,"For a decade, Canadian singer/songwriter Doug ...",0,https://pitchfork.com/reviews/albums/doug-pais...


In [5]:
merged_df = pd.merge(mini_acoustics,mini_billboard, on = "album")
merged_df

Unnamed: 0,index,id,song,album,artist_x,acousticness,danceability,duration_ms,energy,instrumentalness,...,tempo,time_signature,valence,album_id,date_x,date_y,artist_y,rank,length,track_length
0,0,0Veyvc3n9AcLSoK3r1dA12,Voices In My Head,Hoodie SZN,A Boogie Wit da Hoodie,0.0555,0.754,142301.0,0.663,0.0,...,90.195,4.0,0.207,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
1,1,77JzXZonNumWsuXKy9vr3U,Beasty,Hoodie SZN,A Boogie Wit da Hoodie,0.292,0.86,152829.0,0.418,0.0,...,126.023,4.0,0.374,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
2,2,18yllZD0TdF7ykcREib8Z1,I Did It,Hoodie SZN,A Boogie Wit da Hoodie,0.153,0.718,215305.0,0.454,4.6e-05,...,89.483,4.0,0.196,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
3,3,1wJRveJZLSb1rjhnUHQiv6,Swervin (feat. 6ix9ine),Hoodie SZN,A Boogie Wit da Hoodie,0.0153,0.581,189487.0,0.662,0.0,...,93.023,4.0,0.434,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
4,4,0jAfdqv18goRTUxm3ilRjb,Startender (feat. Offset and Tyga),Hoodie SZN,A Boogie Wit da Hoodie,0.0235,0.736,192779.0,0.622,0.0,...,191.971,4.0,0.506,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
5,5,3L19besdNQzd342qL78xqm,Demons and Angels (feat. Juice WRLD),Hoodie SZN,A Boogie Wit da Hoodie,0.0114,0.81,214593.0,0.55,6e-06,...,76.503,4.0,0.183,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
6,6,1Gno5IBUHh6vUKp4nxu2Yd,Love Drugs and Sex,Hoodie SZN,A Boogie Wit da Hoodie,0.55,0.762,157863.0,0.553,0.0,...,119.973,4.0,0.773,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
7,7,2A4vCndSkZ3jd5nfh3a8gc,Skeezers,Hoodie SZN,A Boogie Wit da Hoodie,0.558,0.792,198054.0,0.484,0.0,...,140.019,4.0,0.353,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
8,8,1srm5YnZiu7Ksnd2UTTmmk,Savage,Hoodie SZN,A Boogie Wit da Hoodie,0.188,0.78,169046.0,0.402,0.0,...,160.046,4.0,0.483,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8
9,9,3UhRsQVrELOATEiY7E9X2T,Come Closer (feat. Queen Naija),Hoodie SZN,A Boogie Wit da Hoodie,0.22,0.741,156512.0,0.556,0.0,...,140.056,4.0,0.616,3r5hf3Cj3EMh1C2saQ8jyt,2018-12-21,2019-01-19,A Boogie Wit da Hoodie,1.0,20.0,185233.8


In [6]:
merged_df = pd.merge(merged_df, pitchfork, on = "album")
merged_df

Unnamed: 0,index_x,id_x,song,album,artist_x,acousticness,danceability,duration_ms,energy,instrumentalness,...,id_y,artist,genre,score,date,author,role,review,bnm,link
0,0,0Veyvc3n9AcLSoK3r1dA12,Voices In My Head,Hoodie SZN,A Boogie Wit da Hoodie,0.0555,0.754,142301.0,0.663,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
1,1,77JzXZonNumWsuXKy9vr3U,Beasty,Hoodie SZN,A Boogie Wit da Hoodie,0.292,0.86,152829.0,0.418,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
2,2,18yllZD0TdF7ykcREib8Z1,I Did It,Hoodie SZN,A Boogie Wit da Hoodie,0.153,0.718,215305.0,0.454,4.6e-05,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
3,3,1wJRveJZLSb1rjhnUHQiv6,Swervin (feat. 6ix9ine),Hoodie SZN,A Boogie Wit da Hoodie,0.0153,0.581,189487.0,0.662,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
4,4,0jAfdqv18goRTUxm3ilRjb,Startender (feat. Offset and Tyga),Hoodie SZN,A Boogie Wit da Hoodie,0.0235,0.736,192779.0,0.622,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
5,5,3L19besdNQzd342qL78xqm,Demons and Angels (feat. Juice WRLD),Hoodie SZN,A Boogie Wit da Hoodie,0.0114,0.81,214593.0,0.55,6e-06,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
6,6,1Gno5IBUHh6vUKp4nxu2Yd,Love Drugs and Sex,Hoodie SZN,A Boogie Wit da Hoodie,0.55,0.762,157863.0,0.553,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
7,7,2A4vCndSkZ3jd5nfh3a8gc,Skeezers,Hoodie SZN,A Boogie Wit da Hoodie,0.558,0.792,198054.0,0.484,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
8,8,1srm5YnZiu7Ksnd2UTTmmk,Savage,Hoodie SZN,A Boogie Wit da Hoodie,0.188,0.78,169046.0,0.402,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
9,9,3UhRsQVrELOATEiY7E9X2T,Come Closer (feat. Queen Naija),Hoodie SZN,A Boogie Wit da Hoodie,0.22,0.741,156512.0,0.556,0.0,...,6,A Boogie Wit Da Hoodie,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...


**Problems:**
1. Pitchfork doesn't write reviews for all of the albums on the Billboard 200. For example, the Spider-Man: Into The Spider-Verse soundtrack was not covered by Pitchfork. The question is: How many albums on the Billboard 200 do not have Pitchfork Reviews? I could try merging the Pitchfork dataset with the Billboard dataset and then compare that to the original Billboard dataset and see what was emitted.

In [64]:
merged_df = pd.merge(billboard,pitchfork, on = "album")
print("Out of",billboard.shape[0],"albums,", billboard.shape[0] - merged_df.shape[0],"albums were not reviewed by Pitchfork.")
print("The proportion of billboard 200 albums reviewed is: ", merged_df.shape[0]/billboard.shape[0])
print("The difference between reviewered and not reviewed albums is: ",billboard.shape[0] - (billboard.shape[0] - merged_df.shape[0]))

Out of 573947 albums, 447319 albums were not reviewed by Pitchfork.
The proportion of billboard 200 albums reviewed is:  0.22062664322663939
The difference between reviewered and not reviewed albums is:  126628


1. (continued) Here we can see that only 22% of billboard 200 albums were reviewed by Pitchfork. This could be because below a certain threshold certain albums aren't considered relevant enough to be reviewed by Pitchfork.

Let's see if there's a billboard 200 threshold where this difference decreases. Perhaps, Pitchfork only reviews an album if they make it to the top 50 on the billboard 200. However, the billboard 200 dataset includes duplicates of albums and pitchfork only writes one review per album. In order to condense the billboard dataframe, I am going to use a groupby and find the minimum value of rank in order to find the highest ranking each album has achieved.

## String cleaning: lowercasing artist and album and then removing punctation

In [69]:
#Billboard preprocessing
artist = billboard["artist"]
artist = artist.str.lower()
artist = artist.str.replace("&","and")
artist
billboard["artist"] = artist

album = billboard["album"]
album = album.str.lower()
album = album.str.replace("&","and")
billboard["album"] = album
billboard

Unnamed: 0,index,id,date,artist,album,rank,length,track_length
0,0,1,,,,,,
1,1,2,2019-01-19,a boogie wit da hoodie,hoodie szn,1.0,20.0,185233.800000
2,2,3,2019-01-19,21 savage,i am > i was,2.0,15.0,211050.733333
3,3,4,2019-01-19,soundtrack,spider-man: into the spider-verse,3.0,13.0,190866.384615
4,4,5,2019-01-19,meek mill,championships,4.0,19.0,219173.894737
5,5,6,2019-01-19,post malone,beerbongs and bentleys,5.0,18.0,214113.611111
6,6,7,2019-01-19,travis scott,astroworld,6.0,17.0,207191.823529
7,7,8,2019-01-19,lady gaga and bradley cooper,a star is born (soundtrack),7.0,19.0,220006.000000
8,8,9,2019-01-19,drake,scorpion,8.0,25.0,217055.640000
9,9,10,2019-01-19,kodak black,dying to live,9.0,16.0,179588.750000


In [70]:
artist = pitchfork["artist"]
artist = artist.str.lower()
artist = artist.str.replace("&","and")
pitchfork["artist"] = artist

album = pitchfork["album"]
album = album.str.lower()
album = album.str.replace("&","and")
pitchfork["album"] = album
pitchfork

Unnamed: 0,index,id,artist,album,genre,score,date,author,role,review,bnm,link
0,0,0,david byrne,“…the best live show of all time” — nme ep,Rock,5.5,January 11 2019,Andy Beta,Contributor,"Viva Brother, Terris, Mansun, the Twang, Joe L...",0,https://pitchfork.com/reviews/albums/david-byr...
1,1,1,dj healer,lost lovesongs / lostsongs vol. 2,Electronic,6.2,January 11 2019,Chal Ravens,Contributor,"The Prince of Denmark—that is, the proper prin...",0,https://pitchfork.com/reviews/albums/dj-healer...
2,2,2,jorge velez,roman birds,Electronic,7.9,January 10 2019,Philip Sherburne,Contributing Editor,"Jorge Velez has long been prolific, but that’s...",0,https://pitchfork.com/reviews/albums/jorge-vel...
3,3,3,chandra,transportation eps,Rock,7.8,January 10 2019,Andy Beta,Contributor,When the Avalanches returned in 2016 after an ...,0,https://pitchfork.com/reviews/albums/chandra-t...
4,4,4,the chainsmokers,sick boy,Electronic,3.1,January 9 2019,Larry Fitzmaurice,Contributor,We’re going to be stuck with the Chainsmokers ...,0,https://pitchfork.com/reviews/albums/the-chain...
5,5,5,silent servant,shadows of death and desire,Electronic,7.8,January 9 2019,Harley Brown,Contributor,Ever since Mudd Club DJ Johnny Dynell played I...,0,https://pitchfork.com/reviews/albums/silent-se...
6,6,6,a boogie wit da hoodie,hoodie szn,Rap,6.8,January 8 2019,Alphonse Pierre,Staff Writer,"In New York, time moves at its own pace: Faceb...",0,https://pitchfork.com/reviews/albums/a-boogie-...
7,7,7,çaykh,v i s c 0 9,Experimental,7.3,January 8 2019,Tom Hawking,Contributor,"The Berlin-based producer and DJ Çaykh, born N...",0,https://pitchfork.com/reviews/albums/caykh-v-i...
8,8,8,the-dream,"ménage à trois: sextape vol. 1, 2, 3",Pop/R&B,7.4,January 7 2019,Sheldon Pearce,Contributing Writer,Much of The-Dream’s underappreciated body of w...,0,https://pitchfork.com/reviews/albums/the-dream...
9,9,9,doug paisley,starter home,Folk/Country,7.7,January 7 2019,Amanda Wicks,Associate Staff Writer,"For a decade, Canadian singer/songwriter Doug ...",0,https://pitchfork.com/reviews/albums/doug-pais...


## Removing duplicate albums in the billboard dataset

In [71]:
grouped_billboard = billboard.loc[billboard.groupby("album")["rank"].idxmin()]
len(grouped_billboard)

32936

Doing some data cleaning by removing unnecessary colums.

In [72]:
grouped_billboard = grouped_billboard.drop(["id","index"],axis=1).reset_index().drop(["index"], axis = 1)

Checking the unique value counts of album to see if it is equal to the number of rows.

In [73]:
import difflib

grouped_billboard.index = grouped_billboard.index.map(lambda x: difflib.get_close_matches(x, pitchfork.index)[0])

TypeError: 'int' object is not iterable

Now, to **merge** the the billboard dataframe and the pitchfork dataframe. We check the length of each dataframe so we can check to see how many albums didn't receive Pitchfork reviews.

In [44]:
merged_bbpitch = pd.merge(grouped_billboard, pitchfork, on = "album")
print("The length of the merged dataset is",len(merged_bbpitch),"\nThe length of the original billboard dataset is", len(grouped_billboard))
print("The length of the pitchfork dataset is", len(pitchfork))

The length of the merged dataset is 4295 
The length of the original billboard dataset is 32764
The length of the pitchfork dataset is 20873


In [58]:
diff_df.head(1)

Unnamed: 0,date_x,artist_x,album,rank,length,track_length,index,id,artist_y,genre,score,date_y,author,role,review,bnm,link
0,2018-03-31,xxxtentacion,,1.0,18.0,151254.666667,2244,2247,ed sheeran,Rock,2.8,March 10 2017,Laura Snapes,Contributor,Ed Sheeran needs you to know that he did not g...,0,https://pitchfork.com/reviews/albums/22960-div...


In [56]:
#Checking to see which artist names are different
diff_df = merged_bbpitch[merged_bbpitch["artist_x"] != merged_bbpitch["artist_y"]]
diff_df = diff_df.drop(['index_x','id_x'], columns = 1) #,"id_x","date_x","rank","length","track_length","index_y","id_y",], axis = 1)
diff_df

ValueError: Cannot specify both 'labels' and 'index'/'columns'

## Sorting by date

The following code is good for sorting by date, but the datasets need their strings cleaned before being merged

Below we look at all of the rows where the artist name in the billboard dataframe is different than the artist name in the pitchfork dataframe.

In [81]:
recent_merged[recent_merged["artist_x"] != recent_merged["artist_y"]]
recent_merged.loc[1561,"review"]

"The music Bo Anders Persson made in the late 1960s and early 70s may not have spread wide, but its influence went deep. While his Swedish groups Pärson Sound, International Harvester, and Träd, Gräs Och Stenar didn’t sell tons of records, for a certain cross-section of the underground, their sprawling swirl of psych, folk, noise, and\xa0abstraction\xa0was a sound ripe for worship.\xa0Bardo Pond, Acid Mothers Temple, and Sunburned Hand of the Man all owe their smoky trails in part to the paths that Persson’s bands blazed. Persson’s solo music might have made as much impact over the past five decades, if anyone had actually heard it. One piece did make a small dent: the 1967 tape-loop protest “Proteinimperialism,” released in 1970 on a split LP with fellow countryman\xa0Folke Rabe. But it turns out Persson had a lot more going on before he formed his avant-rock groups with fellow students at Stockholm’s Royal College of Music. Inspired to “play music the way I myself imagined that it sh

Cleaning up the new dataframe, giving it proper labels, etc.

In [69]:
recent_merged = recent_merged.rename(columns = {"date_x": "date of highest album rank", "artist_x": "artist"})

SyntaxError: invalid syntax (<ipython-input-69-c685068ba28f>, line 1)

So, that's an interesting find. It raises a couple questions: first of all, does pitchfork review albums that don't make it to the billboard 200? If so, how could I check that?

Right off the bat, it seems easiest just to check to find albums that are not in the pitchfork data.

In [51]:
pitch_series = set(pitchfork["album"])
unique_bill_series = pd.Series(list(set(billboard["album"])))
bill_series = unique_bill_series.isin(pitch_series)
#bill_series.sum()
len(pitch_series)
len(pitchfork)

20873

## Now begins the other stuff

In [None]:
top_10_albums = grouped_billboard[grouped_billboard["rank"] <= 10]
top_10_albums.head(10)

In [None]:
top_50_df = billboard[billboard["rank"] <= 50]
top_50_df.shape

In [None]:
merged_df = pd.merge(top_50_df,pitchfork, on = "album")
print("Out of",top_50_df.shape[0],"albums,", top_50_df.shape[0] - merged_df.shape[0],"albums were not reviewed by Pitchfork.")
print("The proportion of billboard 200 albums reviewed is: ", merged_df.shape[0]/top_50_df.shape[0])
print("However, pitchfork reviewed a total of:",pitchfork.shape[0],"albums.")

In [None]:
top_20_df = billboard[billboard["rank"] <= 20]
top_20_df.shape

In [None]:
merged_df = pd.merge(top_20_df,pitchfork, on = "album")
print("Out of",top_20_df.shape[0],"albums,", top_20_df.shape[0] - merged_df.shape[0],"albums were not reviewed by Pitchfork.")
print("The proportion of billboard 200 albums reviewed is: ", merged_df.shape[0]/top_20_df.shape[0])
print("However, pitchfork reviewed a total of:",pitchfork.shape[0],"albums.")

In [None]:
top_10_df = billboard[billboard["rank"] <= 10]
top_10_df.shape

In [None]:
merged_df = pd.merge(top_10_df,pitchfork, on = "album")
print("Out of",top_10_df.shape[0],"albums,", top_10_df.shape[0] - merged_df.shape[0],"albums were not reviewed by Pitchfork.")
print("The proportion of billboard 200 albums reviewed is: ", merged_df.shape[0]/top_10_df.shape[0])
print("However, pitchfork reviewed a total of:",pitchfork.shape[0],"albums.")

## Merge The Three Datasets Together

In [None]:
#albums_names = list(albums.columns.values)
#albums_names
albums.query("rank=='1.0'")

rough_df = pd.merge(acoustic,albums, on = "album")
rough_df.head(5)
rough_df.shape

In [None]:
albums.head(201)

In [None]:
reviews.head()
#print("... The Best Live Show of All Time ", reviews.iloc[0,9])

In [None]:
checked_df = final_df[final_df['artist_x'] != final_df['artist_y']]
len(checked_df)

In [None]:
second_df = final_df.drop(columns = ["id_x","album_id","length","artist_y"])

In [None]:
second_df = final_df.rename(index=str, columns={"artist_x": "artist","date_x": "release Date", "date_y": "billboard date"})
second_df.head()

In [None]:
final_df = pd.merge(final_df,reviews, on = "artist")

In [None]:
len(final_df)

In [None]:
checked_df = final_df[final_df['artist_x'] != final_df['artist_y']]