In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Load the scrape file into a DataFrame.
file = 'full_scrape_20yr'
meta_df = pd.read_csv(filepath_or_buffer=file)
# Show the column labels; these are the candidate fields for the SQL tables.
meta_df.columns


Index(['artist', 'album', 'date', 'week_num', 'meta_score', 'user_score',
       'pickle_name', 'album_img', 'crit_rev_num', 'user_rev_num',
       'record_label', 'album_genre'],
      dtype='object')

In [3]:
# Working copy of the data with every column except 'pickle_name'.
# drop() returns a new frame, so meta_df itself is left untouched.
meta_sql_df = meta_df.drop(labels='pickle_name', axis='columns')
meta_sql_df.head(5)

Unnamed: 0,artist,album,date,week_num,meta_score,user_score,album_img,crit_rev_num,user_rev_num,record_label,album_genre
0,The Streets,Original Pirate Material,"October 22, 2002",43,90,87,https://static.metacritic.com/images/products/...,25.0,87,Locked On / Vice,Electronic
1,Tom Waits,Alice,"May 7, 2002",19,90,85,https://static.metacritic.com/images/products/...,20.0,86,Epitaph / Anti-,Rock
2,Queens of the Stone Age,Songs For The Deaf,"August 27, 2002",35,89,89,https://static.metacritic.com/images/products/...,23.0,89,Interscope,Rock
3,Spoon,Kill The Moonlight,"August 20, 2002",34,88,88,https://static.metacritic.com/images/products/...,21.0,88,Merge,Indie
4,Wilco,Yankee Hotel Foxtrot,"April 23, 2002",17,87,89,https://static.metacritic.com/images/products/...,26.0,89,Nonesuch,Rock


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of meta_sql_df. A 'count' lower than the others indicates missing
# values in that column.
meta_sql_df.describe()

Unnamed: 0,week_num,meta_score,user_score,crit_rev_num,user_rev_num
count,9319.0,9319.0,9319.0,9106.0,9319.0
mean,25.162786,75.60661,59.050113,14.829673,58.743642
std,13.618921,7.106779,35.605379,7.466144,35.791688
min,1.0,15.0,0.0,0.0,0.0
25%,13.0,73.0,0.0,9.0,0.0
50%,24.0,76.0,78.0,13.0,78.0
75%,38.0,80.0,84.0,19.0,84.0
max,52.0,99.0,95.0,51.0,93.0


In [5]:
# Band each album's meta_score into one of five labelled ranges.
# Edges are right-inclusive: [0, 59.9], (59.9, 69.9], (69.9, 79.9],
# (79.9, 89.9], (89.9, 100]; include_lowest=True keeps a score of 0 in the
# first band.
# NOTE(review): a score of exactly 60 lands in "60s", so the first label
# effectively means "below 60" — confirm the label wording is intended.
group_names = ["60 and under", "60s", "70s", "80s", "90s"]
bins = [0, 59.9, 69.9, 79.9, 89.9, 100]

# pd.cut with labels yields a categorical column; it is stored on
# meta_sql_df as 'meta_groups'.
meta_sql_df["meta_groups"] = pd.cut(
    meta_sql_df["meta_score"],
    bins=bins,
    labels=group_names,
    include_lowest=True,
)
meta_sql_df.head()


Unnamed: 0,artist,album,date,week_num,meta_score,user_score,album_img,crit_rev_num,user_rev_num,record_label,album_genre,meta_groups
0,The Streets,Original Pirate Material,"October 22, 2002",43,90,87,https://static.metacritic.com/images/products/...,25.0,87,Locked On / Vice,Electronic,90s
1,Tom Waits,Alice,"May 7, 2002",19,90,85,https://static.metacritic.com/images/products/...,20.0,86,Epitaph / Anti-,Rock,90s
2,Queens of the Stone Age,Songs For The Deaf,"August 27, 2002",35,89,89,https://static.metacritic.com/images/products/...,23.0,89,Interscope,Rock,80s
3,Spoon,Kill The Moonlight,"August 20, 2002",34,88,88,https://static.metacritic.com/images/products/...,21.0,88,Merge,Indie,80s
4,Wilco,Yankee Hotel Foxtrot,"April 23, 2002",17,87,89,https://static.metacritic.com/images/products/...,26.0,89,Nonesuch,Rock,80s


In [18]:
# Count rows per (record_label, meta_groups) pair.
# - 'meta_groups' is categorical (built with pd.cut), so observed=False is
#   passed explicitly: every label/band combination gets a row, with 0 where
#   no album exists. This pins the behaviour instead of relying on the
#   implicit default, which newer pandas versions deprecate and change.
# - .count() tallies the non-null values of each remaining column, so columns
#   containing NaN can show smaller numbers than the others.
# - With groupby's default settings, rows whose record_label is NaN are not
#   part of any group and therefore do not appear in this table.
label_df = (
    meta_sql_df
    .groupby(["record_label", "meta_groups"], observed=False)
    .count()
    .reset_index()  # turn the two grouping keys back into ordinary columns
)
label_df

Unnamed: 0,record_label,meta_groups,artist,album,date,week_num,meta_score,user_score,album_img,crit_rev_num,user_rev_num,album_genre
0,!K7,60 and under,0,0,0,0,0,0,0,0,0,0
1,!K7,60s,0,0,0,0,0,0,0,0,0,0
2,!K7,70s,10,10,10,10,10,10,10,10,10,10
3,!K7,80s,4,4,4,4,4,4,4,4,4,4
4,!K7,90s,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
8965,~scape,60 and under,0,0,0,0,0,0,0,0,0,0
8966,~scape,60s,0,0,0,0,0,0,0,0,0,0
8967,~scape,70s,0,0,0,0,0,0,0,0,0,0
8968,~scape,80s,1,1,1,1,1,1,1,1,1,1


In [7]:
# Rows of meta_sql_df whose 'record_label' is missing (NaN).
label_df_na = meta_sql_df.loc[lambda frame: frame['record_label'].isna()]

In [8]:
# Display the rows that have no record_label, to inspect what else is
# missing in them.
label_df_na

Unnamed: 0,artist,album,date,week_num,meta_score,user_score,album_img,crit_rev_num,user_rev_num,record_label,album_genre,meta_groups
25,Sigur Rós,(),"October 29, 2002",44,82,88,,,80,,,80s
40,Röyksopp,Melody A.M.,"October 15, 2002",42,81,87,,,86,,,80s
236,Sinéad O'Connor,Sean-Nós Nua,"October 8, 2002",41,65,80,,,79,,,60s
244,The Jon Spencer Blues Explosion,Plastic Fang,"April 9, 2002",15,64,0,,,0,,,60s
320,Chance the Rapper,Coloring Book [Mixtape],"May 13, 2016",19,89,83,https://static.metacritic.com/images/products/...,21.0,83,,Rap,80s
...,...,...,...,...,...,...,...,...,...,...,...,...
9140,Arctic Monkeys,Tranquility Base Hotel + Casino,"May 11, 2018",19,76,72,,,0,,,70s
9145,Felicita,hej!,"August 3, 2018",31,76,66,https://static.metacritic.com/images/products/...,8.0,66,,Dance,70s
9146,Kamaal Williams,The Return,"May 25, 2018",21,76,0,https://static.metacritic.com/images/products/...,9.0,0,,Pop,70s
9159,Florence + the Machine,High As Hope,"June 29, 2018",26,75,85,,,0,,,70s
