In [89]:
import pandas as pd
import re
import datetime

This document is purely for practicing reshaping dataframes using functions such as pd.melt(). I followed along from this blogpost: https://www.jeannicholashould.com/tidy-data-in-python.html which introduced me to a number of new concepts in reshaping and tidying data. 

In [90]:
df = pd.read_csv('pew-raw.csv')
df

Unnamed: 0,religion,<$10k,$10-20k,$20-30k,$30-40k,$40-50k,$50-75k
0,Agnostic,27,34,60,81,76,137
1,Atheist,12,27,37,52,35,70
2,Buddhist,27,21,30,34,33,58
3,Catholic,418,617,732,670,638,1116
4,Dont know/refused,15,14,15,11,10,35
5,Evangelical Prot,575,869,1064,982,881,1486
6,Hindu,1,9,7,9,11,34
7,Historically Black Prot,228,244,236,238,197,223
8,Jehovahs Witness,20,27,24,24,21,30
9,Jewish,19,19,25,25,30,95


In [91]:
df1 = pd.melt(frame=df, id_vars=['religion'], var_name='Income', value_name='Count')

df1 = df1.sort_values(by=['religion'])

df1.head()

Unnamed: 0,religion,Income,Count
0,Agnostic,<$10k,27
30,Agnostic,$30-40k,81
40,Agnostic,$40-50k,76
50,Agnostic,$50-75k,137
10,Agnostic,$10-20k,34


In [92]:
df = pd.read_csv('billboard.csv', encoding='mac_latin2')
df

Unnamed: 0,year,artist.inverted,track,time,genre,date.entered,date.peaked,x1st.week,x2nd.week,x3rd.week,...,x67th.week,x68th.week,x69th.week,x70th.week,x71st.week,x72nd.week,x73rd.week,x74th.week,x75th.week,x76th.week
0,2000,Destiny's Child,Independent Women Part I,3:38,Rock,2000-09-23,2000-11-18,78,63.0,49.0,...,,,,,,,,,,
1,2000,Santana,"Maria, Maria",4:18,Rock,2000-02-12,2000-04-08,15,8.0,6.0,...,,,,,,,,,,
2,2000,Savage Garden,I Knew I Loved You,4:07,Rock,1999-10-23,2000-01-29,71,48.0,43.0,...,,,,,,,,,,
3,2000,Madonna,Music,3:45,Rock,2000-08-12,2000-09-16,41,23.0,18.0,...,,,,,,,,,,
4,2000,"Aguilera, Christina",Come On Over Baby (All I Want Is You),3:38,Rock,2000-08-05,2000-10-14,57,47.0,45.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312,2000,Ghostface Killah,Cherchez LaGhost,3:04,R&B,2000-08-05,2000-08-05,98,,,...,,,,,,,,,,
313,2000,"Smith, Will",Freakin' It,3:58,Rap,2000-02-12,2000-02-12,99,99.0,99.0,...,,,,,,,,,,
314,2000,Zombie Nation,Kernkraft 400,3:30,Rock,2000-09-02,2000-09-02,99,99.0,,...,,,,,,,,,,
315,2000,"Eastsidaz, The",Got Beef,3:58,Rap,2000-07-01,2000-07-01,99,99.0,,...,,,,,,,,,,


In [93]:
id_vars = ["year",
           "artist.inverted",
           "track",
           "time",
           "genre",
           "date.entered",
           "date.peaked"]
df1 = pd.melt(frame=df, id_vars=id_vars, var_name='week', value_name='rank')

df1

Unnamed: 0,year,artist.inverted,track,time,genre,date.entered,date.peaked,week,rank
0,2000,Destiny's Child,Independent Women Part I,3:38,Rock,2000-09-23,2000-11-18,x1st.week,78.0
1,2000,Santana,"Maria, Maria",4:18,Rock,2000-02-12,2000-04-08,x1st.week,15.0
2,2000,Savage Garden,I Knew I Loved You,4:07,Rock,1999-10-23,2000-01-29,x1st.week,71.0
3,2000,Madonna,Music,3:45,Rock,2000-08-12,2000-09-16,x1st.week,41.0
4,2000,"Aguilera, Christina",Come On Over Baby (All I Want Is You),3:38,Rock,2000-08-05,2000-10-14,x1st.week,57.0
...,...,...,...,...,...,...,...,...,...
24087,2000,Ghostface Killah,Cherchez LaGhost,3:04,R&B,2000-08-05,2000-08-05,x76th.week,
24088,2000,"Smith, Will",Freakin' It,3:58,Rap,2000-02-12,2000-02-12,x76th.week,
24089,2000,Zombie Nation,Kernkraft 400,3:30,Rock,2000-09-02,2000-09-02,x76th.week,
24090,2000,"Eastsidaz, The",Got Beef,3:58,Rap,2000-07-01,2000-07-01,x76th.week,


In [94]:
df1['week'] = df1['week'].str.extract('(\d+)').astype(int)
df1 = df1.dropna()
df1['rank'] = df1['rank'].astype(int)
df1['date'] = pd.to_datetime(df1['date.entered']) + pd.to_timedelta(df1['week'], unit='w') - pd.DateOffset(weeks=1)
df1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['rank'] = df1['rank'].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df1['date'] = pd.to_datetime(df1['date.entered']) + pd.to_timedelta(df1['week'], unit='w') - pd.DateOffset(weeks=1)


Unnamed: 0,year,artist.inverted,track,time,genre,date.entered,date.peaked,week,rank,date
0,2000,Destiny's Child,Independent Women Part I,3:38,Rock,2000-09-23,2000-11-18,1,78,2000-09-23
1,2000,Santana,"Maria, Maria",4:18,Rock,2000-02-12,2000-04-08,1,15,2000-02-12
2,2000,Savage Garden,I Knew I Loved You,4:07,Rock,1999-10-23,2000-01-29,1,71,1999-10-23
3,2000,Madonna,Music,3:45,Rock,2000-08-12,2000-09-16,1,41,2000-08-12
4,2000,"Aguilera, Christina",Come On Over Baby (All I Want Is You),3:38,Rock,2000-08-05,2000-10-14,1,57,2000-08-05
...,...,...,...,...,...,...,...,...,...,...
19663,2000,Lonestar,Amazed,4:25,Country,1999-06-05,2000-03-04,63,45,2000-08-12
19700,2000,Creed,Higher,5:16,Rock,1999-09-11,2000-07-22,63,50,2000-11-18
19980,2000,Lonestar,Amazed,4:25,Country,1999-06-05,2000-03-04,64,50,2000-08-19
20017,2000,Creed,Higher,5:16,Rock,1999-09-11,2000-07-22,64,50,2000-11-25


In [95]:
df1 = df1.sort_values(ascending=True, by=["year","artist.inverted","track","week","rank"]).reset_index(drop=True)
df1

Unnamed: 0,year,artist.inverted,track,time,genre,date.entered,date.peaked,week,rank,date
0,2000,2 Pac,Baby Don't Cry (Keep Ya Head Up II),4:22,Rap,2000-02-26,2000-03-11,1,87,2000-02-26
1,2000,2 Pac,Baby Don't Cry (Keep Ya Head Up II),4:22,Rap,2000-02-26,2000-03-11,2,82,2000-03-04
2,2000,2 Pac,Baby Don't Cry (Keep Ya Head Up II),4:22,Rap,2000-02-26,2000-03-11,3,72,2000-03-11
3,2000,2 Pac,Baby Don't Cry (Keep Ya Head Up II),4:22,Rap,2000-02-26,2000-03-11,4,77,2000-03-18
4,2000,2 Pac,Baby Don't Cry (Keep Ya Head Up II),4:22,Rap,2000-02-26,2000-03-11,5,87,2000-03-25
...,...,...,...,...,...,...,...,...,...,...
5302,2000,matchbox twenty,Bent,4:12,Rock,2000-04-29,2000-07-22,35,33,2000-12-23
5303,2000,matchbox twenty,Bent,4:12,Rock,2000-04-29,2000-07-22,36,37,2000-12-30
5304,2000,matchbox twenty,Bent,4:12,Rock,2000-04-29,2000-07-22,37,38,2001-01-06
5305,2000,matchbox twenty,Bent,4:12,Rock,2000-04-29,2000-07-22,38,38,2001-01-13
