In [1]:
import pandas as pd
import numpy as np

This chapter objectives:
1. Unpivoting/melting/gathering columns into rows
2. Pivoting/casting/spreading rows into columns
3. Normalizing data by separating a dataframe into multiple tables
4. Assembling data from multiple parts

# 1. Unpivot Columns into Rows

- changing from "wide" dataframe into "long" dataframe
- means to convert every column names into values inside new column for every rows
- another new column will be created for the column name values
- __suitable when the column names have similar type__

Melt Function
- to transpose dataframe from "wide" table into "long" table
- used when columns name suitable to convert into rows (__unpivot__)

Parameters
- id_vars = telling pandas to take all other columns __(beside id_vars)__ , and combine them into one column.
- value_vars = column(s) to unpivot, default all columns except __id_vars__.
- var_name = Rename variable column.
- value_name = Rename value column.

## (a) Column with One Type

- when we convert column names into rows, new column is created
- column names become the data for new column and they are the same kind

Example 1


In [2]:
sdmn = pd.DataFrame.from_dict({
    'Name':['Tommy T','KSImon','W2S','Curryboy'],
    '8/4/2020':np.random.randint(120,900, size=(1,4))[0],
    '9/4/2020':np.random.randint(120,900, size=(1,4))[0],
    '10/4/2020':np.random.randint(120,900, size=(1,4))[0],
    '11/4/2020':np.random.randint(120,900, size=(1,4))[0]
}, orient='columns')

In [3]:
sdmn

Unnamed: 0,Name,8/4/2020,9/4/2020,10/4/2020,11/4/2020
0,Tommy T,601,464,734,275
1,KSImon,306,506,598,491
2,W2S,356,426,823,763
3,Curryboy,273,783,189,815


Since there are many date columns  __(type = date)__ , it is better to convert them into rows.

In [4]:
sdmn_long = sdmn.melt(id_vars=['Name'], var_name='Date', value_name='New subcribers (K)')

In [5]:
sdmn_long

Unnamed: 0,Name,Date,New subcribers (K)
0,Tommy T,8/4/2020,601
1,KSImon,8/4/2020,306
2,W2S,8/4/2020,356
3,Curryboy,8/4/2020,273
4,Tommy T,9/4/2020,464
5,KSImon,9/4/2020,506
6,W2S,9/4/2020,426
7,Curryboy,9/4/2020,783
8,Tommy T,10/4/2020,734
9,KSImon,10/4/2020,598


Example 2

In [6]:
billboard = pd.read_csv('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone/data/billboard.csv')

In [7]:
billboard.head()

Unnamed: 0,year,artist,track,time,date.entered,wk1,wk2,wk3,wk4,wk5,...,wk67,wk68,wk69,wk70,wk71,wk72,wk73,wk74,wk75,wk76
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,87,82.0,72.0,77.0,87.0,...,,,,,,,,,,
1,2000,2Ge+her,The Hardest Part Of ...,3:15,2000-09-02,91,87.0,92.0,,,...,,,,,,,,,,
2,2000,3 Doors Down,Kryptonite,3:53,2000-04-08,81,70.0,68.0,67.0,66.0,...,,,,,,,,,,
3,2000,3 Doors Down,Loser,4:24,2000-10-21,76,76.0,72.0,69.0,67.0,...,,,,,,,,,,
4,2000,504 Boyz,Wobble Wobble,3:35,2000-04-15,57,34.0,25.0,17.0,17.0,...,,,,,,,,,,


The dataframe is too wide. We can convert them into long since most of the columns are about rating in weeks __(type = week)__.

In [8]:
billboard_long = billboard.melt(id_vars=['year','artist','track','time','date.entered'], var_name='week', value_name='rating')

In [9]:
billboard_long

Unnamed: 0,year,artist,track,time,date.entered,week,rating
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk1,87.0
1,2000,2Ge+her,The Hardest Part Of ...,3:15,2000-09-02,wk1,91.0
2,2000,3 Doors Down,Kryptonite,3:53,2000-04-08,wk1,81.0
3,2000,3 Doors Down,Loser,4:24,2000-10-21,wk1,76.0
4,2000,504 Boyz,Wobble Wobble,3:35,2000-04-15,wk1,57.0
...,...,...,...,...,...,...,...
24087,2000,Yankee Grey,Another Nine Minutes,3:10,2000-04-29,wk76,
24088,2000,"Yearwood, Trisha",Real Live Woman,3:55,2000-04-01,wk76,
24089,2000,Ying Yang Twins,Whistle While You Tw...,4:19,2000-03-18,wk76,
24090,2000,Zombie Nation,Kernkraft 400,3:30,2000-09-02,wk76,


## (b) Column with Multiple Types

This part we will explore what to do when we have multiple column names that have more than one type

In [10]:
ebola = pd.read_csv('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone/data/country_timeseries.csv')

In [11]:
ebola.head()

Unnamed: 0,Date,Day,Cases_Guinea,Cases_Liberia,Cases_SierraLeone,Cases_Nigeria,Cases_Senegal,Cases_UnitedStates,Cases_Spain,Cases_Mali,Deaths_Guinea,Deaths_Liberia,Deaths_SierraLeone,Deaths_Nigeria,Deaths_Senegal,Deaths_UnitedStates,Deaths_Spain,Deaths_Mali
0,1/5/2015,289,2776.0,,10030.0,,,,,,1786.0,,2977.0,,,,,
1,1/4/2015,288,2775.0,,9780.0,,,,,,1781.0,,2943.0,,,,,
2,1/3/2015,287,2769.0,8166.0,9722.0,,,,,,1767.0,3496.0,2915.0,,,,,
3,1/2/2015,286,,8157.0,,,,,,,,3496.0,,,,,,
4,12/31/2014,284,2730.0,8115.0,9633.0,,,,,,1739.0,3471.0,2827.0,,,,,


In [12]:
# Most of the columns name are about number of cases and death for every country
ebola.columns

Index(['Date', 'Day', 'Cases_Guinea', 'Cases_Liberia', 'Cases_SierraLeone',
       'Cases_Nigeria', 'Cases_Senegal', 'Cases_UnitedStates', 'Cases_Spain',
       'Cases_Mali', 'Deaths_Guinea', 'Deaths_Liberia', 'Deaths_SierraLeone',
       'Deaths_Nigeria', 'Deaths_Senegal', 'Deaths_UnitedStates',
       'Deaths_Spain', 'Deaths_Mali'],
      dtype='object')

In [13]:
# We will be selecting Liberia and SierraLeone for number of cases and deaths in column
ebola_subset = ebola.loc[:5,['Date','Day','Cases_Liberia','Cases_SierraLeone','Deaths_Liberia','Deaths_SierraLeone']]

In [14]:
ebola_subset

Unnamed: 0,Date,Day,Cases_Liberia,Cases_SierraLeone,Deaths_Liberia,Deaths_SierraLeone
0,1/5/2015,289,,10030.0,,2977.0
1,1/4/2015,288,,9780.0,,2943.0
2,1/3/2015,287,8166.0,9722.0,3496.0,2915.0
3,1/2/2015,286,8157.0,,3496.0,
4,12/31/2014,284,8115.0,9633.0,3471.0,2827.0
5,12/28/2014,281,8018.0,9446.0,3423.0,2758.0


In [15]:
# First, we will melt the DataFrame
ebola_long = ebola_subset.melt(id_vars=['Date','Day'])

In [16]:
ebola_long.sample(n=5)

Unnamed: 0,Date,Day,variable,value
10,12/31/2014,284,Cases_SierraLeone,9633.0
19,1/4/2015,288,Deaths_SierraLeone,2943.0
22,12/31/2014,284,Deaths_SierraLeone,2827.0
1,1/4/2015,288,Cases_Liberia,
8,1/3/2015,287,Cases_SierraLeone,9722.0


As you can see in the variable column, the values can be categorized into two kind:
- status = whether it is death or cases
- country = whether it is from Liberia or Sierra Leone

Unlike in (a) where we have 
- sdmn table where __variable column values only on dates__ 
- billboard table where __variable column values only on week__

In [17]:
# Create a dataframe where it store variable column values that split into two columns 
# expand = If True, return DataFrame/MultiIndex expanding dimensionality.
#          If False (default), return Series/Index, containing lists of strings.
variable_split = ebola_long.variable.str.split('_', expand=True)

In [18]:
variable_split.sample(n=5)

Unnamed: 0,0,1
21,Deaths,SierraLeone
18,Deaths,SierraLeone
11,Cases,SierraLeone
0,Cases,Liberia
15,Deaths,Liberia


In [19]:
# Change column names from 0 and 1 into status and country
variable_split.columns = ['status','country']

In [20]:
# We combine the variable_split with ebola_long
# axis = 1 means combine by columns
ebola_parsed = pd.concat([ebola_long, variable_split], axis=1)

In [21]:
ebola_parsed.sample(n=5)

Unnamed: 0,Date,Day,variable,value,status,country
21,1/2/2015,286,Deaths_SierraLeone,,Deaths,SierraLeone
0,1/5/2015,289,Cases_Liberia,,Cases,Liberia
18,1/5/2015,289,Deaths_SierraLeone,2977.0,Deaths,SierraLeone
6,1/5/2015,289,Cases_SierraLeone,10030.0,Cases,SierraLeone
5,12/28/2014,281,Cases_Liberia,8018.0,Cases,Liberia


## (c) Redundant Rows After Unpivot

### Example 1

In [22]:
# Import data
weather = pd.read_csv('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone/data/weather.csv')

In [23]:
# As you can see, there are multipe columns which are the same type, days
weather

Unnamed: 0,id,year,month,element,d1,d2,d3,d4,d5,d6,...,d22,d23,d24,d25,d26,d27,d28,d29,d30,d31
0,MX17004,2010,1,tmax,,,,,,,...,,,,,,,,,27.8,
1,MX17004,2010,1,tmin,,,,,,,...,,,,,,,,,14.5,
2,MX17004,2010,2,tmax,,27.3,24.1,,,,...,,29.9,,,,,,,,
3,MX17004,2010,2,tmin,,14.4,14.4,,,,...,,10.7,,,,,,,,
4,MX17004,2010,3,tmax,,,,,32.1,,...,,,,,,,,,,
5,MX17004,2010,3,tmin,,,,,14.2,,...,,,,,,,,,,
6,MX17004,2010,4,tmax,,,,,,,...,,,,,,36.3,,,,
7,MX17004,2010,4,tmin,,,,,,,...,,,,,,16.7,,,,
8,MX17004,2010,5,tmax,,,,,,,...,,,,,,33.2,,,,
9,MX17004,2010,5,tmin,,,,,,,...,,,,,,18.2,,,,


In [24]:
# Unpivot the data so that all day columns become one
weather_melt = weather.melt(id_vars=['id','year','month','element'], var_name='day', value_name='temp')

In [25]:
weather_melt

Unnamed: 0,id,year,month,element,day,temp
0,MX17004,2010,1,tmax,d1,
1,MX17004,2010,1,tmin,d1,
2,MX17004,2010,2,tmax,d1,
3,MX17004,2010,2,tmin,d1,
4,MX17004,2010,3,tmax,d1,
...,...,...,...,...,...,...
677,MX17004,2010,10,tmin,d31,
678,MX17004,2010,11,tmax,d31,
679,MX17004,2010,11,tmin,d31,
680,MX17004,2010,12,tmax,d31,


In [26]:
# Clean data by removing NaN temp values and sort by month and day
weather_melt_clean = weather_melt[weather_melt['temp'].notnull()].sort_values(by=['month','day'])

In [27]:
weather_melt_clean.head(20)

Unnamed: 0,id,year,month,element,day,temp
638,MX17004,2010,1,tmax,d30,27.8
639,MX17004,2010,1,tmin,d30,14.5
222,MX17004,2010,2,tmax,d11,29.7
223,MX17004,2010,2,tmin,d11,13.4
24,MX17004,2010,2,tmax,d2,27.3
25,MX17004,2010,2,tmin,d2,14.4
486,MX17004,2010,2,tmax,d23,29.9
487,MX17004,2010,2,tmin,d23,10.7
46,MX17004,2010,2,tmax,d3,24.1
47,MX17004,2010,2,tmin,d3,14.4


- From the data above, as you can see there are redundant data
- This is because every ID has its tmin and tmax temperature for a day, but they are separated
- Instead of every row has temperature for tmax __OR__ tmin, we can change the data so that every row  has temperature for tmax __AND__ tmin
- For better data analysis, 'tmin' and 'tmax' can be convert into their own columns

In [28]:
# columns = when we select element, their values will be converted into columns
# element has tmin and tmax, therefore weather_tidy will have tmax and tmin columns
# values = what column will be using to refer as value for the new table, NaN will be removed
weather_tidy = weather_melt.pivot_table(
index=['id','year','month','day'],
columns='element',
values='temp')

In [29]:
weather_tidy

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,element,tmax,tmin
id,year,month,day,Unnamed: 4_level_1,Unnamed: 5_level_1
MX17004,2010,1,d30,27.8,14.5
MX17004,2010,2,d11,29.7,13.4
MX17004,2010,2,d2,27.3,14.4
MX17004,2010,2,d23,29.9,10.7
MX17004,2010,2,d3,24.1,14.4
MX17004,2010,3,d10,34.5,16.8
MX17004,2010,3,d16,31.1,17.6
MX17004,2010,3,d5,32.1,14.2
MX17004,2010,4,d27,36.3,16.7
MX17004,2010,5,d27,33.2,18.2


### Example 2

In [30]:
billboard_long.head()

Unnamed: 0,year,artist,track,time,date.entered,week,rating
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk1,87.0
1,2000,2Ge+her,The Hardest Part Of ...,3:15,2000-09-02,wk1,91.0
2,2000,3 Doors Down,Kryptonite,3:53,2000-04-08,wk1,81.0
3,2000,3 Doors Down,Loser,4:24,2000-10-21,wk1,76.0
4,2000,504 Boyz,Wobble Wobble,3:35,2000-04-15,wk1,57.0


In [31]:
billboard_long[billboard_long.track=='Loser'].head()

Unnamed: 0,year,artist,track,time,date.entered,week,rating
3,2000,3 Doors Down,Loser,4:24,2000-10-21,wk1,76.0
320,2000,3 Doors Down,Loser,4:24,2000-10-21,wk2,76.0
637,2000,3 Doors Down,Loser,4:24,2000-10-21,wk3,72.0
954,2000,3 Doors Down,Loser,4:24,2000-10-21,wk4,69.0
1271,2000,3 Doors Down,Loser,4:24,2000-10-21,wk5,67.0


- You can see that there are many repeated data, the only difference is week and number of rating
- The best way to tidy them are to separate table into two, the song table and rating table

In [32]:
# Store artists and their songs table
billboard_songs = billboard_long[['year','artist','track','time']]

In [33]:
# Have duplicate data, so we need to remove them
billboard_songs[billboard_songs['artist']=='2 Pac']

Unnamed: 0,year,artist,track,time
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22
317,2000,2 Pac,Baby Don't Cry (Keep...,4:22
634,2000,2 Pac,Baby Don't Cry (Keep...,4:22
951,2000,2 Pac,Baby Don't Cry (Keep...,4:22
1268,2000,2 Pac,Baby Don't Cry (Keep...,4:22
...,...,...,...,...
22507,2000,2 Pac,Baby Don't Cry (Keep...,4:22
22824,2000,2 Pac,Baby Don't Cry (Keep...,4:22
23141,2000,2 Pac,Baby Don't Cry (Keep...,4:22
23458,2000,2 Pac,Baby Don't Cry (Keep...,4:22


In [34]:
# Before remove duplicate
billboard_songs.shape

(24092, 4)

In [35]:
# Remove duplicate
billboard_songs = billboard_songs.drop_duplicates()

In [36]:
# No duplicate data anymore
billboard_songs[billboard_songs['artist']=='2 Pac']

Unnamed: 0,year,artist,track,time
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22


In [37]:
# After remove duplicate
billboard_songs.shape

(317, 4)

In [38]:
# Create and id for every song
billboard_songs['id'] = range(len(billboard_songs))

In [39]:
billboard_songs

Unnamed: 0,year,artist,track,time,id
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,0
1,2000,2Ge+her,The Hardest Part Of ...,3:15,1
2,2000,3 Doors Down,Kryptonite,3:53,2
3,2000,3 Doors Down,Loser,4:24,3
4,2000,504 Boyz,Wobble Wobble,3:35,4
...,...,...,...,...,...
312,2000,Yankee Grey,Another Nine Minutes,3:10,312
313,2000,"Yearwood, Trisha",Real Live Woman,3:55,313
314,2000,Ying Yang Twins,Whistle While You Tw...,4:19,314
315,2000,Zombie Nation,Kernkraft 400,3:30,315


In [40]:
# Create rating song table 
billboard_ratings = billboard_long.merge(billboard_songs, on=['year','artist','track','time'])

In [41]:
# Rating table shape
billboard_ratings.shape

(24092, 8)

In [42]:
billboard_ratings.head()

Unnamed: 0,year,artist,track,time,date.entered,week,rating,id
0,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk1,87.0,0
1,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk2,82.0,0
2,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk3,72.0,0
3,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk4,77.0,0
4,2000,2 Pac,Baby Don't Cry (Keep...,4:22,2000-02-26,wk5,87.0,0


In [43]:
# Subset only important column for rating song table
billboard_ratings = billboard_ratings[['id','date.entered','week','rating']]

In [44]:
billboard_ratings

Unnamed: 0,id,date.entered,week,rating
0,0,2000-02-26,wk1,87.0
1,0,2000-02-26,wk2,82.0
2,0,2000-02-26,wk3,72.0
3,0,2000-02-26,wk4,77.0
4,0,2000-02-26,wk5,87.0
...,...,...,...,...
24087,316,2000-04-29,wk72,
24088,316,2000-04-29,wk73,
24089,316,2000-04-29,wk74,
24090,316,2000-04-29,wk75,


## (d) Load and Combine From Multiple Tables

In [45]:
import os
import urllib
import glob

In [46]:
# Download multiple files
with open('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone/data/raw_data_urls.txt', 'r') as data_urls:
    for line, url in enumerate(data_urls):
        if line == 5:
            break
        fn = url.split('/')[-1].strip()
        fp = os.path.join('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone', 'data', fn)
        print(url)
        print(fp)
        urllib.request.urlretrieve(url, fp)

https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-01.csv

C:/Users/User/Desktop/Data Science/6. Pandas for Everyone\data\fhv_tripdata_2015-01.csv
https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-02.csv

C:/Users/User/Desktop/Data Science/6. Pandas for Everyone\data\fhv_tripdata_2015-02.csv
https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-03.csv

C:/Users/User/Desktop/Data Science/6. Pandas for Everyone\data\fhv_tripdata_2015-03.csv
https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-04.csv

C:/Users/User/Desktop/Data Science/6. Pandas for Everyone\data\fhv_tripdata_2015-04.csv
https://s3.amazonaws.com/nyc-tlc/trip+data/fhv_tripdata_2015-05.csv

C:/Users/User/Desktop/Data Science/6. Pandas for Everyone\data\fhv_tripdata_2015-05.csv


### First Method
 Create variable for every data set and concate them

In [47]:
# Save the file address as list
nyc_taxi_data = glob.glob('C:/Users/User/Desktop/Data Science/6. Pandas for Everyone/data/fhv_*')

In [48]:
# Every file has its own variable
taxi1 = pd.read_csv(nyc_taxi_data[0])
taxi2 = pd.read_csv(nyc_taxi_data[1])
taxi3 = pd.read_csv(nyc_taxi_data[2])
taxi4 = pd.read_csv(nyc_taxi_data[3])
taxi5 = pd.read_csv(nyc_taxi_data[4])

In [49]:
# Checking their shape
print(taxi1.shape)
print(taxi2.shape)
print(taxi3.shape)
print(taxi4.shape)
print(taxi5.shape)

(2746033, 3)
(3126401, 3)
(3281427, 3)
(3917789, 3)
(4296067, 3)


In [50]:
# Combining them all
taxi = pd.concat([taxi1,taxi2,taxi3,taxi4,taxi5])

### Second Method
Append all of them into a list, then concate them

In [51]:
# Create an empty list to append to
list_taxi_df = []

# Loop through each CSV filename
for csv_filename in nyc_taxi_data:
    
    # Load CSV file into dataframe
    df = pd.read_csv(csv_filename)
    
    # Append the dataframe to the list
    list_taxi_df.append(df)

print(len(list_taxi_df))

5


In [52]:
# list_taxi_df right now is looking like this
# [ [taxi1] , [taxi2] , [taxi3] , [taxi4] , taxi5] ]
# Because of that we need to concat to turn into pandas dataframe
taxi_loop_concat = pd.concat(list_taxi_df)

In [53]:
# Here we can visualize to see the difference
row = len(list_taxi_df)
col = len(list_taxi_df[0])
print('list_taxi_df shape: (',row,',',col,')')
print('taxi_loop_concat shape:',taxi_loop_concat.shape)

list_taxi_df shape: ( 5 , 2746033 )
taxi_loop_concat shape: (17367717, 3)


In [54]:
list_taxi_df

[        Dispatching_base_num          Pickup_date  locationID
 0                     B00013  2015-01-01 00:30:00         NaN
 1                     B00013  2015-01-01 01:22:00         NaN
 2                     B00013  2015-01-01 01:23:00         NaN
 3                     B00013  2015-01-01 01:44:00         NaN
 4                     B00013  2015-01-01 02:00:00         NaN
 ...                      ...                  ...         ...
 2746028               B02765  2015-01-31 23:59:02       169.0
 2746029               B02765  2015-01-31 23:59:07        80.0
 2746030               B02765  2015-01-31 23:59:34       186.0
 2746031               B02765  2015-01-31 23:59:40       181.0
 2746032               B02765  2015-01-31 23:59:48        79.0
 
 [2746033 rows x 3 columns],
         Dispatching_base_num          Pickup_date  locationID
 0                     B00013  2015-02-01 00:00:00         NaN
 1                     B00013  2015-02-01 00:01:00         NaN
 2                     B

In [55]:
taxi_loop_concat

Unnamed: 0,Dispatching_base_num,Pickup_date,locationID
0,B00013,2015-01-01 00:30:00,
1,B00013,2015-01-01 01:22:00,
2,B00013,2015-01-01 01:23:00,
3,B00013,2015-01-01 01:44:00,
4,B00013,2015-01-01 02:00:00,
...,...,...,...
4296062,B02821,2015-05-31 14:00:00,
4296063,B02821,2015-05-31 14:32:00,
4296064,B02821,2015-05-31 14:45:00,
4296065,B02821,2015-05-31 17:44:00,


### Third Method
Load multiple file using __list comprehension__

In [56]:
# Same as second method
list_taxi_df = []
for csv_filename in nyc_taxi_data:
    df = pd.read_csv(csv_filename)
    list_taxi_df.append(df)
    
# List comprehension
list_taxi_df_comp = [pd.read_csv(data) for data in nyc_taxi_data]
taxi_loop_concat_comp = pd.concat(list_taxi_df_comp)

In [57]:
taxi_loop_concat_comp.head()

Unnamed: 0,Dispatching_base_num,Pickup_date,locationID
0,B00013,2015-01-01 00:30:00,
1,B00013,2015-01-01 01:22:00,
2,B00013,2015-01-01 01:23:00,
3,B00013,2015-01-01 01:44:00,
4,B00013,2015-01-01 02:00:00,
