# Appending & concatenating Series

### append()
- .append(): Series & DataFrame method (deprecated in pandas 1.4 and removed in pandas 2.0; use pd.concat() in new code)
-  Invocation:
-  s1.append(s2)
- Stacks rows of s2 below s1
- Method for Series & DataFrames

### concat()
-  concat(): pandas module function
- Invocation:
- pd.concat([s1, s2, s3])
- Can stack row-wise or column-wise

### concat() & .append()
- Equivalence of concat() & .append():
- result1 = pd.concat([s1, s2, s3])
- result2 = s1.append(s2).append(s3)
- result1 == result2 elementwise

### Series of US states

In [1]:
import pandas as pd

# State abbreviations grouped by region, one Series per region.
# Every Series gets the default integer labels starting at 0.
northeast = pd.Series([
    'CT', 'ME', 'MA', 'NH', 'RI', 'VT',
    'NJ', 'NY', 'PA',
])

south = pd.Series([
    'DE', 'FL', 'GA', 'MD', 'NC', 'SC',
    'VA', 'DC', 'WV', 'AL', 'KY', 'MS',
    'TN', 'AR', 'LA', 'OK', 'TX',
])

midwest = pd.Series([
    'IL', 'IN', 'MN', 'MO', 'NE', 'ND',
    'SD', 'IA', 'KS', 'MI', 'OH', 'WI',
])

west = pd.Series([
    'AZ', 'CO', 'ID', 'MT', 'NV', 'NM',
    'UT', 'WY', 'AK', 'CA', 'HI', 'OR',
    'WA',
])

### Using .append()

In [2]:
# Series.append() was deprecated in pandas 1.4 and removed in pandas 2.0, so
# the rows are stacked with pd.concat(), the equivalent of
# northeast.append(south): south's rows go below northeast's, labels unchanged.
east = pd.concat([northeast, south])
east

0     CT
1     ME
2     MA
3     NH
4     RI
5     VT
6     NJ
7     NY
8     PA
0     DE
1     FL
2     GA
3     MD
4     NC
5     SC
6     VA
7     DC
8     WV
9     AL
10    KY
11    MS
12    TN
13    AR
14    LA
15    OK
16    TX
dtype: object

### The appended Index

In [3]:
# Each input Series kept its own labels, so the combined index repeats 0-8.
east.index

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,  7,
             8,  9, 10, 11, 12, 13, 14, 15, 16],
           dtype='int64')

In [4]:
# Label 3 occurs once per input Series, so .loc returns two rows.
east.loc[3]

3    NH
3    MD
dtype: object

### Using .reset_index()


In [5]:
# Series.append() was removed in pandas 2.0; pd.concat() stacks the same rows.
# reset_index(drop=True) then discards the repeated labels and renumbers the
# rows from 0 instead of keeping the old labels as a column.
new_east = pd.concat([northeast, south]).reset_index(drop=True)

new_east.head(11)

0     CT
1     ME
2     MA
3     NH
4     RI
5     VT
6     NJ
7     NY
8     PA
9     DE
10    FL
dtype: object

### Using concat()


In [6]:
# Stack the two regions row-wise with the module-level concat() function.
east = pd.concat([northeast, south], axis='index')

# The first 11 rows span the seam between the two inputs.
east.head(n=11)

0    CT
1    ME
2    MA
3    NH
4    RI
5    VT
6    NJ
7    NY
8    PA
0    DE
1    FL
dtype: object

In [7]:
# pd.concat() keeps each input's original labels, so the index repeats.
east.index

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,  7,
             8,  9, 10, 11, 12, 13, 14, 15, 16],
           dtype='int64')

### Using ignore_index


In [8]:
# ignore_index=True drops the input labels and numbers the rows afresh.
new_east = pd.concat([northeast, south], axis='index', ignore_index=True)

new_east.head(n=11)

0     CT
1     ME
2     MA
3     NH
4     RI
5     VT
6     NJ
7     NY
8     PA
9     DE
10    FL
dtype: object

In [9]:
# With ignore_index=True the result carries a fresh 0..25 range as its index.
new_east.index

RangeIndex(start=0, stop=26, step=1)

---
# Let’s practice!

---
# Appending & concatenating DataFrames

### Loading population data

In [1]:
import pandas as pd

# Read both population files, using the first CSV column as the row index.
pop1, pop2 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('population_01.csv', 'population_02.csv')
)

# Report the container type and (rows, columns) shape of each frame.
print(f"{type(pop1)} {pop1.shape}\n{type(pop2)} {pop2.shape}")

In [7]:
# Show the first population table.
pop1

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
66407,479
72732,4716
50579,2405
46241,30670


In [8]:
# Show the second population table.
pop2

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
12776,2180
76092,26669
98360,12221
49464,27481


### Appending population DataFrames

In [11]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat() stacks the rows of pop2 below pop1 in the same way.
pd.concat([pop1, pop2])

Unnamed: 0_level_0,2010 Census Population
Zip Code ZCTA,Unnamed: 1_level_1
66407,479
72732,4716
50579,2405
46241,30670
12776,2180
76092,26669
98360,12221
49464,27481


In [12]:
# One line per frame: the index name followed by the column labels.
print(
    f"{pop1.index.name} {pop1.columns}",
    f"{pop2.index.name} {pop2.columns}",
    sep='\n',
)

Zip Code ZCTA Index(['2010 Census Population'], dtype='object')
Zip Code ZCTA Index(['2010 Census Population'], dtype='object')


### Population & unemployment data

In [13]:
# Load both tables, using the first CSV column (the zip code) as the index.
population = pd.read_csv(
    'population_00.csv',
    index_col=0,
)
unemployment = pd.read_csv(
    'unemployment_00.csv',
    index_col=0,
)

# Plain-text dump of both frames, one after the other.
print(population, unemployment, sep='\n')

               2010 Census Population
Zip Code ZCTA                        
57538                             322
59916                             130
37660                           40038
2860                            45199
       unemployment  participants
Zip                              
2860           0.11         34447
46167          0.02          4800
1097           0.33            42
80808          0.07          4310


### Appending population & unemployment

In [14]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat() is the equivalent row-wise stack. The two frames share no column
# names, so every row is padded with NaN in the columns it does not have.
pd.concat([population, unemployment])

Unnamed: 0,2010 Census Population,participants,unemployment
57538,322.0,,
59916,130.0,,
37660,40038.0,,
2860,45199.0,,
2860,,34447.0,0.11
46167,,4800.0,0.02
1097,,42.0,0.33
80808,,4310.0,0.07


### Repeated index labels

### Concatenating rows


In [15]:
# Row-wise stack: unemployment's rows are placed below population's.
pd.concat(
    [population, unemployment],
    axis='index',
)

Unnamed: 0,2010 Census Population,participants,unemployment
57538,322.0,,
59916,130.0,,
37660,40038.0,,
2860,45199.0,,
2860,,34447.0,0.11
46167,,4800.0,0.02
1097,,42.0,0.33
80808,,4310.0,0.07


### Concatenating columns


In [16]:
# Column-wise stack: rows are aligned on the zip-code index labels.
pd.concat(
    [population, unemployment],
    axis='columns',
)

Unnamed: 0,2010 Census Population,unemployment,participants
1097,,0.33,42.0
2860,45199.0,0.11,34447.0
37660,40038.0,,
46167,,0.02,4800.0
57538,322.0,,
59916,130.0,,
80808,,0.07,4310.0


---
# Let’s practice!

---
# Concatenation, keys, & MultiIndexes

### Loading rainfall data

In [17]:
import pandas as pd

# Load the first-quarter rainfall tables, indexed by their 'Month' column.
rain2013 = pd.read_csv(
    'q1_rainfall_2013.csv',
    index_col='Month',
    parse_dates=True,
)
rain2014 = pd.read_csv(
    'q1_rainfall_2014.csv',
    index_col='Month',
    parse_dates=True,
)

# Plain-text dump of both years, one after the other.
print(rain2013, rain2014, sep='\n')

       Percipitation
Month               
Jan         0.096129
Feb         0.067143
Mar         0.061613
       Percipitation
Month               
Jan         0.050323
Feb         0.082143
Mar         0.070968


### Concatenating rows


In [18]:
# Row-wise stack; the month labels repeat because both years use the same ones.
pd.concat(
    [rain2013, rain2014],
    axis='index',
)

Unnamed: 0_level_0,Percipitation
Month,Unnamed: 1_level_1
Jan,0.096129
Feb,0.067143
Mar,0.061613
Jan,0.050323
Feb,0.082143
Mar,0.070968


### Using multi-index on rows


In [19]:
# keys= adds an outer index level (the year) above each frame's month labels.
rain1314 = pd.concat(
    [rain2013, rain2014],
    axis='index',
    keys=[2013, 2014],
)

rain1314

Unnamed: 0_level_0,Unnamed: 1_level_0,Percipitation
Unnamed: 0_level_1,Month,Unnamed: 2_level_1
2013,Jan,0.096129
2013,Feb,0.067143
2013,Mar,0.061613
2014,Jan,0.050323
2014,Feb,0.082143
2014,Mar,0.070968


### Accessing a multi-index


In [20]:
# Select every row stored under the outer-level key 2014.
rain1314.loc[2014]

Unnamed: 0_level_0,Percipitation
Month,Unnamed: 1_level_1
Jan,0.050323
Feb,0.082143
Mar,0.070968


### Concatenating columns

In [21]:
# Side-by-side stack; without keys both columns keep the same name.
rain1314 = pd.concat(
    [rain2013, rain2014],
    axis=1,
)

rain1314

Unnamed: 0_level_0,Percipitation,Percipitation
Month,Unnamed: 1_level_1,Unnamed: 2_level_1
Jan,0.096129,0.050323
Feb,0.067143,0.082143
Mar,0.061613,0.070968


### Using a multi-index on columns

In [22]:
# keys= on the column axis puts the year above each frame's column name.
rain1314 = pd.concat(
    [rain2013, rain2014],
    axis=1,
    keys=[2013, 2014],
)
rain1314

Unnamed: 0_level_0,2013,2014
Unnamed: 0_level_1,Percipitation,Percipitation
Month,Unnamed: 1_level_2,Unnamed: 2_level_2
Jan,0.096129,0.050323
Feb,0.067143,0.082143
Mar,0.061613,0.070968


### pd.concat() with dict


In [23]:
# Map each year to its frame; concat() uses the dict keys as the outer level.
rain_dict = dict([
    (2013, rain2013),
    (2014, rain2014),
])

rain1314 = pd.concat(rain_dict, axis=1)

rain1314

Unnamed: 0_level_0,2013,2014
Unnamed: 0_level_1,Percipitation,Percipitation
Month,Unnamed: 1_level_2,Unnamed: 2_level_2
Jan,0.096129,0.050323
Feb,0.067143,0.082143
Mar,0.061613,0.070968


---
# Let’s practice!

---

---
# Outer & inner joins

### Working with arrays

In [25]:
import pandas as pd
import numpy as np

# 2x4 array holding 0..7, offset by 0.1.
A = 0.1 + np.arange(8).reshape((2, 4))
A

array([[0.1, 1.1, 2.1, 3.1],
       [4.1, 5.1, 6.1, 7.1]])

In [27]:
# 2x3 array holding 0..5, offset by 0.2.
B = 0.2 + np.arange(6).reshape((2, 3))
B

array([[0.2, 1.2, 2.2],
       [3.2, 4.2, 5.2]])

In [28]:
# 3x4 array holding 0..11, offset by 0.3.
C = 0.3 + np.arange(12).reshape((3, 4))
C

array([[ 0.3,  1.3,  2.3,  3.3],
       [ 4.3,  5.3,  6.3,  7.3],
       [ 8.3,  9.3, 10.3, 11.3]])

### Stacking arrays horizontally

In [29]:
# B and A both have two rows, so they can sit side by side.
np.hstack((B, A))

array([[0.2, 1.2, 2.2, 0.1, 1.1, 2.1, 3.1],
       [3.2, 4.2, 5.2, 4.1, 5.1, 6.1, 7.1]])

In [30]:
# Same side-by-side result, spelled with the general concatenate() call.
np.concatenate((B, A), axis=1)

array([[0.2, 1.2, 2.2, 0.1, 1.1, 2.1, 3.1],
       [3.2, 4.2, 5.2, 4.1, 5.1, 6.1, 7.1]])

### Stacking arrays vertically

In [33]:
# A and C both have four columns, so C's rows can go below A's.
np.vstack((A, C))

array([[ 0.1,  1.1,  2.1,  3.1],
       [ 4.1,  5.1,  6.1,  7.1],
       [ 0.3,  1.3,  2.3,  3.3],
       [ 4.3,  5.3,  6.3,  7.3],
       [ 8.3,  9.3, 10.3, 11.3]])

In [34]:
# Same top-to-bottom result, spelled with the general concatenate() call.
np.concatenate((A, C), axis=0)

array([[ 0.1,  1.1,  2.1,  3.1],
       [ 4.1,  5.1,  6.1,  7.1],
       [ 0.3,  1.3,  2.3,  3.3],
       [ 4.3,  5.3,  6.3,  7.3],
       [ 8.3,  9.3, 10.3, 11.3]])

### Incompatible array dimensions

```python
In [11]: np.concatenate([A, B], axis=0) # incompatible columns 
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
----> 1 np.concatenate([A, B], axis=0) # incompatible columns
ValueError: all the input array dimensions except for the concatenation axis must match
exactly

In [12]: np.concatenate([A, C], axis=1) # incompatible rows
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
----> 1 np.concatenate([A, C], axis=1) # incompatible rows
ValueError: all the input array dimensions except for the concatenation axis must match
exactly
```

### Population & unemployment data


In [35]:
# Load both tables, using the first CSV column (the zip code) as the index.
population = pd.read_csv(
    'population_00.csv',
    index_col=0,
)
unemployment = pd.read_csv(
    'unemployment_00.csv',
    index_col=0,
)

# Plain-text dump of both frames, one after the other.
print(population, unemployment, sep='\n')

               2010 Census Population
Zip Code ZCTA                        
57538                             322
59916                             130
37660                           40038
2860                            45199
       unemployment  participants
Zip                              
2860           0.11         34447
46167          0.02          4800
1097           0.33            42
80808          0.07          4310


### Converting to arrays

In [36]:
# np.array() keeps only the cell values; the zip-code row labels are dropped.
population_array = np.array(population)
population_array  # Index info is lost

array([[  322],
       [  130],
       [40038],
       [45199]])

In [38]:
# The rate and the participant count end up together in a single float array.
unemployment_array = np.array(unemployment)
unemployment_array

array([[1.1000e-01, 3.4447e+04],
       [2.0000e-02, 4.8000e+03],
       [3.3000e-01, 4.2000e+01],
       [7.0000e-02, 4.3100e+03]])

### Manipulating data as arrays

In [39]:
# Rows are paired purely by position here; the arrays carry no zip-code labels.
np.concatenate(
    (population_array, unemployment_array),
    axis=1,
)

array([[3.2200e+02, 1.1000e-01, 3.4447e+04],
       [1.3000e+02, 2.0000e-02, 4.8000e+03],
       [4.0038e+04, 3.3000e-01, 4.2000e+01],
       [4.5199e+04, 7.0000e-02, 4.3100e+03]])

## Joins
- Joining tables: Combining rows of multiple tables
- Outer join
    - Union of index sets (all labels, no repetition)
    - Missing fields filled with NaN
- Inner join
    - Intersection of index sets (only common labels)

### Concatenation & inner join


In [40]:
# Inner join on the index: keep only zip codes present in both frames.
pd.concat(
    [population, unemployment],
    join='inner',
    axis='columns',
)

Unnamed: 0,2010 Census Population,unemployment,participants
2860,45199,0.11,34447


### Concatenation & outer join


In [41]:
# Outer join on the index: keep every zip code, filling the gaps with NaN.
pd.concat(
    [population, unemployment],
    join='outer',
    axis='columns',
)

Unnamed: 0,2010 Census Population,unemployment,participants
1097,,0.33,42.0
2860,45199.0,0.11,34447.0
37660,40038.0,,
46167,,0.02,4800.0
57538,322.0,,
59916,130.0,,
80808,,0.07,4310.0


### Inner join on other axis


In [43]:
# Stacking rows with an inner join keeps only columns common to both frames;
# these frames share none, so only the index labels remain.
pd.concat(
    [population, unemployment],
    axis='index',
    join='inner',
)

57538
59916
37660
2860
2860
46167
1097
80808


---
# Let’s practice!