### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np
import random

In [3]:
class display(object):
    """Display HTML representation of multiple objects"""
    template = """<div style="float: left; padding: 10px;">
    <p style='font-family:"Courier New", Courier, monospace'>{0}</p>{1}
    </div>"""
    def __init__(self, *args):
        self.args = args
        
    def _repr_html_(self):
        return '\n'.join(self.template.format(a, eval(a)._repr_html_())
                         for a in self.args)
    
    def __repr__(self):
        return '\n\n'.join(a + '\n' + repr(eval(a))
                           for a in self.args)

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a to a variable called cars1 and cars2

In [16]:
cars1  = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv', sep=',', low_memory=False)
cars2  = pd.read_csv('https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv', sep=',', low_memory=False)

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [17]:
display("cars1")

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick,,,,,
194,22.5,6,232,90,3085,17.6,76,1,amc hornet,,,,,
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette,,,,,
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody,,,,,


In [19]:
mask_columns = cars1.columns.str.contains('Unnamed')

In [21]:
cars1.drop(cars1.columns[mask_columns], axis=1)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


In [10]:
cars1.dropna(how="all", axis = 1)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


In [11]:
display("cars1")

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


### Step 5. What is the number of observations in each dataset?

In [23]:
print(cars1.shape)
print(cars2.shape)
print(f'Cars1 rows {cars1.shape[0]}')
print(f'Cars2 rows {cars2.shape[0]}')

(198, 14)
(200, 9)
Cars1 rows 198
Cars2 rows 200


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [35]:
rows_frame = [cars1, cars2]
cars = pd.concat(rows_frame, ignore_index = False)

In [36]:
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


In [26]:
cars = pd.concat([cars1, cars2], axis=0, ignore_index=True)
display("cars")

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,,,,,
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,,,,,
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,,,,,
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,,,,,


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [27]:
owners = pd.Series([random.randint(15000, 73001) for _ in range(len(cars.index.values.tolist()))],
                index = [cars.index.values.tolist()])
owners


0      57596
1      72763
2      64852
3      59980
4      44658
       ...  
393    45405
394    68875
395    37605
396    41813
397    33842
Length: 398, dtype: int64

In [28]:
s = np.random.randint(15000, 73001, size=len(cars))
s

array([41793, 24532, 71957, 67473, 36895, 28274, 61915, 19816, 55235,
       59019, 71592, 51361, 63736, 23922, 26496, 68507, 66171, 44429,
       36842, 38554, 36560, 55756, 65561, 59063, 20855, 45766, 44643,
       31581, 46532, 30058, 65169, 24674, 46690, 17080, 51643, 71801,
       67691, 28220, 29602, 20638, 63412, 22506, 63589, 17536, 71407,
       36693, 49379, 67524, 30248, 29851, 65387, 49249, 39624, 45666,
       26962, 71815, 72176, 57114, 26106, 42482, 24562, 15538, 42330,
       67583, 57001, 15161, 39928, 50498, 36112, 41787, 69000, 42726,
       45303, 37061, 45822, 71314, 31351, 21385, 58536, 32085, 45567,
       16543, 33161, 45832, 61823, 57421, 44056, 24653, 61572, 18282,
       16323, 16754, 41070, 58693, 20687, 68683, 48446, 42739, 22204,
       19127, 45671, 32768, 54658, 55672, 33113, 34493, 66559, 72199,
       42202, 72966, 54821, 51668, 20964, 71761, 47210, 71053, 66301,
       66563, 23399, 43292, 28386, 40648, 58824, 68504, 28044, 68672,
       34364, 21678,

### Step 8. Add the column owners to cars

In [31]:
cars['owners'] = s
cars.dropna(axis=1)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,41793
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,24532
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,71957
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,67473
4,17.0,8,302,140,3449,10.5,70,1,ford torino,36895
...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,61436
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,49925
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,71212
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,49348


In [32]:
cars['owners'] = owners.values

In [34]:
cars.dropna(axis=1)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,57596
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,72763
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,64852
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,59980
4,17.0,8,302,140,3449,10.5,70,1,ford torino,44658
...,...,...,...,...,...,...,...,...,...,...
393,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,45405
394,44.0,4,97,52,2130,24.6,82,2,vw pickup,68875
395,32.0,4,135,84,2295,11.6,82,1,dodge rampage,37605
396,28.0,4,120,79,2625,18.6,82,1,ford ranger,41813
