# MPG Cars

### Introduction:

The following exercise uses data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd
import numpy as np

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign them to variables called cars1 and cars2, respectively

In [4]:
# Download cars1.csv from the exercise repository and preview its first rows.
cars1_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv"
cars1 = pd.read_csv(cars1_url)
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [5]:
# Download cars2.csv from the exercise repository and preview its first rows.
cars2_url = "https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv"
cars2 = pd.read_csv(cars2_url)
cars2.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [8]:
# Keep only the columns from "mpg" through "car"; the trailing unnamed blank
# columns fall outside this span. Label slices include both endpoints.
named_span = slice("mpg", "car")
cars1 = cars1.loc[:, named_span]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [9]:
# (rows, columns) of each dataset, in the order cars1, cars2.
tuple(frame.shape for frame in (cars1, cars2))

((198, 9), (200, 9))

### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [11]:
# Stack cars2 underneath cars1 to form a single frame.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. As with append's defaults, each input
# keeps its own row labels, so index values repeat across the two parts.
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a series of random integers between 15,000 and 73,000 (inclusive), one for each row of cars.

In [13]:
# Draw 398 random owner counts in the inclusive range [15000, 73000]; 398 is
# the combined row count of the two datasets (198 + 200).
# A seeded Generator makes the values reproducible on every fresh run, and the
# explicit int64 dtype avoids the platform-dependent width of the C long ('l').
# The result is kept as a plain NumPy array (not a pandas Series) so that a
# later column assignment is positional rather than aligned on row labels.
rng = np.random.default_rng(42)
random_series = rng.integers(15000, 73000, size=398, endpoint=True, dtype=np.int64)
random_series

array([35149, 66005, 32372, 67150, 42350, 56253, 62406, 32628, 29988,
       40531, 54260, 17248, 67872, 72382, 69737, 21996, 25145, 50561,
       61590, 37389, 54711, 15855, 31790, 31142, 63541, 55138, 46157,
       65691, 61417, 24735, 50866, 58562, 15468, 26286, 32608, 33434,
       28111, 29813, 60312, 24125, 32939, 60371, 53606, 34962, 47030,
       23796, 37247, 16863, 51473, 53130, 35573, 19533, 35704, 33422,
       58870, 59222, 17375, 47331, 58109, 43982, 55697, 56403, 71636,
       29173, 62412, 50984, 39900, 49663, 18165, 38096, 18081, 60017,
       58100, 50046, 33148, 72963, 32922, 66676, 26586, 26020, 42708,
       66610, 64673, 52441, 64139, 30885, 22443, 23146, 71896, 49134,
       33467, 35615, 67999, 65232, 70054, 71163, 53590, 32355, 31029,
       66128, 42134, 55244, 71848, 34815, 16509, 66067, 26300, 30539,
       60253, 52447, 39577, 58567, 35223, 38703, 69615, 60085, 30550,
       62702, 45603, 23680, 49478, 37069, 72490, 70238, 52118, 71254,
       15718, 72530,

### Step 8. Add the column owners to cars

In [16]:
# Attach the random values as a new "owners" column; the array is matched to
# the rows by position, so it must have exactly one value per row.
cars = cars.assign(owners=random_series)
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,35149
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,66005
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,32372
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,67150
4,17.0,8,302,140,3449,10.5,70,1,ford torino,42350
...,...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,54610
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,50852
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,53067
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,59291
