# MPG Cars

### Introduction:

The following exercise uses data from the [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG).

### Step 1. Import the necessary libraries

In [1]:
import pandas as pd 
import numpy as np 

### Step 2. Import the two datasets, [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).

### Step 3. Assign them to variables called cars1 and cars2

In [4]:
# Fetch both halves of the Auto MPG data directly from the exercise repository.
cars1, cars2 = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv',
        'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv',
    )
)

In [5]:
# Preview the first five rows of cars1 (five is also the default for head()).
cars1.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,,,,,
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,,,,,
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,,,,,
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,,,,,
4,17.0,8,302,140,3449,10.5,70,1,ford torino,,,,,


In [6]:
# Preview the first five rows of cars2 (five is also the default for head()).
cars2.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,33.0,4,91,53,1795,17.4,76,3,honda civic
1,20.0,6,225,100,3651,17.7,76,1,dodge aspen se
2,18.0,6,250,78,3574,21.0,76,1,ford granada ghia
3,18.5,6,250,110,3645,16.2,76,1,pontiac ventura sj
4,17.5,6,258,95,3193,17.8,76,1,amc pacer d/l


### Step 4. Oops, it seems our first dataset has some unnamed blank columns. Fix cars1.

In [7]:
# Label-based slices in .loc include both endpoints, so this keeps every
# column from 'mpg' through 'car' and drops the trailing unnamed ones.
cars1 = cars1.loc[:, slice('mpg', 'car')]
cars1

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
193,24.0,6,200,81,3012,17.6,76,1,ford maverick
194,22.5,6,232,90,3085,17.6,76,1,amc hornet
195,29.0,4,85,52,2035,22.2,76,1,chevrolet chevette
196,24.5,4,98,60,2164,22.1,76,1,chevrolet woody


### Step 5. What is the number of observations in each dataset?

In [8]:
# Show the (rows, columns) tuple of each dataset, one per line.
print(cars1.shape, cars2.shape, sep='\n')

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [10]:
# Stack cars2 beneath cars1. DataFrame.append was deprecated in pandas 1.4 and
# removed in pandas 2.0; pd.concat is the supported equivalent.
# Row labels are kept as-is (each frame keeps its own 0-based labels, so labels
# repeat in the result), which matches what append produced.
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create an array of random integers from 15,000 to 73,000 (inclusive), one for each row of cars.

In [11]:
# One random owner count per row of `cars`, drawn from 15,000 to 73,000
# inclusive (`high` is exclusive, hence 73001).
# The length comes from len(cars) rather than a hard-coded 398, so the array
# still fits the column assignment in the next step if the data ever changes.
nr_owners = np.random.randint(15000, high=73001, size=len(cars), dtype='l')
nr_owners

array([40155, 36667, 21820, 56943, 34773, 29949, 40027, 26404, 70237,
       47059, 65729, 68872, 37406, 65373, 50873, 47478, 19502, 49523,
       71334, 64944, 70559, 70651, 36242, 30272, 67784, 48793, 64712,
       26200, 26241, 71810, 36230, 17275, 59722, 66264, 30575, 66147,
       29151, 32997, 36790, 30002, 44095, 58219, 69015, 37708, 52269,
       18138, 57410, 20233, 44655, 15600, 16491, 44808, 45370, 56842,
       52764, 39256, 42465, 43793, 61008, 48312, 16339, 32001, 34123,
       30881, 52305, 51256, 23416, 46132, 39168, 69936, 46702, 38227,
       24659, 50086, 51929, 52813, 53279, 15010, 57288, 20902, 69692,
       51043, 68137, 71221, 59552, 29009, 35988, 56245, 32120, 21222,
       46954, 26786, 21472, 72877, 54552, 28299, 70465, 71202, 39400,
       59210, 57056, 22893, 37159, 68399, 32320, 69927, 52501, 34905,
       31096, 33418, 42055, 56069, 59440, 29294, 69264, 46947, 72369,
       37922, 18785, 61043, 55250, 17588, 38026, 34332, 35208, 26977,
       64555, 35462,

### Step 8. Add the column owners to cars

In [12]:
# Attach the random owner counts as a new 'owners' column. nr_owners is a plain
# array, so values are matched to rows by position rather than by index label.
cars['owners'] = nr_owners
# Preview the first five rows to confirm the new column is present.
cars.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu,40155
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320,36667
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite,21820
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst,56943
4,17.0,8,302,140,3449,10.5,70,1,ford torino,34773
