# MPG Cars

### Introduction:

The following exercise utilizes data from [UC Irvine Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/Auto+MPG)

### Step 1. Import the necessary libraries

In [2]:
import pandas as pd
import numpy as np

### Step 2. Import the first dataset [cars1](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv) and [cars2](https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv).  

   ### Step 3. Assign each to a variable called cars1 and cars2

In [4]:
cars1 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars1.csv")
cars2 = pd.read_csv("https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/05_Merge/Auto_MPG/cars2.csv")

print(cars1.head())
print(cars2.head())

    mpg  cylinders  displacement horsepower  weight  acceleration  model  \
0  18.0          8           307        130    3504          12.0     70   
1  15.0          8           350        165    3693          11.5     70   
2  18.0          8           318        150    3436          11.0     70   
3  16.0          8           304        150    3433          12.0     70   
4  17.0          8           302        140    3449          10.5     70   

   origin                        car  Unnamed: 9  Unnamed: 10  Unnamed: 11  \
0       1  chevrolet chevelle malibu         NaN          NaN          NaN   
1       1          buick skylark 320         NaN          NaN          NaN   
2       1         plymouth satellite         NaN          NaN          NaN   
3       1              amc rebel sst         NaN          NaN          NaN   
4       1                ford torino         NaN          NaN          NaN   

   Unnamed: 12  Unnamed: 13  
0          NaN          NaN  
1          NaN

### Step 4. Oops, it seems our first dataset has some unnamed blank columns, fix cars1

In [5]:
cars1 = cars1.loc[:, "mpg":"car"]
cars1.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino


### Step 5. What is the number of observations in each dataset?

In [8]:
print(cars1.shape)
print(cars2.shape)

(198, 9)
(200, 9)


### Step 6. Join cars1 and cars2 into a single DataFrame called cars

In [9]:
cars = pd.concat([cars1, cars2])
cars

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car
0,18.0,8,307,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl
196,44.0,4,97,52,2130,24.6,82,2,vw pickup
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage
198,28.0,4,120,79,2625,18.6,82,1,ford ranger


### Step 7. Oops, there is a column missing, called owners. Create a random number Series from 15,000 to 73,000.

In [10]:
nr_owners = np.random.randint(15000, high=73001, size=398, dtype='l')
nr_owners

array([67354, 57274, 52859, 53116, 43608, 38075, 63847, 50116, 15925,
       56016, 69180, 71733, 28010, 60117, 40845, 26166, 47588, 67678,
       67786, 65703, 58571, 38238, 53260, 41566, 15564, 15916, 49417,
       48274, 35677, 44804, 35835, 62165, 19735, 38632, 66456, 32189,
       56957, 15087, 52334, 20504, 68488, 51883, 29271, 23232, 49749,
       34849, 19390, 20700, 27392, 16111, 48862, 18800, 57125, 21296,
       49488, 28191, 40875, 24240, 48515, 16557, 54894, 53897, 48895,
       21068, 57220, 67322, 21376, 30791, 30538, 19340, 26426, 21060,
       42338, 30397, 20439, 22789, 50122, 58867, 44014, 45971, 49521,
       55631, 34270, 37117, 63733, 57800, 68253, 35349, 41188, 33829,
       67950, 32492, 53854, 71096, 18809, 60553, 53774, 63071, 35217,
       63269, 31392, 20080, 38795, 39824, 15309, 40746, 43367, 57876,
       21729, 37768, 72703, 59791, 55416, 54222, 30586, 20630, 28047,
       40986, 56763, 68297, 40855, 30081, 16692, 48648, 51368, 26038,
       26898, 15280,

### Step 8. Add the column owners to cars

In [11]:
cars['owners'] = nr_owners
cars.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model,origin,car,owners
195,27.0,4,140,86,2790,15.6,82,1,ford mustang gl,39372
196,44.0,4,97,52,2130,24.6,82,2,vw pickup,38183
197,32.0,4,135,84,2295,11.6,82,1,dodge rampage,53405
198,28.0,4,120,79,2625,18.6,82,1,ford ranger,29954
199,31.0,4,119,82,2720,19.4,82,1,chevy s-10,45165
